diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-05-01 12:06:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-05-01 12:06:20 -0700 |
commit | c8c655c34e33544aec9d64b660872ab33c29b5f1 (patch) | |
tree | 4aad88f698f04cef9e5d9d573a6df6283085dadd /arch/arm64/kvm/hypercalls.c | |
parent | d75439d64a1e2b35e0f08906205b00279753cbed (diff) | |
parent | b3c98052d46948a8d65d2778c7f306ff38366aac (diff) | |
download | linux-c8c655c34e33544aec9d64b660872ab33c29b5f1.tar.gz linux-c8c655c34e33544aec9d64b660872ab33c29b5f1.tar.bz2 linux-c8c655c34e33544aec9d64b660872ab33c29b5f1.zip |
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini:
"s390:
- More phys_to_virt conversions
- Improvement of AP management for VSIE (nested virtualization)
ARM64:
- Numerous fixes for the pathological lock inversion issue that
plagued KVM/arm64 since... forever.
- New framework allowing SMCCC-compliant hypercalls to be forwarded
to userspace, hopefully paving the way for some more features being
moved to VMMs rather than be implemented in the kernel.
- Large rework of the timer code to allow a VM-wide offset to be
applied to both virtual and physical counters as well as a
per-timer, per-vcpu offset that complements the global one. This
last part allows the NV timer code to be implemented on top.
- A small set of fixes to make sure that we don't change anything
affecting the EL1&0 translation regime just after having having
taken an exception to EL2 until we have executed a DSB. This
ensures that speculative walks started in EL1&0 have completed.
- The usual selftest fixes and improvements.
x86:
- Optimize CR0.WP toggling by avoiding an MMU reload when TDP is
enabled, and by giving the guest control of CR0.WP when EPT is
enabled on VMX (VMX-only because SVM doesn't support per-bit
controls)
- Add CR0/CR4 helpers to query single bits, and clean up related code
where KVM was interpreting kvm_read_cr4_bits()'s "unsigned long"
return as a bool
- Move AMD_PSFD to cpufeatures.h and purge KVM's definition
- Avoid unnecessary writes+flushes when the guest is only adding new
PTEs
- Overhaul .sync_page() and .invlpg() to utilize .sync_page()'s
optimizations when emulating invalidations
- Clean up the range-based flushing APIs
- Revamp the TDP MMU's reaping of Accessed/Dirty bits to clear a
single A/D bit using a LOCK AND instead of XCHG, and skip all of
the "handle changed SPTE" overhead associated with writing the
entire entry
- Track the number of "tail" entries in a pte_list_desc to avoid
having to walk (potentially) all descriptors during insertion and
deletion, which gets quite expensive if the guest is spamming
fork()
- Disallow virtualizing legacy LBRs if architectural LBRs are
available, the two are mutually exclusive in hardware
- Disallow writes to immutable feature MSRs (notably
PERF_CAPABILITIES) after KVM_RUN, similar to CPUID features
- Overhaul the vmx_pmu_caps selftest to better validate
PERF_CAPABILITIES
- Apply PMU filters to emulated events and add test coverage to the
pmu_event_filter selftest
- AMD SVM:
- Add support for virtual NMIs
- Fixes for edge cases related to virtual interrupts
- Intel AMX:
- Don't advertise XTILE_CFG in KVM_GET_SUPPORTED_CPUID if
XTILE_DATA is not being reported due to userspace not opting in
via prctl()
- Fix a bug in emulation of ENCLS in compatibility mode
- Allow emulation of NOP and PAUSE for L2
- AMX selftests improvements
- Misc cleanups
MIPS:
- Constify MIPS's internal callbacks (a leftover from the hardware
enabling rework that landed in 6.3)
Generic:
- Drop unnecessary casts from "void *" throughout kvm_main.c
- Tweak the layout of "struct kvm_mmu_memory_cache" to shrink the
struct size by 8 bytes on 64-bit kernels by utilizing a padding
hole
Documentation:
- Fix goof introduced by the conversion to rST"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (211 commits)
KVM: s390: pci: fix virtual-physical confusion on module unload/load
KVM: s390: vsie: clarifications on setting the APCB
KVM: s390: interrupt: fix virtual-physical confusion for next alert GISA
KVM: arm64: Have kvm_psci_vcpu_on() use WRITE_ONCE() to update mp_state
KVM: arm64: Acquire mp_state_lock in kvm_arch_vcpu_ioctl_vcpu_init()
KVM: selftests: Test the PMU event "Instructions retired"
KVM: selftests: Copy full counter values from guest in PMU event filter test
KVM: selftests: Use error codes to signal errors in PMU event filter test
KVM: selftests: Print detailed info in PMU event filter asserts
KVM: selftests: Add helpers for PMC asserts in PMU event filter test
KVM: selftests: Add a common helper for the PMU event filter guest code
KVM: selftests: Fix spelling mistake "perrmited" -> "permitted"
KVM: arm64: vhe: Drop extra isb() on guest exit
KVM: arm64: vhe: Synchronise with page table walker on MMU update
KVM: arm64: pkvm: Document the side effects of kvm_flush_dcache_to_poc()
KVM: arm64: nvhe: Synchronise with page table walker on TLBI
KVM: arm64: Handle 32bit CNTPCTSS traps
KVM: arm64: nvhe: Synchronise with page table walker on vcpu run
KVM: arm64: vgic: Don't acquire its_lock before config_lock
KVM: selftests: Add test to verify KVM's supported XCR0
...
Diffstat (limited to 'arch/arm64/kvm/hypercalls.c')
-rw-r--r-- | arch/arm64/kvm/hypercalls.c | 189 |
1 files changed, 179 insertions, 10 deletions
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c index c4b4678bc4a4..7fb4df0456de 100644 --- a/arch/arm64/kvm/hypercalls.c +++ b/arch/arm64/kvm/hypercalls.c @@ -47,7 +47,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset; break; case KVM_PTP_PHYS_COUNTER: - cycles = systime_snapshot.cycles; + cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.poffset; break; default: return; @@ -65,7 +65,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val) val[3] = lower_32_bits(cycles); } -static bool kvm_hvc_call_default_allowed(u32 func_id) +static bool kvm_smccc_default_allowed(u32 func_id) { switch (func_id) { /* @@ -93,7 +93,7 @@ static bool kvm_hvc_call_default_allowed(u32 func_id) } } -static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) +static bool kvm_smccc_test_fw_bmap(struct kvm_vcpu *vcpu, u32 func_id) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; @@ -117,20 +117,161 @@ static bool kvm_hvc_call_allowed(struct kvm_vcpu *vcpu, u32 func_id) return test_bit(KVM_REG_ARM_VENDOR_HYP_BIT_PTP, &smccc_feat->vendor_hyp_bmap); default: - return kvm_hvc_call_default_allowed(func_id); + return false; + } +} + +#define SMC32_ARCH_RANGE_BEGIN ARM_SMCCC_VERSION_FUNC_ID +#define SMC32_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, ARM_SMCCC_FUNC_MASK) + +#define SMC64_ARCH_RANGE_BEGIN ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, 0) +#define SMC64_ARCH_RANGE_END ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_64, \ + 0, ARM_SMCCC_FUNC_MASK) + +static void init_smccc_filter(struct kvm *kvm) +{ + int r; + + mt_init(&kvm->arch.smccc_filter); + + /* + * Prevent userspace from handling any SMCCC calls in the architecture + * range, avoiding the risk of misrepresenting Spectre mitigation status + * to the guest. + */ + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC32_ARCH_RANGE_BEGIN, SMC32_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + + r = mtree_insert_range(&kvm->arch.smccc_filter, + SMC64_ARCH_RANGE_BEGIN, SMC64_ARCH_RANGE_END, + xa_mk_value(KVM_SMCCC_FILTER_HANDLE), + GFP_KERNEL_ACCOUNT); + WARN_ON_ONCE(r); + +} + +static int kvm_smccc_set_filter(struct kvm *kvm, struct kvm_smccc_filter __user *uaddr) +{ + const void *zero_page = page_to_virt(ZERO_PAGE(0)); + struct kvm_smccc_filter filter; + u32 start, end; + int r; + + if (copy_from_user(&filter, uaddr, sizeof(filter))) + return -EFAULT; + + if (memcmp(filter.pad, zero_page, sizeof(filter.pad))) + return -EINVAL; + + start = filter.base; + end = start + filter.nr_functions - 1; + + if (end < start || filter.action >= NR_SMCCC_FILTER_ACTIONS) + return -EINVAL; + + mutex_lock(&kvm->arch.config_lock); + + if (kvm_vm_has_ran_once(kvm)) { + r = -EBUSY; + goto out_unlock; } + + r = mtree_insert_range(&kvm->arch.smccc_filter, start, end, + xa_mk_value(filter.action), GFP_KERNEL_ACCOUNT); + if (r) + goto out_unlock; + + set_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags); + +out_unlock: + mutex_unlock(&kvm->arch.config_lock); + return r; +} + +static u8 kvm_smccc_filter_get_action(struct kvm *kvm, u32 func_id) +{ + unsigned long idx = func_id; + void *val; + + if (!test_bit(KVM_ARCH_FLAG_SMCCC_FILTER_CONFIGURED, &kvm->arch.flags)) + return KVM_SMCCC_FILTER_HANDLE; + + /* + * But where's the error handling, you say? + * + * mt_find() returns NULL if no entry was found, which just so happens + * to match KVM_SMCCC_FILTER_HANDLE. + */ + val = mt_find(&kvm->arch.smccc_filter, &idx, idx); + return xa_to_value(val); } -int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +static u8 kvm_smccc_get_action(struct kvm_vcpu *vcpu, u32 func_id) +{ + /* + * Intervening actions in the SMCCC filter take precedence over the + * pseudo-firmware register bitmaps. + */ + u8 action = kvm_smccc_filter_get_action(vcpu->kvm, func_id); + if (action != KVM_SMCCC_FILTER_HANDLE) + return action; + + if (kvm_smccc_test_fw_bmap(vcpu, func_id) || + kvm_smccc_default_allowed(func_id)) + return KVM_SMCCC_FILTER_HANDLE; + + return KVM_SMCCC_FILTER_DENY; +} + +static void kvm_prepare_hypercall_exit(struct kvm_vcpu *vcpu, u32 func_id) +{ + u8 ec = ESR_ELx_EC(kvm_vcpu_get_esr(vcpu)); + struct kvm_run *run = vcpu->run; + u64 flags = 0; + + if (ec == ESR_ELx_EC_SMC32 || ec == ESR_ELx_EC_SMC64) + flags |= KVM_HYPERCALL_EXIT_SMC; + + if (!kvm_vcpu_trap_il_is32bit(vcpu)) + flags |= KVM_HYPERCALL_EXIT_16BIT; + + run->exit_reason = KVM_EXIT_HYPERCALL; + run->hypercall = (typeof(run->hypercall)) { + .nr = func_id, + .flags = flags, + }; +} + +int kvm_smccc_call_handler(struct kvm_vcpu *vcpu) { struct kvm_smccc_features *smccc_feat = &vcpu->kvm->arch.smccc_feat; u32 func_id = smccc_get_function(vcpu); u64 val[4] = {SMCCC_RET_NOT_SUPPORTED}; u32 feature; + u8 action; gpa_t gpa; - if (!kvm_hvc_call_allowed(vcpu, func_id)) + action = kvm_smccc_get_action(vcpu, func_id); + switch (action) { + case KVM_SMCCC_FILTER_HANDLE: + break; + case KVM_SMCCC_FILTER_DENY: + goto out; + case KVM_SMCCC_FILTER_FWD_TO_USER: + kvm_prepare_hypercall_exit(vcpu, func_id); + return 0; + default: + WARN_RATELIMIT(1, "Unhandled SMCCC filter action: %d\n", action); goto out; + } switch (func_id) { case ARM_SMCCC_VERSION_FUNC_ID: @@ -245,6 +386,13 @@ void kvm_arm_init_hypercalls(struct kvm *kvm) smccc_feat->std_bmap = KVM_ARM_SMCCC_STD_FEATURES; smccc_feat->std_hyp_bmap = KVM_ARM_SMCCC_STD_HYP_FEATURES; smccc_feat->vendor_hyp_bmap = KVM_ARM_SMCCC_VENDOR_HYP_FEATURES; + + init_smccc_filter(kvm); +} + +void kvm_arm_teardown_hypercalls(struct kvm *kvm) +{ + mtree_destroy(&kvm->arch.smccc_filter); } int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) @@ -377,17 +525,16 @@ static int kvm_arm_set_fw_reg_bmap(struct kvm_vcpu *vcpu, u64 reg_id, u64 val) if (val & ~fw_reg_features) return -EINVAL; - mutex_lock(&kvm->lock); + mutex_lock(&kvm->arch.config_lock); - if (test_bit(KVM_ARCH_FLAG_HAS_RAN_ONCE, &kvm->arch.flags) && - val != *fw_reg_bmap) { + if (kvm_vm_has_ran_once(kvm) && val != *fw_reg_bmap) { ret = -EBUSY; goto out; } WRITE_ONCE(*fw_reg_bmap, val); out: - mutex_unlock(&kvm->lock); + mutex_unlock(&kvm->arch.config_lock); return ret; } @@ -481,3 +628,25 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) return -EINVAL; } + +int kvm_vm_smccc_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return 0; + default: + return -ENXIO; + } +} + +int kvm_vm_smccc_set_attr(struct kvm *kvm, struct kvm_device_attr *attr) +{ + void __user *uaddr = (void __user *)attr->addr; + + switch (attr->attr) { + case KVM_ARM_VM_SMCCC_FILTER: + return kvm_smccc_set_filter(kvm, uaddr); + default: + return -ENXIO; + } +} |