summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm64/include/asm/kvm_host.h17
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h8
-rw-r--r--arch/arm64/kernel/fpsimd.c8
-rw-r--r--arch/arm64/kernel/image-vars.h3
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/arm.c106
-rw-r--r--arch/arm64/kvm/debug.c26
-rw-r--r--arch/arm64/kvm/fpsimd.c14
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h4
-rw-r--r--arch/arm64/kvm/hyp/nvhe/Makefile3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/list_debug.c54
-rw-r--r--arch/arm64/kvm/hyp/nvhe/mem_protect.c3
-rw-r--r--arch/arm64/kvm/hyp/nvhe/stub.c22
-rw-r--r--arch/arm64/kvm/mmu.c50
-rw-r--r--arch/arm64/kvm/psci.c6
-rw-r--r--arch/arm64/kvm/sys_regs.c74
-rw-r--r--arch/arm64/kvm/vmid.c196
-rw-r--r--include/kvm/arm_psci.h6
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c58
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c1
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c45
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c10
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c12
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c9
25 files changed, 539 insertions, 202 deletions
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0bed0e33e1ae..76f795b628f1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -50,6 +50,8 @@
#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
KVM_DIRTY_LOG_INITIALLY_SET)
+#define KVM_HAVE_MMU_RWLOCK
+
/*
* Mode of operation configurable with kvm-arm.mode early param.
* See Documentation/admin-guide/kernel-parameters.txt for more information.
@@ -71,9 +73,7 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_vmid {
- /* The VMID generation used for the virt. memory system */
- u64 vmid_gen;
- u32 vmid;
+ atomic64_t id;
};
struct kvm_s2_mmu {
@@ -174,6 +174,7 @@ enum vcpu_sysreg {
PAR_EL1, /* Physical Address Register */
MDSCR_EL1, /* Monitor Debug System Control Register */
MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */
+ OSLSR_EL1, /* OS Lock Status Register */
DISR_EL1, /* Deferred Interrupt Status Register */
/* Performance Monitors Registers */
@@ -705,6 +706,12 @@ int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
+extern unsigned int kvm_arm_vmid_bits;
+int kvm_arm_vmid_alloc_init(void);
+void kvm_arm_vmid_alloc_free(void);
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid);
+void kvm_arm_vmid_clear_active(void);
+
static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
{
vcpu_arch->steal.base = GPA_INVALID;
@@ -738,6 +745,10 @@ void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
+
+#define kvm_vcpu_os_lock_enabled(vcpu) \
+ (!!(__vcpu_sys_reg(vcpu, OSLSR_EL1) & SYS_OSLSR_OSLK))
+
int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr);
int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 81839e9a8a24..74735a864eee 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -115,6 +115,7 @@ alternative_cb_end
#include <asm/cache.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
+#include <asm/kvm_host.h>
void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
@@ -266,7 +267,8 @@ static __always_inline u64 kvm_get_vttbr(struct kvm_s2_mmu *mmu)
u64 cnp = system_supports_cnp() ? VTTBR_CNP_BIT : 0;
baddr = mmu->pgd_phys;
- vmid_field = (u64)READ_ONCE(vmid->vmid) << VTTBR_VMID_SHIFT;
+ vmid_field = atomic64_read(&vmid->id) << VTTBR_VMID_SHIFT;
+ vmid_field &= VTTBR_VMID_MASK(kvm_arm_vmid_bits);
return kvm_phys_to_vttbr(baddr) | vmid_field | cnp;
}
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 898bee0004ae..906a3550fc50 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -128,8 +128,16 @@
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
+
#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
+#define SYS_OSLAR_OSLK BIT(0)
+
#define SYS_OSLSR_EL1 sys_reg(2, 0, 1, 1, 4)
+#define SYS_OSLSR_OSLM_MASK (BIT(3) | BIT(0))
+#define SYS_OSLSR_OSLM_NI 0
+#define SYS_OSLSR_OSLM_IMPLEMENTED BIT(3)
+#define SYS_OSLSR_OSLK BIT(1)
+
#define SYS_OSDLR_EL1 sys_reg(2, 0, 1, 3, 4)
#define SYS_DBGPRCR_EL1 sys_reg(2, 0, 1, 4, 4)
#define SYS_DBGCLAIMSET_EL1 sys_reg(2, 0, 7, 8, 6)
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5280e098cfb5..47af76e53221 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -348,7 +348,13 @@ static void task_fpsimd_load(void)
/*
* Ensure FPSIMD/SVE storage in memory for the loaded context is up to
- * date with respect to the CPU registers.
+ * date with respect to the CPU registers. Note carefully that the
+ * current context is the context last bound to the CPU stored in
+ * last, if KVM is involved this may be the guest VM context rather
+ * than the host thread for the VM pointed to by current. This means
+ * that we must always reference the state storage via last rather
+ * than via current, other than the TIF_ flags which KVM will
+ * carefully maintain for us.
*/
static void fpsimd_save(void)
{
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 7eaf1f7c4168..884844849c70 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -79,6 +79,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
/* Kernel symbol used by icache_is_vpipt(). */
KVM_NVHE_ALIAS(__icache_flags);
+/* VMID bits set by the KVM VMID allocator */
+KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
+
/* Kernel symbols needed for cpus_have_final/const_caps checks. */
KVM_NVHE_ALIAS(arm64_const_caps_ready);
KVM_NVHE_ALIAS(cpu_hwcap_keys);
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 91861fd8b897..261644b1a6bb 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -14,7 +14,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
inject_fault.o va_layout.o handle_exit.o \
guest.o debug.o reset.o sys_regs.o \
vgic-sys-reg-v3.o fpsimd.o pmu.o pkvm.o \
- arch_timer.o trng.o\
+ arch_timer.o trng.o vmid.o \
vgic/vgic.o vgic/vgic-init.o \
vgic/vgic-irqfd.o vgic/vgic-v2.o \
vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 13c1318d8b9a..fefd5774ab55 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -53,11 +53,6 @@ static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
-/* The VMID used in the VTTBR */
-static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
-static u32 kvm_next_vmid;
-static DEFINE_SPINLOCK(kvm_vmid_lock);
-
static bool vgic_present;
static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
@@ -430,6 +425,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
+ kvm_arm_vmid_clear_active();
vcpu_clear_on_unsupported_cpu(vcpu);
vcpu->cpu = -1;
@@ -498,87 +494,6 @@ unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu)
}
#endif
-/* Just ensure a guest exit from a particular CPU */
-static void exit_vm_noop(void *info)
-{
-}
-
-void force_vm_exit(const cpumask_t *mask)
-{
- preempt_disable();
- smp_call_function_many(mask, exit_vm_noop, NULL, true);
- preempt_enable();
-}
-
-/**
- * need_new_vmid_gen - check that the VMID is still valid
- * @vmid: The VMID to check
- *
- * return true if there is a new generation of VMIDs being used
- *
- * The hardware supports a limited set of values with the value zero reserved
- * for the host, so we check if an assigned value belongs to a previous
- * generation, which requires us to assign a new value. If we're the first to
- * use a VMID for the new generation, we must flush necessary caches and TLBs
- * on all CPUs.
- */
-static bool need_new_vmid_gen(struct kvm_vmid *vmid)
-{
- u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen);
- smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */
- return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen);
-}
-
-/**
- * update_vmid - Update the vmid with a valid VMID for the current generation
- * @vmid: The stage-2 VMID information struct
- */
-static void update_vmid(struct kvm_vmid *vmid)
-{
- if (!need_new_vmid_gen(vmid))
- return;
-
- spin_lock(&kvm_vmid_lock);
-
- /*
- * We need to re-check the vmid_gen here to ensure that if another vcpu
- * already allocated a valid vmid for this vm, then this vcpu should
- * use the same vmid.
- */
- if (!need_new_vmid_gen(vmid)) {
- spin_unlock(&kvm_vmid_lock);
- return;
- }
-
- /* First user of a new VMID generation? */
- if (unlikely(kvm_next_vmid == 0)) {
- atomic64_inc(&kvm_vmid_gen);
- kvm_next_vmid = 1;
-
- /*
- * On SMP we know no other CPUs can use this CPU's or each
- * other's VMID after force_vm_exit returns since the
- * kvm_vmid_lock blocks them from reentry to the guest.
- */
- force_vm_exit(cpu_all_mask);
- /*
- * Now broadcast TLB + ICACHE invalidation over the inner
- * shareable domain to make sure all data structures are
- * clean.
- */
- kvm_call_hyp(__kvm_flush_vm_context);
- }
-
- WRITE_ONCE(vmid->vmid, kvm_next_vmid);
- kvm_next_vmid++;
- kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1;
-
- smp_wmb();
- WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen));
-
- spin_unlock(&kvm_vmid_lock);
-}
-
static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu)
{
return vcpu->arch.target >= 0;
@@ -814,7 +729,6 @@ static bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu, int *ret)
}
return kvm_request_pending(vcpu) ||
- need_new_vmid_gen(&vcpu->arch.hw_mmu->vmid) ||
xfer_to_guest_mode_work_pending();
}
@@ -876,8 +790,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
if (!ret)
ret = 1;
- update_vmid(&vcpu->arch.hw_mmu->vmid);
-
check_vcpu_requests(vcpu);
/*
@@ -887,6 +799,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
*/
preempt_disable();
+ /*
+ * The VMID allocator only tracks active VMIDs per
+ * physical CPU, and therefore the VMID allocated may not be
+ * preserved on VMID roll-over if the task was preempted,
+ * making a thread's VMID inactive. So we need to call
+ * kvm_arm_vmid_update() in non-premptible context.
+ */
+ kvm_arm_vmid_update(&vcpu->arch.hw_mmu->vmid);
+
kvm_pmu_flush_hwstate(vcpu);
local_irq_disable();
@@ -2182,6 +2103,12 @@ int kvm_arch_init(void *opaque)
if (err)
return err;
+ err = kvm_arm_vmid_alloc_init();
+ if (err) {
+ kvm_err("Failed to initialize VMID allocator.\n");
+ return err;
+ }
+
if (!in_hyp_mode) {
err = init_hyp_mode();
if (err)
@@ -2221,6 +2148,7 @@ out_hyp:
if (!in_hyp_mode)
teardown_hyp_mode();
out_err:
+ kvm_arm_vmid_alloc_free();
return err;
}
diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
index db9361338b2a..4fd5c216c4bb 100644
--- a/arch/arm64/kvm/debug.c
+++ b/arch/arm64/kvm/debug.c
@@ -105,9 +105,11 @@ static void kvm_arm_setup_mdcr_el2(struct kvm_vcpu *vcpu)
* - Userspace is using the hardware to debug the guest
* (KVM_GUESTDBG_USE_HW is set).
* - The guest is not using debug (KVM_ARM64_DEBUG_DIRTY is clear).
+ * - The guest has enabled the OS Lock (debug exceptions are blocked).
*/
if ((vcpu->guest_debug & KVM_GUESTDBG_USE_HW) ||
- !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY))
+ !(vcpu->arch.flags & KVM_ARM64_DEBUG_DIRTY) ||
+ kvm_vcpu_os_lock_enabled(vcpu))
vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
@@ -160,8 +162,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
kvm_arm_setup_mdcr_el2(vcpu);
- /* Is Guest debugging in effect? */
- if (vcpu->guest_debug) {
+ /* Check if we need to use the debug registers. */
+ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
/* Save guest debug state */
save_guest_debug_regs(vcpu);
@@ -223,6 +225,19 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
&vcpu->arch.debug_ptr->dbg_wcr[0],
&vcpu->arch.debug_ptr->dbg_wvr[0]);
+
+ /*
+ * The OS Lock blocks debug exceptions in all ELs when it is
+ * enabled. If the guest has enabled the OS Lock, constrain its
+ * effects to the guest. Emulate the behavior by clearing
+ * MDSCR_EL1.MDE. In so doing, we ensure that host debug
+ * exceptions are unaffected by guest configuration of the OS
+ * Lock.
+ */
+ } else if (kvm_vcpu_os_lock_enabled(vcpu)) {
+ mdscr = vcpu_read_sys_reg(vcpu, MDSCR_EL1);
+ mdscr &= ~DBG_MDSCR_MDE;
+ vcpu_write_sys_reg(vcpu, mdscr, MDSCR_EL1);
}
}
@@ -244,7 +259,10 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
{
trace_kvm_arm_clear_debug(vcpu->guest_debug);
- if (vcpu->guest_debug) {
+ /*
+ * Restore the guest's debug registers if we were using them.
+ */
+ if (vcpu->guest_debug || kvm_vcpu_os_lock_enabled(vcpu)) {
restore_guest_debug_regs(vcpu);
/*
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 2f48fd362a8c..397fdac75cb1 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -84,6 +84,11 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
+/*
+ * Called just before entering the guest once we are no longer
+ * preemptable. Syncs the host's TIF_FOREIGN_FPSTATE with the KVM
+ * mirror of the flag used by the hypervisor.
+ */
void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
{
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
@@ -93,10 +98,11 @@ void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu)
}
/*
- * If the guest FPSIMD state was loaded, update the host's context
- * tracking data mark the CPU FPSIMD regs as dirty and belonging to vcpu
- * so that they will be written back if the kernel clobbers them due to
- * kernel-mode NEON before re-entry into the guest.
+ * Called just after exiting the guest. If the guest FPSIMD state
+ * was loaded, update the host's context tracking data mark the CPU
+ * FPSIMD regs as dirty and belonging to vcpu so that they will be
+ * written back if the kernel clobbers them due to kernel-mode NEON
+ * before re-entry into the guest.
*/
void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
{
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 701cfb964905..667654bd3734 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -173,6 +173,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
/* Valid trap. Switch the context: */
+
+ /* First disable enough traps to allow us to update the registers */
if (has_vhe()) {
reg = CPACR_EL1_FPEN;
if (sve_guest)
@@ -188,11 +190,13 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
}
isb();
+ /* Write out the host state if it's in the registers */
if (vcpu->arch.flags & KVM_ARM64_FP_HOST) {
__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
}
+ /* Restore the guest state */
if (sve_guest)
__hyp_sve_restore_guest(vcpu);
else
diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile
index 24b2c2425b38..f9fe4dc21b1f 100644
--- a/arch/arm64/kvm/hyp/nvhe/Makefile
+++ b/arch/arm64/kvm/hyp/nvhe/Makefile
@@ -13,10 +13,11 @@ lib-objs := clear_page.o copy_page.o memcpy.o memset.o
lib-objs := $(addprefix ../../../lib/, $(lib-objs))
obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \
- hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \
+ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \
cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o
obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \
../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o
+obj-$(CONFIG_DEBUG_LIST) += list_debug.o
obj-y += $(lib-objs)
##
diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c
new file mode 100644
index 000000000000..d68abd7ea124
--- /dev/null
+++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 - Google LLC
+ * Author: Keir Fraser <keirf@google.com>
+ */
+
+#include <linux/list.h>
+#include <linux/bug.h>
+
+static inline __must_check bool nvhe_check_data_corruption(bool v)
+{
+ return v;
+}
+
+#define NVHE_CHECK_DATA_CORRUPTION(condition) \
+ nvhe_check_data_corruption(({ \
+ bool corruption = unlikely(condition); \
+ if (corruption) { \
+ if (IS_ENABLED(CONFIG_BUG_ON_DATA_CORRUPTION)) { \
+ BUG_ON(1); \
+ } else \
+ WARN_ON(1); \
+ } \
+ corruption; \
+ }))
+
+/* The predicates checked here are taken from lib/list_debug.c. */
+
+bool __list_add_valid(struct list_head *new, struct list_head *prev,
+ struct list_head *next)
+{
+ if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev->next != next) ||
+ NVHE_CHECK_DATA_CORRUPTION(new == prev || new == next))
+ return false;
+
+ return true;
+}
+
+bool __list_del_entry_valid(struct list_head *entry)
+{
+ struct list_head *prev, *next;
+
+ prev = entry->prev;
+ next = entry->next;
+
+ if (NVHE_CHECK_DATA_CORRUPTION(next == LIST_POISON1) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev == LIST_POISON2) ||
+ NVHE_CHECK_DATA_CORRUPTION(prev->next != entry) ||
+ NVHE_CHECK_DATA_CORRUPTION(next->prev != entry))
+ return false;
+
+ return true;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 674f10564373..78edf077fa3b 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -138,8 +138,7 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
mmu->pgt = &host_kvm.pgt;
- WRITE_ONCE(mmu->vmid.vmid_gen, 0);
- WRITE_ONCE(mmu->vmid.vmid, 0);
+ atomic64_set(&mmu->vmid.id, 0);
return 0;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/stub.c b/arch/arm64/kvm/hyp/nvhe/stub.c
deleted file mode 100644
index c0aa6bbfd79d..000000000000
--- a/arch/arm64/kvm/hyp/nvhe/stub.c
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Stubs for out-of-line function calls caused by re-using kernel
- * infrastructure at EL2.
- *
- * Copyright (C) 2020 - Google LLC
- */
-
-#include <linux/list.h>
-
-#ifdef CONFIG_DEBUG_LIST
-bool __list_add_valid(struct list_head *new, struct list_head *prev,
- struct list_head *next)
-{
- return true;
-}
-
-bool __list_del_entry_valid(struct list_head *entry)
-{
- return true;
-}
-#endif
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index bc2aba953299..1623abc56af2 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -58,7 +58,7 @@ static int stage2_apply_range(struct kvm *kvm, phys_addr_t addr,
break;
if (resched && next != end)
- cond_resched_lock(&kvm->mmu_lock);
+ cond_resched_rwlock_write(&kvm->mmu_lock);
} while (addr = next, addr != end);
return ret;
@@ -179,7 +179,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
phys_addr_t end = start + size;
- assert_spin_locked(&kvm->mmu_lock);
+ lockdep_assert_held_write(&kvm->mmu_lock);
WARN_ON(size & ~PAGE_MASK);
WARN_ON(stage2_apply_range(kvm, start, end, kvm_pgtable_stage2_unmap,
may_block));
@@ -213,13 +213,13 @@ static void stage2_flush_vm(struct kvm *kvm)
int idx, bkt;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, bkt, slots)
stage2_flush_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -653,7 +653,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);
- WRITE_ONCE(mmu->vmid.vmid_gen, 0);
return 0;
out_destroy_pgtable:
@@ -720,13 +719,13 @@ void stage2_unmap_vm(struct kvm *kvm)
idx = srcu_read_lock(&kvm->srcu);
mmap_read_lock(current->mm);
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
slots = kvm_memslots(kvm);
kvm_for_each_memslot(memslot, bkt, slots)
stage2_unmap_memslot(kvm, memslot);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
mmap_read_unlock(current->mm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -736,14 +735,14 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
struct kvm *kvm = kvm_s2_mmu_to_kvm(mmu);
struct kvm_pgtable *pgt = NULL;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
pgt = mmu->pgt;
if (pgt) {
mmu->pgd_phys = 0;
mmu->pgt = NULL;
free_percpu(mmu->last_vcpu_ran);
}
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
if (pgt) {
kvm_pgtable_stage2_destroy(pgt);
@@ -783,10 +782,10 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
if (ret)
break;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
&cache);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
if (ret)
break;
@@ -834,9 +833,9 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
start = memslot->base_gfn << PAGE_SHIFT;
end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
}
@@ -1080,6 +1079,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
gfn_t gfn;
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
+ bool logging_perm_fault = false;
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
unsigned long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1114,6 +1114,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (logging_active) {
force_pte = true;
vma_shift = PAGE_SHIFT;
+ logging_perm_fault = (fault_status == FSC_PERM && write_fault);
} else {
vma_shift = get_vma_page_shift(vma, hva);
}
@@ -1212,7 +1213,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (exec_fault && device)
return -ENOEXEC;
- spin_lock(&kvm->mmu_lock);
+ /*
+ * To reduce MMU contentions and enhance concurrency during dirty
+ * logging dirty logging, only acquire read lock for permission
+ * relaxation.
+ */
+ if (logging_perm_fault)
+ read_lock(&kvm->mmu_lock);
+ else
+ write_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
@@ -1271,7 +1280,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
out_unlock:
- spin_unlock(&kvm->mmu_lock);
+ if (logging_perm_fault)
+ read_unlock(&kvm->mmu_lock);
+ else
+ write_unlock(&kvm->mmu_lock);
kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret != -EAGAIN ? ret : 0;
@@ -1286,10 +1298,10 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
trace_kvm_access_fault(fault_ipa);
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
mmu = vcpu->arch.hw_mmu;
kpte = kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa);
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
pte = __pte(kpte);
if (pte_valid(pte))
@@ -1692,9 +1704,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;
- spin_lock(&kvm->mmu_lock);
+ write_lock(&kvm->mmu_lock);
unmap_stage2_range(&kvm->arch.mmu, gpa, size);
- spin_unlock(&kvm->mmu_lock);
+ write_unlock(&kvm->mmu_lock);
}
/*
diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c
index 3eae32876897..a0c10c11f40e 100644
--- a/arch/arm64/kvm/psci.c
+++ b/arch/arm64/kvm/psci.c
@@ -85,7 +85,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
if (!vcpu)
return PSCI_RET_INVALID_PARAMS;
if (!vcpu->arch.power_off) {
- if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1)
+ if (kvm_psci_version(source_vcpu) != KVM_ARM_PSCI_0_1)
return PSCI_RET_ALREADY_ON;
else
return PSCI_RET_INVALID_PARAMS;
@@ -392,7 +392,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
*/
int kvm_psci_call(struct kvm_vcpu *vcpu)
{
- switch (kvm_psci_version(vcpu, vcpu->kvm)) {
+ switch (kvm_psci_version(vcpu)) {
case KVM_ARM_PSCI_1_0:
return kvm_psci_1_0_call(vcpu);
case KVM_ARM_PSCI_0_2:
@@ -471,7 +471,7 @@ int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
switch (reg->id) {
case KVM_REG_ARM_PSCI_VERSION:
- val = kvm_psci_version(vcpu, vcpu->kvm);
+ val = kvm_psci_version(vcpu);
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 4dc2fba316ff..dd34b5ab51d4 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -44,6 +44,10 @@
* 64bit interface.
*/
+static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
+static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
+static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
+
static bool read_from_write_only(struct kvm_vcpu *vcpu,
struct sys_reg_params *params,
const struct sys_reg_desc *r)
@@ -287,16 +291,55 @@ static bool trap_loregion(struct kvm_vcpu *vcpu,
return trap_raz_wi(vcpu, p, r);
}
+static bool trap_oslar_el1(struct kvm_vcpu *vcpu,
+ struct sys_reg_params *p,
+ const struct sys_reg_desc *r)
+{
+ u64 oslsr;
+
+ if (!p->is_write)
+ return read_from_write_only(vcpu, p, r);
+
+ /* Forward the OSLK bit to OSLSR */
+ oslsr = __vcpu_sys_reg(vcpu, OSLSR_EL1) & ~SYS_OSLSR_OSLK;
+ if (p->regval & SYS_OSLAR_OSLK)
+ oslsr |= SYS_OSLSR_OSLK;
+
+ __vcpu_sys_reg(vcpu, OSLSR_EL1) = oslsr;
+ return true;
+}
+
static bool trap_oslsr_el1(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
- if (p->is_write) {
- return ignore_write(vcpu, p);
- } else {
- p->regval = (1 << 3);
- return true;
- }
+ if (p->is_write)
+ return write_to_read_only(vcpu, p, r);
+
+ p->regval = __vcpu_sys_reg(vcpu, r->reg);
+ return true;
+}
+
+static int set_oslsr_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
+ const struct kvm_one_reg *reg, void __user *uaddr)
+{
+ u64 id = sys_reg_to_index(rd);
+ u64 val;
+ int err;
+
+ err = reg_from_user(&val, uaddr, id);
+ if (err)
+ return err;
+
+ /*
+ * The only modifiable bit is the OSLK bit. Refuse the write if
+ * userspace attempts to change any other bit in the register.
+ */
+ if ((val ^ rd->val) & ~SYS_OSLSR_OSLK)
+ return -EINVAL;
+
+ __vcpu_sys_reg(vcpu, rd->reg) = val;
+ return 0;
}
static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
@@ -1164,10 +1207,6 @@ static bool access_raz_id_reg(struct kvm_vcpu *vcpu,
return __access_id_reg(vcpu, p, r, true);
}
-static int reg_from_user(u64 *val, const void __user *uaddr, u64 id);
-static int reg_to_user(void __user *uaddr, const u64 *val, u64 id);
-static u64 sys_reg_to_index(const struct sys_reg_desc *reg);
-
/* Visibility overrides for SVE-specific control registers */
static unsigned int sve_visibility(const struct kvm_vcpu *vcpu,
const struct sys_reg_desc *rd)
@@ -1418,9 +1457,9 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
* Debug handling: We do trap most, if not all debug related system
* registers. The implementation is good enough to ensure that a guest
* can use these with minimal performance degradation. The drawback is
- * that we don't implement any of the external debug, none of the
- * OSlock protocol. This should be revisited if we ever encounter a
- * more demanding guest...
+ * that we don't implement any of the external debug architecture.
+ * This should be revisited if we ever encounter a more demanding
+ * guest...
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
@@ -1447,8 +1486,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
DBG_BCR_BVR_WCR_WVR_EL1(15),
{ SYS_DESC(SYS_MDRAR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_OSLAR_EL1), trap_raz_wi },
- { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1 },
+ { SYS_DESC(SYS_OSLAR_EL1), trap_oslar_el1 },
+ { SYS_DESC(SYS_OSLSR_EL1), trap_oslsr_el1, reset_val, OSLSR_EL1,
+ SYS_OSLSR_OSLM_IMPLEMENTED, .set_user = set_oslsr_el1, },
{ SYS_DESC(SYS_OSDLR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGPRCR_EL1), trap_raz_wi },
{ SYS_DESC(SYS_DBGCLAIMSET_EL1), trap_raz_wi },
@@ -1920,10 +1960,10 @@ static const struct sys_reg_desc cp14_regs[] = {
DBGBXVR(0),
/* DBGOSLAR */
- { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_raz_wi },
+ { Op1( 0), CRn( 1), CRm( 0), Op2( 4), trap_oslar_el1 },
DBGBXVR(1),
/* DBGOSLSR */
- { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1 },
+ { Op1( 0), CRn( 1), CRm( 1), Op2( 4), trap_oslsr_el1, NULL, OSLSR_EL1 },
DBGBXVR(2),
DBGBXVR(3),
/* DBGOSDLR */
diff --git a/arch/arm64/kvm/vmid.c b/arch/arm64/kvm/vmid.c
new file mode 100644
index 000000000000..8d5f0506fd87
--- /dev/null
+++ b/arch/arm64/kvm/vmid.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VMID allocator.
+ *
+ * Based on Arm64 ASID allocator algorithm.
+ * Please refer arch/arm64/mm/context.c for detailed
+ * comments on algorithm.
+ *
+ * Copyright (C) 2002-2003 Deep Blue Solutions Ltd, all rights reserved.
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+
+#include <asm/kvm_asm.h>
+#include <asm/kvm_mmu.h>
+
+unsigned int kvm_arm_vmid_bits;
+static DEFINE_RAW_SPINLOCK(cpu_vmid_lock);
+
+static atomic64_t vmid_generation;
+static unsigned long *vmid_map;
+
+static DEFINE_PER_CPU(atomic64_t, active_vmids);
+static DEFINE_PER_CPU(u64, reserved_vmids);
+
+#define VMID_MASK (~GENMASK(kvm_arm_vmid_bits - 1, 0))
+#define VMID_FIRST_VERSION (1UL << kvm_arm_vmid_bits)
+
+#define NUM_USER_VMIDS VMID_FIRST_VERSION
+#define vmid2idx(vmid) ((vmid) & ~VMID_MASK)
+#define idx2vmid(idx) vmid2idx(idx)
+
+/*
+ * As vmid #0 is always reserved, we will never allocate one
+ * as below and can be treated as invalid. This is used to
+ * set the active_vmids on vCPU schedule out.
+ */
+#define VMID_ACTIVE_INVALID VMID_FIRST_VERSION
+
+#define vmid_gen_match(vmid) \
+ (!(((vmid) ^ atomic64_read(&vmid_generation)) >> kvm_arm_vmid_bits))
+
+static void flush_context(void)
+{
+ int cpu;
+ u64 vmid;
+
+ bitmap_clear(vmid_map, 0, NUM_USER_VMIDS);
+
+ for_each_possible_cpu(cpu) {
+ vmid = atomic64_xchg_relaxed(&per_cpu(active_vmids, cpu), 0);
+
+ /* Preserve reserved VMID */
+ if (vmid == 0)
+ vmid = per_cpu(reserved_vmids, cpu);
+ __set_bit(vmid2idx(vmid), vmid_map);
+ per_cpu(reserved_vmids, cpu) = vmid;
+ }
+
+ /*
+ * Unlike ASID allocator, we expect less frequent rollover in
+ * case of VMIDs. Hence, instead of marking the CPU as
+ * flush_pending and issuing a local context invalidation on
+ * the next context-switch, we broadcast TLB flush + I-cache
+ * invalidation over the inner shareable domain on rollover.
+ */
+ kvm_call_hyp(__kvm_flush_vm_context);
+}
+
+static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved VMIDs looking for a match
+ * and update to use newvmid (i.e. the same VMID in the current
+ * generation).
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_vmids, cpu) == vmid) {
+ hit = true;
+ per_cpu(reserved_vmids, cpu) = newvmid;
+ }
+ }
+
+ return hit;
+}
+
+static u64 new_vmid(struct kvm_vmid *kvm_vmid)
+{
+ static u32 cur_idx = 1;
+ u64 vmid = atomic64_read(&kvm_vmid->id);
+ u64 generation = atomic64_read(&vmid_generation);
+
+ if (vmid != 0) {
+ u64 newvmid = generation | (vmid & ~VMID_MASK);
+
+ if (check_update_reserved_vmid(vmid, newvmid)) {
+ atomic64_set(&kvm_vmid->id, newvmid);
+ return newvmid;
+ }
+
+ if (!__test_and_set_bit(vmid2idx(vmid), vmid_map)) {
+ atomic64_set(&kvm_vmid->id, newvmid);
+ return newvmid;
+ }
+ }
+
+ vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, cur_idx);
+ if (vmid != NUM_USER_VMIDS)
+ goto set_vmid;
+
+ /* We're out of VMIDs, so increment the global generation count */
+ generation = atomic64_add_return_relaxed(VMID_FIRST_VERSION,
+ &vmid_generation);
+ flush_context();
+
+ /* We have more VMIDs than CPUs, so this will always succeed */
+ vmid = find_next_zero_bit(vmid_map, NUM_USER_VMIDS, 1);
+
+set_vmid:
+ __set_bit(vmid, vmid_map);
+ cur_idx = vmid;
+ vmid = idx2vmid(vmid) | generation;
+ atomic64_set(&kvm_vmid->id, vmid);
+ return vmid;
+}
+
+/* Called from vCPU sched out with preemption disabled */
+void kvm_arm_vmid_clear_active(void)
+{
+ atomic64_set(this_cpu_ptr(&active_vmids), VMID_ACTIVE_INVALID);
+}
+
+void kvm_arm_vmid_update(struct kvm_vmid *kvm_vmid)
+{
+ unsigned long flags;
+ u64 vmid, old_active_vmid;
+
+ vmid = atomic64_read(&kvm_vmid->id);
+
+ /*
+ * Please refer comments in check_and_switch_context() in
+ * arch/arm64/mm/context.c.
+ *
+ * Unlike ASID allocator, we set the active_vmids to
+ * VMID_ACTIVE_INVALID on vCPU schedule out to avoid
+ * reserving the VMID space needlessly on rollover.
+ * Hence explicitly check here for a "!= 0" to
+ * handle the sync with a concurrent rollover.
+ */
+ old_active_vmid = atomic64_read(this_cpu_ptr(&active_vmids));
+ if (old_active_vmid != 0 && vmid_gen_match(vmid) &&
+ 0 != atomic64_cmpxchg_relaxed(this_cpu_ptr(&active_vmids),
+ old_active_vmid, vmid))
+ return;
+
+ raw_spin_lock_irqsave(&cpu_vmid_lock, flags);
+
+ /* Check that our VMID belongs to the current generation. */
+ vmid = atomic64_read(&kvm_vmid->id);
+ if (!vmid_gen_match(vmid))
+ vmid = new_vmid(kvm_vmid);
+
+ atomic64_set(this_cpu_ptr(&active_vmids), vmid);
+ raw_spin_unlock_irqrestore(&cpu_vmid_lock, flags);
+}
+
+/*
+ * Initialize the VMID allocator
+ */
+int kvm_arm_vmid_alloc_init(void)
+{
+ kvm_arm_vmid_bits = kvm_get_vmid_bits();
+
+ /*
+ * Expect allocation after rollover to fail if we don't have
+ * at least one more VMID than CPUs. VMID #0 is always reserved.
+ */
+ WARN_ON(NUM_USER_VMIDS - 1 <= num_possible_cpus());
+ atomic64_set(&vmid_generation, VMID_FIRST_VERSION);
+ vmid_map = kcalloc(BITS_TO_LONGS(NUM_USER_VMIDS),
+ sizeof(*vmid_map), GFP_KERNEL);
+ if (!vmid_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+void kvm_arm_vmid_alloc_free(void)
+{
+ kfree(vmid_map);
+}
diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index 5b58bd2fe088..297645edcaff 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -16,11 +16,7 @@
#define KVM_ARM_PSCI_LATEST KVM_ARM_PSCI_1_0
-/*
- * We need the KVM pointer independently from the vcpu as we can call
- * this from HYP, and need to apply kern_hyp_va on it...
- */
-static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
+static inline int kvm_psci_version(struct kvm_vcpu *vcpu)
{
/*
* Our PSCI implementation stays the same across versions from
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index ea189d83abf7..63b2178210c4 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -23,7 +23,7 @@
#define SPSR_D (1 << 9)
#define SPSR_SS (1 << 21)
-extern unsigned char sw_bp, hw_bp, bp_svc, bp_brk, hw_wp, ss_start;
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
static volatile uint64_t sw_bp_addr, hw_bp_addr;
static volatile uint64_t wp_addr, wp_data_addr;
static volatile uint64_t svc_addr;
@@ -47,6 +47,14 @@ static void reset_debug_state(void)
isb();
}
+static void enable_os_lock(void)
+{
+ write_sysreg(1, oslar_el1);
+ isb();
+
+ GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
static void install_wp(uint64_t addr)
{
uint32_t wcr;
@@ -99,6 +107,7 @@ static void guest_code(void)
GUEST_SYNC(0);
/* Software-breakpoint */
+ reset_debug_state();
asm volatile("sw_bp: brk #0");
GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
@@ -152,6 +161,51 @@ static void guest_code(void)
GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+ GUEST_SYNC(6);
+
+ /* OS Lock does not block software-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ sw_bp_addr = 0;
+ asm volatile("sw_bp2: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+ GUEST_SYNC(7);
+
+ /* OS Lock blocking hardware-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ install_hw_bp(PC(hw_bp2));
+ hw_bp_addr = 0;
+ asm volatile("hw_bp2: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+ GUEST_SYNC(8);
+
+ /* OS Lock blocking watchpoint */
+ reset_debug_state();
+ enable_os_lock();
+ write_data = '\0';
+ wp_data_addr = 0;
+ install_wp(PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+ GUEST_SYNC(9);
+
+ /* OS Lock blocking single-step */
+ reset_debug_state();
+ enable_os_lock();
+ ss_addr[0] = 0;
+ install_ss();
+ ss_idx = 0;
+ asm volatile("mrs x0, esr_el1\n\t"
+ "add x0, x0, #1\n\t"
+ "msr daifset, #8\n\t"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], 0);
+
GUEST_DONE();
}
@@ -223,7 +277,7 @@ int main(int argc, char *argv[])
vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
ESR_EC_SVC64, guest_svc_handler);
- for (stage = 0; stage < 7; stage++) {
+ for (stage = 0; stage < 11; stage++) {
vcpu_run(vm, VCPU_ID);
switch (get_ucall(vm, VCPU_ID, &uc)) {
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index f769fc6cd927..f12147c43464 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -760,6 +760,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(2, 0, 0, 15, 5),
ARM64_SYS_REG(2, 0, 0, 15, 6),
ARM64_SYS_REG(2, 0, 0, 15, 7),
+ ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index e6c7d7f8fbd1..f0230711fbe9 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -306,7 +306,8 @@ static void guest_restore_active(struct test_args *args,
uint32_t prio, intid, ap1r;
int i;
- /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ /*
+ * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
* in descending order, so intid+1 can preempt intid.
*/
for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
@@ -315,7 +316,8 @@ static void guest_restore_active(struct test_args *args,
gic_set_priority(intid, prio);
}
- /* In a real migration, KVM would restore all GIC state before running
+ /*
+ * In a real migration, KVM would restore all GIC state before running
* guest code.
*/
for (i = 0; i < num; i++) {
@@ -472,10 +474,10 @@ static void test_restore_active(struct test_args *args, struct kvm_inject_desc *
guest_restore_active(args, MIN_SPI, 4, f->cmd);
}
-static void guest_code(struct test_args args)
+static void guest_code(struct test_args *args)
{
- uint32_t i, nr_irqs = args.nr_irqs;
- bool level_sensitive = args.level_sensitive;
+ uint32_t i, nr_irqs = args->nr_irqs;
+ bool level_sensitive = args->level_sensitive;
struct kvm_inject_desc *f, *inject_fns;
gic_init(GIC_V3, 1, dist, redist);
@@ -484,11 +486,11 @@ static void guest_code(struct test_args args)
gic_irq_enable(i);
for (i = MIN_SPI; i < nr_irqs; i++)
- gic_irq_set_config(i, !args.level_sensitive);
+ gic_irq_set_config(i, !level_sensitive);
- gic_set_eoi_split(args.eoi_split);
+ gic_set_eoi_split(args->eoi_split);
- reset_priorities(&args);
+ reset_priorities(args);
gic_set_priority_mask(CPU_PRIO_MASK);
inject_fns = level_sensitive ? inject_level_fns
@@ -497,17 +499,18 @@ static void guest_code(struct test_args args)
local_irq_enable();
/* Start the tests. */
- for_each_supported_inject_fn(&args, inject_fns, f) {
- test_injection(&args, f);
- test_preemption(&args, f);
- test_injection_failure(&args, f);
+ for_each_supported_inject_fn(args, inject_fns, f) {
+ test_injection(args, f);
+ test_preemption(args, f);
+ test_injection_failure(args, f);
}
- /* Restore the active state of IRQs. This would happen when live
+ /*
+ * Restore the active state of IRQs. This would happen when live
* migrating IRQs in the middle of being handled.
*/
- for_each_supported_activate_fn(&args, set_active_fns, f)
- test_restore_active(&args, f);
+ for_each_supported_activate_fn(args, set_active_fns, f)
+ test_restore_active(args, f);
GUEST_DONE();
}
@@ -573,8 +576,8 @@ static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
kvm_gsi_routing_write(vm, routing);
} else {
ret = _kvm_gsi_routing_write(vm, routing);
- /* The kernel only checks for KVM_IRQCHIP_NUM_PINS. */
- if (intid >= KVM_IRQCHIP_NUM_PINS)
+ /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+ if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
TEST_ASSERT(ret != 0 && errno == EINVAL,
"Bad intid %u did not cause KVM_SET_GSI_ROUTING "
"error: rc: %i errno: %i", intid, ret, errno);
@@ -739,6 +742,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
int gic_fd;
struct kvm_vm *vm;
struct kvm_inject_args inject_args;
+ vm_vaddr_t args_gva;
struct test_args args = {
.nr_irqs = nr_irqs,
@@ -757,7 +761,9 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
vcpu_init_descriptor_tables(vm, VCPU_ID);
/* Setup the guest args page (so it gets the args). */
- vcpu_args_set(vm, 0, 1, args);
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vm, 0, 1, args_gva);
gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
GICD_BASE_GPA, GICR_BASE_GPA);
@@ -837,7 +843,8 @@ int main(int argc, char **argv)
}
}
- /* If the user just specified nr_irqs and/or gic_version, then run all
+ /*
+ * If the user just specified nr_irqs and/or gic_version, then run all
* combinations.
*/
if (default_args) {
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 1954b964d1cf..b501338d9430 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,6 +18,12 @@
#include "test_util.h"
#include "perf_test_util.h"
#include "guest_modes.h"
+#ifdef __aarch64__
+#include "aarch64/vgic.h"
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+#endif
/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
#define TEST_HOST_LOOP_N 2UL
@@ -200,6 +206,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
vm_enable_cap(vm, &cap);
}
+#ifdef __aarch64__
+ vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+#endif
+
/* Start the iterations */
iteration = 0;
host_quit = false;
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 00f613c0583c..263bf3ed8fd5 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -19,7 +19,7 @@ struct gicv3_data {
unsigned int nr_spis;
};
-#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
#define DIST_BIT (1U << 31)
enum gicv3_intid_range {
@@ -105,7 +105,8 @@ static void gicv3_set_eoi_split(bool split)
{
uint32_t val;
- /* All other fields are read-only, so no need to read CTLR first. In
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
* fact, the kernel does the same.
*/
val = split ? (1U << 1) : 0;
@@ -159,9 +160,10 @@ static void gicv3_access_reg(uint32_t intid, uint64_t offset,
uint32_t cpu_or_dist;
GUEST_ASSERT(bits_per_field <= reg_bits);
- GUEST_ASSERT(*val < (1U << bits_per_field));
- /* Some registers like IROUTER are 64 bit long. Those are currently not
- * supported by readl nor writel, so just asserting here until then.
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+ /*
+ * This function does not support 64 bit accesses. Just asserting here
+ * until we implement readq/writeq.
*/
GUEST_ASSERT(reg_bits == 32);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index b3a0fca0d780..f365c32a7296 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -138,9 +138,6 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
uint64_t val;
bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
- /* Check that the addr part of the attr is within 32 bits. */
- assert(attr <= KVM_DEV_ARM_VGIC_OFFSET_MASK);
-
uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
: KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
@@ -150,7 +147,11 @@ static void vgic_poke_irq(int gic_fd, uint32_t intid,
attr += SZ_64K;
}
- /* All calls will succeed, even with invalid intid's, as long as the
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+ /*
+ * All calls will succeed, even with invalid intid's, as long as the
* addr part of the attr is within 32 bits (checked above). An invalid
* intid will just make the read/writes point to above the intended
* register space (i.e., ICPENDR after ISPENDR).