summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2019-12-18 17:47:38 +0100
committerPaolo Bonzini <pbonzini@redhat.com>2019-12-18 17:47:38 +0100
commitf5d5f5fae4605b66fd17e3c46feb14c6c3372e5c (patch)
tree04fa761fb1d7adbbecfec06914d6702428c08b3c
parent8715f05269bfbc6453e25e80825d781a82902f8e (diff)
parent6d674e28f642e3ff676fbae2d8d1b872814d32b6 (diff)
downloadlinux-f5d5f5fae4605b66fd17e3c46feb14c6c3372e5c.tar.gz
linux-f5d5f5fae4605b66fd17e3c46feb14c6c3372e5c.tar.bz2
linux-f5d5f5fae4605b66fd17e3c46feb14c6c3372e5c.zip
Merge tag 'kvmarm-fixes-5.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master
KVM/arm fixes for .5.5, take #1 - Fix uninitialised sysreg accessor - Fix handling of demand-paged device mappings - Stop spamming the console on IMPDEF sysregs - Relax mappings of writable memslots - Assorted cleanups
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/virt/kvm/api.txt55
-rw-r--r--Documentation/virt/kvm/arm/pvtime.rst80
-rw-r--r--Documentation/virt/kvm/devices/vcpu.txt14
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_emulate.h9
-rw-r--r--arch/arm/include/asm/kvm_host.h33
-rw-r--r--arch/arm/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm/kvm/Makefile2
-rw-r--r--arch/arm/kvm/guest.c14
-rw-r--r--arch/arm/kvm/handle_exit.c2
-rw-r--r--arch/arm/mm/proc-v7-bugs.c21
-rw-r--r--arch/arm64/include/asm/kvm_arm.h3
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h26
-rw-r--r--arch/arm64/include/asm/kvm_host.h37
-rw-r--r--arch/arm64/include/asm/paravirt.h9
-rw-r--r--arch/arm64/include/asm/pvclock-abi.h17
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--arch/arm64/kernel/cpu_errata.c102
-rw-r--r--arch/arm64/kernel/paravirt.c140
-rw-r--r--arch/arm64/kernel/sdei.c3
-rw-r--r--arch/arm64/kernel/time.c3
-rw-r--r--arch/arm64/kvm/Kconfig4
-rw-r--r--arch/arm64/kvm/Makefile2
-rw-r--r--arch/arm64/kvm/guest.c23
-rw-r--r--arch/arm64/kvm/handle_exit.c4
-rw-r--r--arch/arm64/kvm/inject_fault.c4
-rw-r--r--arch/arm64/kvm/sys_regs.c25
-rw-r--r--arch/arm64/kvm/sys_regs.h17
-rw-r--r--drivers/firmware/arm_sdei.c12
-rw-r--r--drivers/firmware/psci/psci.c24
-rw-r--r--drivers/irqchip/irq-gic-v4.c7
-rw-r--r--include/Kbuild2
-rw-r--r--include/kvm/arm_hypercalls.h43
-rw-r--r--include/kvm/arm_psci.h2
-rw-r--r--include/kvm/arm_vgic.h8
-rw-r--r--include/linux/arm-smccc.h75
-rw-r--r--include/linux/arm_sdei.h6
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/irqchip/arm-gic-v4.h4
-rw-r--r--include/linux/kvm_host.h26
-rw-r--r--include/linux/kvm_types.h2
-rw-r--r--include/linux/psci.h9
-rw-r--r--include/uapi/linux/kvm.h10
-rw-r--r--virt/kvm/arm/arch_timer.c8
-rw-r--r--virt/kvm/arm/arm.c53
-rw-r--r--virt/kvm/arm/hypercalls.c71
-rw-r--r--virt/kvm/arm/mmio.c9
-rw-r--r--virt/kvm/arm/mmu.c30
-rw-r--r--virt/kvm/arm/psci.c84
-rw-r--r--virt/kvm/arm/pvtime.c131
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c21
-rw-r--r--virt/kvm/arm/vgic/vgic-its.c3
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c12
-rw-r--r--virt/kvm/arm/vgic/vgic-v4.c59
-rw-r--r--virt/kvm/arm/vgic/vgic.c4
-rw-r--r--virt/kvm/arm/vgic/vgic.h2
-rw-r--r--virt/kvm/kvm_main.c6
58 files changed, 1073 insertions, 315 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8dee8f68fe15..9b847f06cb19 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3110,9 +3110,9 @@
[X86,PV_OPS] Disable paravirtualized VMware scheduler
clock and use the default one.
- no-steal-acc [X86,KVM] Disable paravirtualized steal time accounting.
- steal time is computed, but won't influence scheduler
- behaviour
+ no-steal-acc [X86,KVM,ARM64] Disable paravirtualized steal time
+ accounting. steal time is computed, but won't
+ influence scheduler behaviour
nolapic [X86-32,APIC] Do not enable or use the local APIC.
diff --git a/Documentation/virt/kvm/api.txt b/Documentation/virt/kvm/api.txt
index 4833904d32a5..bd29d44af32b 100644
--- a/Documentation/virt/kvm/api.txt
+++ b/Documentation/virt/kvm/api.txt
@@ -1002,12 +1002,18 @@ Specifying exception.has_esr on a system that does not support it will return
-EINVAL. Setting anything other than the lower 24bits of exception.serror_esr
will return -EINVAL.
+It is not possible to read back a pending external abort (injected via
+KVM_SET_VCPU_EVENTS or otherwise) because such an exception is always delivered
+directly to the virtual CPU).
+
+
struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
@@ -1051,9 +1057,23 @@ contain a valid state and shall be written into the VCPU.
ARM/ARM64:
+User space may need to inject several types of events to the guest.
+
Set the pending SError exception state for this VCPU. It is not possible to
'cancel' an Serror that has been made pending.
+If the guest performed an access to I/O memory which could not be handled by
+userspace, for example because of missing instruction syndrome decode
+information or because there is no device mapped at the accessed IPA, then
+userspace can ask the kernel to inject an external abort using the address
+from the exiting fault on the VCPU. It is a programming error to set
+ext_dabt_pending after an exit which was not either KVM_EXIT_MMIO or
+KVM_EXIT_ARM_NISV. This feature is only available if the system supports
+KVM_CAP_ARM_INJECT_EXT_DABT. This is a helper which provides commonality in
+how userspace reports accesses for the above cases to guests, across different
+userspace implementations. Nevertheless, userspace can still emulate all Arm
+exceptions by manipulating individual registers using the KVM_SET_ONE_REG API.
+
See KVM_GET_VCPU_EVENTS for the data structure.
@@ -4468,6 +4488,39 @@ Hyper-V SynIC state change. Notification is used to remap SynIC
event/message pages and to enable/disable SynIC messages/events processing
in userspace.
+ /* KVM_EXIT_ARM_NISV */
+ struct {
+ __u64 esr_iss;
+ __u64 fault_ipa;
+ } arm_nisv;
+
+Used on arm and arm64 systems. If a guest accesses memory not in a memslot,
+KVM will typically return to userspace and ask it to do MMIO emulation on its
+behalf. However, for certain classes of instructions, no instruction decode
+(direction, length of memory access) is provided, and fetching and decoding
+the instruction from the VM is overly complicated to live in the kernel.
+
+Historically, when this situation occurred, KVM would print a warning and kill
+the VM. KVM assumed that if the guest accessed non-memslot memory, it was
+trying to do I/O, which just couldn't be emulated, and the warning message was
+phrased accordingly. However, what happened more often was that a guest bug
+caused access outside the guest memory areas which should lead to a more
+meaningful warning message and an external abort in the guest, if the access
+did not fall within an I/O window.
+
+Userspace implementations can query for KVM_CAP_ARM_NISV_TO_USER, and enable
+this capability at VM creation. Once this is done, these types of errors will
+instead return to userspace with KVM_EXIT_ARM_NISV, with the valid bits from
+the HSR (arm) and ESR_EL2 (arm64) in the esr_iss field, and the faulting IPA
+in the fault_ipa field. Userspace can either fix up the access if it's
+actually an I/O access by decoding the instruction from guest memory (if it's
+very brave) and continue executing the guest, or it can decide to suspend,
+dump, or restart the guest.
+
+Note that KVM does not skip the faulting instruction as it does for
+KVM_EXIT_MMIO, but userspace has to emulate any change to the processing state
+if it decides to decode and emulate the instruction.
+
/* Fix the size of the union. */
char padding[256];
};
diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst
new file mode 100644
index 000000000000..2357dd2d8655
--- /dev/null
+++ b/Documentation/virt/kvm/arm/pvtime.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Paravirtualized time support for arm64
+======================================
+
+Arm specification DEN0057/A defines a standard for paravirtualised time
+support for AArch64 guests:
+
+https://developer.arm.com/docs/den0057/a
+
+KVM/arm64 implements the stolen time part of this specification by providing
+some hypervisor service calls to support a paravirtualized guest obtaining a
+view of the amount of time stolen from its execution.
+
+Two new SMCCC compatible hypercalls are defined:
+
+* PV_TIME_FEATURES: 0xC5000020
+* PV_TIME_ST: 0xC5000021
+
+These are only available in the SMC64/HVC64 calling convention as
+paravirtualized time is not available to 32 bit Arm guests. The existence of
+the PV_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES
+mechanism before calling it.
+
+PV_TIME_FEATURES
+ ============= ======== ==========
+ Function ID: (uint32) 0xC5000020
+ PV_call_id: (uint32) The function to query for support.
+ Currently only PV_TIME_ST is supported.
+ Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
+ PV-time feature is supported by the hypervisor.
+ ============= ======== ==========
+
+PV_TIME_ST
+ ============= ======== ==========
+ Function ID: (uint32) 0xC5000021
+ Return value: (int64) IPA of the stolen time data structure for this
+ VCPU. On failure:
+ NOT_SUPPORTED (-1)
+ ============= ======== ==========
+
+The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory
+with inner and outer write back caching attributes, in the inner shareable
+domain. A total of 16 bytes from the IPA returned are guaranteed to be
+meaningfully filled by the hypervisor (see structure below).
+
+PV_TIME_ST returns the structure for the calling VCPU.
+
+Stolen Time
+-----------
+
+The structure pointed to by the PV_TIME_ST hypercall is as follows:
+
++-------------+-------------+-------------+----------------------------+
+| Field | Byte Length | Byte Offset | Description |
++=============+=============+=============+============================+
+| Revision | 4 | 0 | Must be 0 for version 1.0 |
++-------------+-------------+-------------+----------------------------+
+| Attributes | 4 | 4 | Must be 0 |
++-------------+-------------+-------------+----------------------------+
+| Stolen time | 8 | 8 | Stolen time in unsigned |
+| | | | nanoseconds indicating how |
+| | | | much time this VCPU thread |
+| | | | was involuntarily not |
+| | | | running on a physical CPU. |
++-------------+-------------+-------------+----------------------------+
+
+All values in the structure are stored little-endian.
+
+The structure will be updated by the hypervisor prior to scheduling a VCPU. It
+will be present within a reserved region of the normal memory given to the
+guest. The guest should not attempt to write into this memory. There is a
+structure per VCPU of the guest.
+
+It is advisable that one or more 64k pages are set aside for the purpose of
+these structures and not used for other purposes, this enables the guest to map
+the region using 64k pages and avoids conflicting attributes with other memory.
+
+For the user space interface see Documentation/virt/kvm/devices/vcpu.txt
+section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
diff --git a/Documentation/virt/kvm/devices/vcpu.txt b/Documentation/virt/kvm/devices/vcpu.txt
index 2b5dab16c4f2..6f3bd64a05b0 100644
--- a/Documentation/virt/kvm/devices/vcpu.txt
+++ b/Documentation/virt/kvm/devices/vcpu.txt
@@ -60,3 +60,17 @@ time to use the number provided for a given timer, overwriting any previously
configured values on other VCPUs. Userspace should configure the interrupt
numbers on at least one VCPU after creating all VCPUs and before running any
VCPUs.
+
+3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
+Architectures: ARM64
+
+3.1 ATTRIBUTE: KVM_ARM_VCPU_PVTIME_IPA
+Parameters: 64-bit base address
+Returns: -ENXIO: Stolen time not implemented
+ -EEXIST: Base address already set for this VCPU
+ -EINVAL: Base address not 64 byte aligned
+
+Specifies the base address of the stolen time structure for this VCPU. The
+base address must be 64 byte aligned and exist within a valid guest memory
+region. See Documentation/virt/kvm/arm/pvtime.txt for more information
+including the layout of the stolen time structure.
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 0125aa059d5b..9c04bd810d07 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -162,6 +162,7 @@
#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT)
#define HSR_SRT_SHIFT (16)
#define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT)
+#define HSR_CM (1 << 8)
#define HSR_FSC (0x3f)
#define HSR_FSC_TYPE (0x3c)
#define HSR_SSE (1 << 21)
diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 40002416efec..9b118516d2db 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr;
}
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr &= ~HCR_TWE;
}
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr |= HCR_TWE;
}
@@ -167,6 +167,11 @@ static inline bool kvm_vcpu_dabt_isvalid(struct kvm_vcpu *vcpu)
return kvm_vcpu_get_hsr(vcpu) & HSR_ISV;
}
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & (HSR_CM | HSR_WNR | HSR_FSC);
+}
+
static inline bool kvm_vcpu_dabt_iswrite(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_get_hsr(vcpu) & HSR_WNR;
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a37c8e89777..556cd818eccf 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -7,6 +7,7 @@
#ifndef __ARM_KVM_HOST_H__
#define __ARM_KVM_HOST_H__
+#include <linux/arm-smccc.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kvm_types.h>
@@ -38,6 +39,7 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -76,6 +78,14 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+
+ /*
+ * If we encounter a data abort without valid instruction syndrome
+ * information, report this to user space. User space can (and
+ * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+ * supported.
+ */
+ bool return_nisv_io_abort_to_user;
};
#define KVM_NR_MEM_OBJS 40
@@ -323,6 +333,29 @@ static inline int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+static inline long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+ return SMCCC_RET_NOT_SUPPORTED;
+}
+
+static inline gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+ return GPA_INVALID;
+}
+
+static inline void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+ return false;
+}
+
void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 2769360f195c..03cd7c19a683 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -131,8 +131,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index b76b75bd9e00..e442d82821df 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -24,7 +24,7 @@ obj-y += kvm-arm.o init.o interrupts.o
obj-y += handle_exit.o guest.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o vgic-v3-coproc.o
obj-y += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
-obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+obj-y += $(KVM)/arm/psci.o $(KVM)/arm/perf.o $(KVM)/arm/hypercalls.o
obj-y += $(KVM)/arm/aarch32.o
obj-y += $(KVM)/arm/vgic/vgic.o
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 684cf64b4033..0e6f23504c26 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -21,6 +21,10 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(halt_successful_poll),
+ VCPU_STAT(halt_attempted_poll),
+ VCPU_STAT(halt_poll_invalid),
+ VCPU_STAT(halt_wakeup),
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
@@ -255,6 +259,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
{
events->exception.serror_pending = !!(*vcpu_hcr(vcpu) & HCR_VA);
+ /*
+ * We never return a pending ext_dabt here because we deliver it to
+ * the virtual CPU directly when setting the event and it's no longer
+ * 'pending' at this point.
+ */
+
return 0;
}
@@ -263,12 +273,16 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
+ bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr)
return -EINVAL;
else if (serror_pending)
kvm_inject_vabt(vcpu);
+ if (ext_dabt_pending)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
return 0;
}
diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index 2a6a1394d26e..e58a89d2f13f 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -9,7 +9,7 @@
#include <asm/kvm_emulate.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_mmu.h>
-#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
#include <trace/events/kvm.h>
#include "trace.h"
diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c
index 9a07916af8dd..7c90b4c615a5 100644
--- a/arch/arm/mm/proc-v7-bugs.c
+++ b/arch/arm/mm/proc-v7-bugs.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/arm-smccc.h>
#include <linux/kernel.h>
-#include <linux/psci.h>
#include <linux/smp.h>
#include <asm/cp15.h>
@@ -75,26 +74,20 @@ static void cpu_v7_spectre_init(void)
case ARM_CPU_PART_CORTEX_A72: {
struct arm_smccc_res res;
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- break;
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+ if ((int)res.a0 != 0)
+ return;
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 != 0)
- break;
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
per_cpu(harden_branch_predictor_fn, cpu) =
call_hvc_arch_workaround_1;
cpu_do_switch_mm = cpu_v7_hvc_switch_mm;
spectre_v2_method = "hypervisor";
break;
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 != 0)
- break;
+ case SMCCC_CONDUIT_SMC:
per_cpu(harden_branch_predictor_fn, cpu) =
call_smc_arch_workaround_1;
cpu_do_switch_mm = cpu_v7_smc_switch_mm;
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ddf9d762ac62..6e5d839f42b5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -61,7 +61,6 @@
* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
- * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
* TSW: Trap cache operations by set/way
* TWE: Trap WFE
* TWI: Trap WFI
@@ -74,7 +73,7 @@
* SWIO: Turn set/way invalidates into set/way clean+invalidate
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
- HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_BSU_IS | HCR_FB | HCR_TAC | \
HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
HCR_FMO | HCR_IMO)
#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d69c1efc63e7..5efe5ca8fecf 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
/* trap error record accesses */
vcpu->arch.hcr_el2 |= HCR_TERR;
}
- if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+
+ if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
vcpu->arch.hcr_el2 |= HCR_FWB;
+ } else {
+ /*
+ * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+ * get set in SCTLR_EL1 such that we can detect when the guest
+ * MMU gets turned on and do the necessary cache maintenance
+ * then.
+ */
+ vcpu->arch.hcr_el2 |= HCR_TVM;
+ }
if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
vcpu->arch.hcr_el2 &= ~HCR_RW;
@@ -77,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
return (unsigned long *)&vcpu->arch.hcr_el2;
}
-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 &= ~HCR_TWE;
+ if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+ vcpu->arch.hcr_el2 &= ~HCR_TWI;
+ else
+ vcpu->arch.hcr_el2 |= HCR_TWI;
}
-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
{
vcpu->arch.hcr_el2 |= HCR_TWE;
+ vcpu->arch.hcr_el2 |= HCR_TWI;
}
static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
@@ -258,6 +273,11 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu)
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV);
}
+static inline unsigned long kvm_vcpu_dabt_iss_nisv_sanitized(const struct kvm_vcpu *vcpu)
+{
+ return kvm_vcpu_get_hsr(vcpu) & (ESR_ELx_CM | ESR_ELx_WNR | ESR_ELx_FSC);
+}
+
static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu)
{
return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f656169db8c3..b36dae9ee5f9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,6 +44,7 @@
KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1)
#define KVM_REQ_VCPU_RESET KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3)
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
@@ -83,6 +84,14 @@ struct kvm_arch {
/* Mandated version of PSCI */
u32 psci_version;
+
+ /*
+ * If we encounter a data abort without valid instruction syndrome
+ * information, report this to user space. User space can (and
+ * should) opt in to this feature if KVM_CAP_ARM_NISV_TO_USER is
+ * supported.
+ */
+ bool return_nisv_io_abort_to_user;
};
#define KVM_NR_MEM_OBJS 40
@@ -338,6 +347,13 @@ struct kvm_vcpu_arch {
/* True when deferrable sysregs are loaded on the physical CPU,
* see kvm_vcpu_load_sysregs and kvm_vcpu_put_sysregs. */
bool sysregs_loaded_on_cpu;
+
+ /* Guest PV state */
+ struct {
+ u64 steal;
+ u64 last_steal;
+ gpa_t base;
+ } steal;
};
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
@@ -478,6 +494,27 @@ void handle_exit_early(struct kvm_vcpu *vcpu, struct kvm_run *run,
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu);
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu);
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu);
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr);
+
+static inline void kvm_arm_pvtime_vcpu_init(struct kvm_vcpu_arch *vcpu_arch)
+{
+ vcpu_arch->steal.base = GPA_INVALID;
+}
+
+static inline bool kvm_arm_is_pvtime_enabled(struct kvm_vcpu_arch *vcpu_arch)
+{
+ return (vcpu_arch->steal.base != GPA_INVALID);
+}
+
void kvm_set_sei_esr(struct kvm_vcpu *vcpu, u64 syndrome);
struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
diff --git a/arch/arm64/include/asm/paravirt.h b/arch/arm64/include/asm/paravirt.h
index 799d9dd6f7cc..cf3a0fd7c1a7 100644
--- a/arch/arm64/include/asm/paravirt.h
+++ b/arch/arm64/include/asm/paravirt.h
@@ -21,6 +21,13 @@ static inline u64 paravirt_steal_clock(int cpu)
{
return pv_ops.time.steal_clock(cpu);
}
-#endif
+
+int __init pv_time_init(void);
+
+#else
+
+#define pv_time_init() do {} while (0)
+
+#endif // CONFIG_PARAVIRT
#endif
diff --git a/arch/arm64/include/asm/pvclock-abi.h b/arch/arm64/include/asm/pvclock-abi.h
new file mode 100644
index 000000000000..c4f1c0a0789c
--- /dev/null
+++ b/arch/arm64/include/asm/pvclock-abi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __ASM_PVCLOCK_ABI_H
+#define __ASM_PVCLOCK_ABI_H
+
+/* The below structure is defined in ARM DEN0057A */
+
+struct pvclock_vcpu_stolen_time {
+ __le32 revision;
+ __le32 attributes;
+ __le64 stolen_time;
+ /* Structure must be 64 byte aligned, pad to that size */
+ u8 padding[48];
+} __packed;
+
+#endif
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 67c21f9bdbad..820e5751ada7 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -164,8 +164,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
@@ -323,6 +324,8 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_PVTIME_CTRL 2
+#define KVM_ARM_VCPU_PVTIME_IPA 0
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 93f34b4eca25..90e8aa63af25 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -6,7 +6,6 @@
*/
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
#include <linux/types.h>
#include <linux/cpu.h>
#include <asm/cpu.h>
@@ -167,9 +166,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn,
}
#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-#include <uapi/linux/psci.h>
#include <linux/arm-smccc.h>
-#include <linux/psci.h>
static void call_smc_arch_workaround_1(void)
{
@@ -213,43 +210,31 @@ static int detect_harden_bp_fw(void)
struct arm_smccc_res res;
u32 midr = read_cpuid_id();
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_1, &res);
+
+ switch ((int)res.a0) {
+ case 1:
+ /* Firmware says we're just fine */
+ return 0;
+ case 0:
+ break;
+ default:
return -1;
+ }
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_hvc_arch_workaround_1;
- /* This is a guest, no need to patch KVM vectors */
- smccc_start = NULL;
- smccc_end = NULL;
- break;
- default:
- return -1;
- }
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
+ cb = call_hvc_arch_workaround_1;
+ /* This is a guest, no need to patch KVM vectors */
+ smccc_start = NULL;
+ smccc_end = NULL;
break;
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- switch ((int)res.a0) {
- case 1:
- /* Firmware says we're just fine */
- return 0;
- case 0:
- cb = call_smc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_smc_start;
- smccc_end = __smccc_workaround_1_smc_end;
- break;
- default:
- return -1;
- }
+ case SMCCC_CONDUIT_SMC:
+ cb = call_smc_arch_workaround_1;
+ smccc_start = __smccc_workaround_1_smc_start;
+ smccc_end = __smccc_workaround_1_smc_end;
break;
default:
@@ -309,11 +294,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt,
BUG_ON(nr_inst != 1);
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
+ switch (arm_smccc_1_1_get_conduit()) {
+ case SMCCC_CONDUIT_HVC:
insn = aarch64_insn_get_hvc_value();
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
insn = aarch64_insn_get_smc_value();
break;
default:
@@ -339,6 +324,8 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
void arm64_set_ssbd_mitigation(bool state)
{
+ int conduit;
+
if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
pr_info_once("SSBD disabled by kernel configuration\n");
return;
@@ -352,19 +339,10 @@ void arm64_set_ssbd_mitigation(bool state)
return;
}
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
-
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL);
- break;
+ conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_WORKAROUND_2, state,
+ NULL);
- default:
- WARN_ON_ONCE(1);
- break;
- }
+ WARN_ON_ONCE(conduit == SMCCC_CONDUIT_NONE);
}
static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
@@ -374,6 +352,7 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
bool required = true;
s32 val;
bool this_cpu_safe = false;
+ int conduit;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
@@ -391,25 +370,10 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
goto out_printmsg;
}
- if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
- ssbd_state = ARM64_SSBD_UNKNOWN;
- if (!this_cpu_safe)
- __ssb_safe = false;
- return false;
- }
-
- switch (psci_ops.conduit) {
- case PSCI_CONDUIT_HVC:
- arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
+ conduit = arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- case PSCI_CONDUIT_SMC:
- arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
- ARM_SMCCC_ARCH_WORKAROUND_2, &res);
- break;
-
- default:
+ if (conduit == SMCCC_CONDUIT_NONE) {
ssbd_state = ARM64_SSBD_UNKNOWN;
if (!this_cpu_safe)
__ssb_safe = false;
diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c
index 4cfed91fe256..1ef702b0be2d 100644
--- a/arch/arm64/kernel/paravirt.c
+++ b/arch/arm64/kernel/paravirt.c
@@ -6,13 +6,153 @@
* Author: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
*/
+#define pr_fmt(fmt) "arm-pv: " fmt
+
+#include <linux/arm-smccc.h>
+#include <linux/cpuhotplug.h>
#include <linux/export.h>
+#include <linux/io.h>
#include <linux/jump_label.h>
+#include <linux/printk.h>
+#include <linux/psci.h>
+#include <linux/reboot.h>
+#include <linux/slab.h>
#include <linux/types.h>
+
#include <asm/paravirt.h>
+#include <asm/pvclock-abi.h>
+#include <asm/smp_plat.h>
struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;
struct paravirt_patch_template pv_ops;
EXPORT_SYMBOL_GPL(pv_ops);
+
+struct pv_time_stolen_time_region {
+ struct pvclock_vcpu_stolen_time *kaddr;
+};
+
+static DEFINE_PER_CPU(struct pv_time_stolen_time_region, stolen_time_region);
+
+static bool steal_acc = true;
+static int __init parse_no_stealacc(char *arg)
+{
+ steal_acc = false;
+ return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
+/* return stolen time in ns by asking the hypervisor */
+static u64 pv_steal_clock(int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = per_cpu_ptr(&stolen_time_region, cpu);
+ if (!reg->kaddr) {
+ pr_warn_once("stolen time enabled but not configured for cpu %d\n",
+ cpu);
+ return 0;
+ }
+
+ return le64_to_cpu(READ_ONCE(reg->kaddr->stolen_time));
+}
+
+static int stolen_time_dying_cpu(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+ if (!reg->kaddr)
+ return 0;
+
+ memunmap(reg->kaddr);
+ memset(reg, 0, sizeof(*reg));
+
+ return 0;
+}
+
+static int init_stolen_time_cpu(unsigned int cpu)
+{
+ struct pv_time_stolen_time_region *reg;
+ struct arm_smccc_res res;
+
+ reg = this_cpu_ptr(&stolen_time_region);
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ if (res.a0 == SMCCC_RET_NOT_SUPPORTED)
+ return -EINVAL;
+
+ reg->kaddr = memremap(res.a0,
+ sizeof(struct pvclock_vcpu_stolen_time),
+ MEMREMAP_WB);
+
+ if (!reg->kaddr) {
+ pr_warn("Failed to map stolen time data structure\n");
+ return -ENOMEM;
+ }
+
+ if (le32_to_cpu(reg->kaddr->revision) != 0 ||
+ le32_to_cpu(reg->kaddr->attributes) != 0) {
+ pr_warn_once("Unexpected revision or attributes in stolen time data\n");
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int pv_time_init_stolen_time(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ARM_KVMPV_STARTING,
+ "hypervisor/arm/pvtime:starting",
+ init_stolen_time_cpu, stolen_time_dying_cpu);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+static bool has_pv_steal_clock(void)
+{
+ struct arm_smccc_res res;
+
+ /* To detect the presence of PV time support we require SMCCC 1.1+ */
+ if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
+ ARM_SMCCC_HV_PV_TIME_FEATURES, &res);
+
+ if (res.a0 != SMCCC_RET_SUCCESS)
+ return false;
+
+ arm_smccc_1_1_invoke(ARM_SMCCC_HV_PV_TIME_FEATURES,
+ ARM_SMCCC_HV_PV_TIME_ST, &res);
+
+ return (res.a0 == SMCCC_RET_SUCCESS);
+}
+
+int __init pv_time_init(void)
+{
+ int ret;
+
+ if (!has_pv_steal_clock())
+ return 0;
+
+ ret = pv_time_init_stolen_time();
+ if (ret)
+ return ret;
+
+ pv_ops.time.steal_clock = pv_steal_clock;
+
+ static_key_slow_inc(&paravirt_steal_enabled);
+ if (steal_acc)
+ static_key_slow_inc(&paravirt_steal_rq_enabled);
+
+ pr_info("using stolen time PV\n");
+
+ return 0;
+}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8f9dc6..d6259dac62b6 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -2,6 +2,7 @@
// Copyright (C) 2017 Arm Ltd.
#define pr_fmt(fmt) "sdei: " fmt
+#include <linux/arm-smccc.h>
#include <linux/arm_sdei.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
@@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}
- sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
+ sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
if (arm64_kernel_unmapped_at_el0()) {
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 0b2946414dc9..73f06d4b3aae 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -30,6 +30,7 @@
#include <asm/thread_info.h>
#include <asm/stacktrace.h>
+#include <asm/paravirt.h>
unsigned long profile_pc(struct pt_regs *regs)
{
@@ -65,4 +66,6 @@ void __init time_init(void)
/* Calibrate the delay loop directly */
lpj_fine = arch_timer_rate / HZ;
+
+ pv_time_init();
}
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a67121d419a2..a475c68cbfec 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -21,6 +21,8 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on OF
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET && MULTIUSER
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -39,6 +41,8 @@ config KVM
select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_VCPU_RUN_PID_CHANGE
+ select TASKSTATS
+ select TASK_DELAY_ACCT
---help---
Support hosting virtualized guest machines.
We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3ac1a64d2fb9..5ffbdc39e780 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -13,6 +13,8 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o
kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfd626447482..2fff06114a8f 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -34,6 +34,10 @@
#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }
struct kvm_stats_debugfs_item debugfs_entries[] = {
+ VCPU_STAT(halt_successful_poll),
+ VCPU_STAT(halt_attempted_poll),
+ VCPU_STAT(halt_poll_invalid),
+ VCPU_STAT(halt_wakeup),
VCPU_STAT(hvc_exit_stat),
VCPU_STAT(wfe_exit_stat),
VCPU_STAT(wfi_exit_stat),
@@ -712,6 +716,12 @@ int __kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu,
if (events->exception.serror_pending && events->exception.serror_has_esr)
events->exception.serror_esr = vcpu_get_vsesr(vcpu);
+ /*
+ * We never return a pending ext_dabt here because we deliver it to
+ * the virtual CPU directly when setting the event and it's no longer
+ * 'pending' at this point.
+ */
+
return 0;
}
@@ -720,6 +730,7 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
{
bool serror_pending = events->exception.serror_pending;
bool has_esr = events->exception.serror_has_esr;
+ bool ext_dabt_pending = events->exception.ext_dabt_pending;
if (serror_pending && has_esr) {
if (!cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
@@ -733,6 +744,9 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
kvm_inject_vabt(vcpu);
}
+ if (ext_dabt_pending)
+ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+
return 0;
}
@@ -858,6 +872,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_set_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_set_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -878,6 +895,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_get_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_get_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
@@ -898,6 +918,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
case KVM_ARM_VCPU_TIMER_CTRL:
ret = kvm_arm_timer_has_attr(vcpu, attr);
break;
+ case KVM_ARM_VCPU_PVTIME_CTRL:
+ ret = kvm_arm_pvtime_has_attr(vcpu, attr);
+ break;
default:
ret = -ENXIO;
break;
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 706cca23f0d2..aacfc55de44c 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -11,8 +11,6 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-#include <kvm/arm_psci.h>
-
#include <asm/esr.h>
#include <asm/exception.h>
#include <asm/kvm_asm.h>
@@ -22,6 +20,8 @@
#include <asm/debug-monitors.h>
#include <asm/traps.h>
+#include <kvm/arm_hypercalls.h>
+
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index a9d25a305af5..ccdb6a051ab2 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -109,7 +109,7 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
/**
* kvm_inject_dabt - inject a data abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the data abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
@@ -125,7 +125,7 @@ void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr)
/**
* kvm_inject_pabt - inject a prefetch abort into the guest
- * @vcpu: The VCPU to receive the undefined exception
+ * @vcpu: The VCPU to receive the prefetch abort
* @addr: The address to report in the DFAR
*
* It is assumed that this code is called from the VCPU thread and that the
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 46822afc57e0..9f2165937f7d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2098,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
WARN_ON(1);
}
- kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
- cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest CP%d access at: %08lx [%08lx]\n",
+ cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
@@ -2233,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
NULL, 0);
}
+static bool is_imp_def_sys_reg(struct sys_reg_params *params)
+{
+ // See ARM DDI 0487E.a, section D12.3.2
+ return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
+}
+
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
@@ -2248,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
if (likely(r)) {
perform_access(vcpu, params, r);
+ } else if (is_imp_def_sys_reg(params)) {
+ kvm_inject_undefined(vcpu);
} else {
- kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
- *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+ *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
return 1;
@@ -2360,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
return NULL;
+ if (!index_to_params(id, &params))
+ return NULL;
+
table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg_by_id(id, &params, table, num);
+ r = find_reg(&params, table, num);
if (!r)
r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 9bca0312d798..5a6fc30f5989 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -62,11 +62,24 @@ struct sys_reg_desc {
#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
-static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+static __printf(2, 3)
+inline void print_sys_reg_msg(const struct sys_reg_params *p,
+ char *fmt, ...)
{
+ va_list va;
+
+ va_start(va, fmt);
/* Look, we even formatted it for you to paste into the table! */
- kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ &(struct va_format){ fmt, &va },
p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ va_end(va);
+}
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* GCC warns on an empty format string */
+ print_sys_reg_msg(p, "%s", "");
}
static inline bool ignore_write(struct kvm_vcpu *vcpu,
diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c
index 9cd70d1a5622..a479023fa036 100644
--- a/drivers/firmware/arm_sdei.c
+++ b/drivers/firmware/arm_sdei.c
@@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev)
if (np) {
if (of_property_read_string(np, "method", &method)) {
pr_warn("missing \"method\" property\n");
- return CONDUIT_INVALID;
+ return SMCCC_CONDUIT_NONE;
}
if (!strcmp("hvc", method)) {
sdei_firmware_call = &sdei_smccc_hvc;
- return CONDUIT_HVC;
+ return SMCCC_CONDUIT_HVC;
} else if (!strcmp("smc", method)) {
sdei_firmware_call = &sdei_smccc_smc;
- return CONDUIT_SMC;
+ return SMCCC_CONDUIT_SMC;
}
pr_warn("invalid \"method\" property: %s\n", method);
} else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) {
if (acpi_psci_use_hvc()) {
sdei_firmware_call = &sdei_smccc_hvc;
- return CONDUIT_HVC;
+ return SMCCC_CONDUIT_HVC;
} else {
sdei_firmware_call = &sdei_smccc_smc;
- return CONDUIT_SMC;
+ return SMCCC_CONDUIT_SMC;
}
}
- return CONDUIT_INVALID;
+ return SMCCC_CONDUIT_NONE;
}
static int sdei_probe(struct platform_device *pdev)
diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c
index 84f4ff351c62..b3b6c15e7b36 100644
--- a/drivers/firmware/psci/psci.c
+++ b/drivers/firmware/psci/psci.c
@@ -53,10 +53,18 @@ bool psci_tos_resident_on(int cpu)
}
struct psci_operations psci_ops = {
- .conduit = PSCI_CONDUIT_NONE,
+ .conduit = SMCCC_CONDUIT_NONE,
.smccc_version = SMCCC_VERSION_1_0,
};
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void)
+{
+ if (psci_ops.smccc_version < SMCCC_VERSION_1_1)
+ return SMCCC_CONDUIT_NONE;
+
+ return psci_ops.conduit;
+}
+
typedef unsigned long (psci_fn)(unsigned long, unsigned long,
unsigned long, unsigned long);
static psci_fn *invoke_psci_fn;
@@ -212,13 +220,13 @@ static unsigned long psci_migrate_info_up_cpu(void)
0, 0, 0);
}
-static void set_conduit(enum psci_conduit conduit)
+static void set_conduit(enum arm_smccc_conduit conduit)
{
switch (conduit) {
- case PSCI_CONDUIT_HVC:
+ case SMCCC_CONDUIT_HVC:
invoke_psci_fn = __invoke_psci_fn_hvc;
break;
- case PSCI_CONDUIT_SMC:
+ case SMCCC_CONDUIT_SMC:
invoke_psci_fn = __invoke_psci_fn_smc;
break;
default:
@@ -240,9 +248,9 @@ static int get_set_conduit_method(struct device_node *np)
}
if (!strcmp("hvc", method)) {
- set_conduit(PSCI_CONDUIT_HVC);
+ set_conduit(SMCCC_CONDUIT_HVC);
} else if (!strcmp("smc", method)) {
- set_conduit(PSCI_CONDUIT_SMC);
+ set_conduit(SMCCC_CONDUIT_SMC);
} else {
pr_warn("invalid \"method\" property: %s\n", method);
return -EINVAL;
@@ -583,9 +591,9 @@ int __init psci_acpi_init(void)
pr_info("probing for conduit method from ACPI.\n");
if (acpi_psci_use_hvc())
- set_conduit(PSCI_CONDUIT_HVC);
+ set_conduit(SMCCC_CONDUIT_HVC);
else
- set_conduit(PSCI_CONDUIT_SMC);
+ set_conduit(SMCCC_CONDUIT_SMC);
return psci_probe();
}
diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 563e87ed0766..45969927cc81 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -141,12 +141,17 @@ static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
int its_schedule_vpe(struct its_vpe *vpe, bool on)
{
struct its_cmd_info info;
+ int ret;
WARN_ON(preemptible());
info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;
- return its_send_vpe_cmd(vpe, &info);
+ ret = its_send_vpe_cmd(vpe, &info);
+ if (!ret)
+ vpe->resident = on;
+
+ return ret;
}
int its_invall_vpe(struct its_vpe *vpe)
diff --git a/include/Kbuild b/include/Kbuild
index ffba79483cc5..e8154f8bcac5 100644
--- a/include/Kbuild
+++ b/include/Kbuild
@@ -67,6 +67,8 @@ header-test- += keys/big_key-type.h
header-test- += keys/request_key_auth-type.h
header-test- += keys/trusted.h
header-test- += kvm/arm_arch_timer.h
+header-test-$(CONFIG_ARM) += kvm/arm_hypercalls.h
+header-test-$(CONFIG_ARM64) += kvm/arm_hypercalls.h
header-test- += kvm/arm_pmu.h
header-test-$(CONFIG_ARM) += kvm/arm_psci.h
header-test-$(CONFIG_ARM64) += kvm/arm_psci.h
diff --git a/include/kvm/arm_hypercalls.h b/include/kvm/arm_hypercalls.h
new file mode 100644
index 000000000000..0e2509d27910
--- /dev/null
+++ b/include/kvm/arm_hypercalls.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Arm Ltd. */
+
+#ifndef __KVM_ARM_HYPERCALLS_H
+#define __KVM_ARM_HYPERCALLS_H
+
+#include <asm/kvm_emulate.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+
+static inline u32 smccc_get_function(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 0);
+}
+
+static inline unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 1);
+}
+
+static inline unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 2);
+}
+
+static inline unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
+{
+ return vcpu_get_reg(vcpu, 3);
+}
+
+static inline void smccc_set_retval(struct kvm_vcpu *vcpu,
+ unsigned long a0,
+ unsigned long a1,
+ unsigned long a2,
+ unsigned long a3)
+{
+ vcpu_set_reg(vcpu, 0, a0);
+ vcpu_set_reg(vcpu, 1, a1);
+ vcpu_set_reg(vcpu, 2, a2);
+ vcpu_set_reg(vcpu, 3, a3);
+}
+
+#endif
diff --git a/include/kvm/arm_psci.h b/include/kvm/arm_psci.h
index 632e78bdef4d..5b58bd2fe088 100644
--- a/include/kvm/arm_psci.h
+++ b/include/kvm/arm_psci.h
@@ -40,7 +40,7 @@ static inline int kvm_psci_version(struct kvm_vcpu *vcpu, struct kvm *kvm)
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu);
+int kvm_psci_call(struct kvm_vcpu *vcpu);
struct kvm_one_reg;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index af4f09c02bf1..9d53f545a3d5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -240,7 +240,7 @@ struct vgic_dist {
* Contains the attributes and gpa of the LPI configuration table.
* Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
* one address across all redistributors.
- * GICv3 spec: 6.1.2 "LPI Configuration tables"
+ * GICv3 spec: IHI 0069E 6.1.1 "LPI Configuration tables"
*/
u64 propbaser;
@@ -378,8 +378,6 @@ static inline int kvm_vgic_get_max_vcpus(void)
return kvm_vgic_global_state.max_gic_vcpus;
}
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
/**
* kvm_vgic_setup_default_irq_routing:
* Setup a default flat gsi routing table mapping all SPIs
@@ -396,7 +394,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
struct kvm_kernel_irq_routing_entry *irq_entry);
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+int vgic_v4_load(struct kvm_vcpu *vcpu);
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
#endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h
index 080012a6f025..59494df0f55b 100644
--- a/include/linux/arm-smccc.h
+++ b/include/linux/arm-smccc.h
@@ -45,6 +45,7 @@
#define ARM_SMCCC_OWNER_SIP 2
#define ARM_SMCCC_OWNER_OEM 3
#define ARM_SMCCC_OWNER_STANDARD 4
+#define ARM_SMCCC_OWNER_STANDARD_HYP 5
#define ARM_SMCCC_OWNER_TRUSTED_APP 48
#define ARM_SMCCC_OWNER_TRUSTED_APP_END 49
#define ARM_SMCCC_OWNER_TRUSTED_OS 50
@@ -80,6 +81,22 @@
#include <linux/linkage.h>
#include <linux/types.h>
+
+enum arm_smccc_conduit {
+ SMCCC_CONDUIT_NONE,
+ SMCCC_CONDUIT_SMC,
+ SMCCC_CONDUIT_HVC,
+};
+
+/**
+ * arm_smccc_1_1_get_conduit()
+ *
+ * Returns the conduit to be used for SMCCCv1.1 or later.
+ *
+ * When SMCCCv1.1 is not present, returns SMCCC_CONDUIT_NONE.
+ */
+enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void);
+
/**
* struct arm_smccc_res - Result from SMC/HVC call
* @a0-a3 result values from registers 0 to 3
@@ -302,5 +319,63 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1,
#define SMCCC_RET_NOT_SUPPORTED -1
#define SMCCC_RET_NOT_REQUIRED -2
+/*
+ * Like arm_smccc_1_1* but always returns SMCCC_RET_NOT_SUPPORTED.
+ * Used when the SMCCC conduit is not defined. The empty asm statement
+ * avoids compiler warnings about unused variables.
+ */
+#define __fail_smccc_1_1(...) \
+ do { \
+ __declare_args(__count_args(__VA_ARGS__), __VA_ARGS__); \
+ asm ("" __constraints(__count_args(__VA_ARGS__))); \
+ if (___res) \
+ ___res->a0 = SMCCC_RET_NOT_SUPPORTED; \
+ } while (0)
+
+/*
+ * arm_smccc_1_1_invoke() - make an SMCCC v1.1 compliant call
+ *
+ * This is a variadic macro taking one to eight source arguments, and
+ * an optional return structure.
+ *
+ * @a0-a7: arguments passed in registers 0 to 7
+ * @res: result values from registers 0 to 3
+ *
+ * This macro will make either an HVC call or an SMC call depending on the
+ * current SMCCC conduit. If no valid conduit is available then -1
+ * (SMCCC_RET_NOT_SUPPORTED) is returned in @res.a0 (if supplied).
+ *
+ * The return value also provides the conduit that was used.
+ */
+#define arm_smccc_1_1_invoke(...) ({ \
+ int method = arm_smccc_1_1_get_conduit(); \
+ switch (method) { \
+ case SMCCC_CONDUIT_HVC: \
+ arm_smccc_1_1_hvc(__VA_ARGS__); \
+ break; \
+ case SMCCC_CONDUIT_SMC: \
+ arm_smccc_1_1_smc(__VA_ARGS__); \
+ break; \
+ default: \
+ __fail_smccc_1_1(__VA_ARGS__); \
+ method = SMCCC_CONDUIT_NONE; \
+ break; \
+ } \
+ method; \
+ })
+
+/* Paravirtualised time calls (defined by ARM DEN0057A) */
+#define ARM_SMCCC_HV_PV_TIME_FEATURES \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD_HYP, \
+ 0x20)
+
+#define ARM_SMCCC_HV_PV_TIME_ST \
+ ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \
+ ARM_SMCCC_SMC_64, \
+ ARM_SMCCC_OWNER_STANDARD_HYP, \
+ 0x21)
+
#endif /*__ASSEMBLY__*/
#endif /*__LINUX_ARM_SMCCC_H*/
diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h
index 3305ea7f9dc7..0a241c5c911d 100644
--- a/include/linux/arm_sdei.h
+++ b/include/linux/arm_sdei.h
@@ -5,12 +5,6 @@
#include <uapi/linux/arm_sdei.h>
-enum sdei_conduit_types {
- CONDUIT_INVALID = 0,
- CONDUIT_SMC,
- CONDUIT_HVC,
-};
-
#include <acpi/ghes.h>
#ifdef CONFIG_ARM_SDE_INTERFACE
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 068793a619ca..89d75edb5750 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -136,6 +136,7 @@ enum cpuhp_state {
/* Must be the last timer callback */
CPUHP_AP_DUMMY_TIMER_STARTING,
CPUHP_AP_ARM_XEN_STARTING,
+ CPUHP_AP_ARM_KVMPV_STARTING,
CPUHP_AP_ARM_CORESIGHT_STARTING,
CPUHP_AP_ARM64_ISNDEP_STARTING,
CPUHP_AP_SMPCFD_DYING,
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index e6b155713b47..5dbcfc65f21e 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -32,9 +32,13 @@ struct its_vm {
struct its_vpe {
struct page *vpt_page;
struct its_vm *its_vm;
+ /* per-vPE VLPI tracking */
+ atomic_t vlpi_count;
/* Doorbell interrupt */
int irq;
irq_hw_number_t vpe_db_lpi;
+ /* VPE resident */
+ bool resident;
/* VPE proxy mapping */
int vpe_proxy_event;
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d41c521a39da..a0f4c0f9cf3b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -746,6 +746,28 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
unsigned long len);
int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
gpa_t gpa, unsigned long len);
+
+#define __kvm_put_guest(kvm, gfn, offset, value, type) \
+({ \
+ unsigned long __addr = gfn_to_hva(kvm, gfn); \
+ type __user *__uaddr = (type __user *)(__addr + offset); \
+ int __ret = -EFAULT; \
+ \
+ if (!kvm_is_error_hva(__addr)) \
+ __ret = put_user(value, __uaddr); \
+ if (!__ret) \
+ mark_page_dirty(kvm, gfn); \
+ __ret; \
+})
+
+#define kvm_put_guest(kvm, gpa, value, type) \
+({ \
+ gpa_t __gpa = gpa; \
+ struct kvm *__kvm = kvm; \
+ __kvm_put_guest(__kvm, __gpa >> PAGE_SHIFT, \
+ offset_in_page(__gpa), (value), type); \
+})
+
int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
@@ -1241,7 +1263,7 @@ extern unsigned int halt_poll_ns_grow_start;
extern unsigned int halt_poll_ns_shrink;
struct kvm_device {
- struct kvm_device_ops *ops;
+ const struct kvm_device_ops *ops;
struct kvm *kvm;
void *private;
struct list_head vm_node;
@@ -1294,7 +1316,7 @@ struct kvm_device_ops {
void kvm_device_get(struct kvm_device *dev);
void kvm_device_put(struct kvm_device *dev);
struct kvm_device *kvm_device_from_filp(struct file *filp);
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type);
+int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type);
void kvm_unregister_device_ops(u32 type);
extern struct kvm_device_ops kvm_mpic_ops;
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index bde5374ae021..1c88e69db3d9 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -35,6 +35,8 @@ typedef unsigned long gva_t;
typedef u64 gpa_t;
typedef u64 gfn_t;
+#define GPA_INVALID (~(gpa_t)0)
+
typedef unsigned long hva_t;
typedef u64 hpa_t;
typedef u64 hfn_t;
diff --git a/include/linux/psci.h b/include/linux/psci.h
index e2bacc6fd2f2..ebe0a881d13d 100644
--- a/include/linux/psci.h
+++ b/include/linux/psci.h
@@ -7,6 +7,7 @@
#ifndef __LINUX_PSCI_H
#define __LINUX_PSCI_H
+#include <linux/arm-smccc.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -18,12 +19,6 @@ bool psci_tos_resident_on(int cpu);
int psci_cpu_suspend_enter(u32 state);
bool psci_power_state_is_valid(u32 state);
-enum psci_conduit {
- PSCI_CONDUIT_NONE,
- PSCI_CONDUIT_SMC,
- PSCI_CONDUIT_HVC,
-};
-
enum smccc_version {
SMCCC_VERSION_1_0,
SMCCC_VERSION_1_1,
@@ -38,7 +33,7 @@ struct psci_operations {
int (*affinity_info)(unsigned long target_affinity,
unsigned long lowest_affinity_level);
int (*migrate_info_type)(void);
- enum psci_conduit conduit;
+ enum arm_smccc_conduit conduit;
enum smccc_version smccc_version;
};
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 52641d8ca9e8..fb47c0fad6a6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -235,6 +235,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
+#define KVM_EXIT_ARM_NISV 28
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -394,6 +395,11 @@ struct kvm_run {
} eoi;
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
+ /* KVM_EXIT_ARM_NISV */
+ struct {
+ __u64 esr_iss;
+ __u64 fault_ipa;
+ } arm_nisv;
/* Fix the size of the union. */
char padding[256];
};
@@ -1000,6 +1006,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
+#define KVM_CAP_ARM_NISV_TO_USER 176
+#define KVM_CAP_ARM_INJECT_EXT_DABT 177
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1227,6 +1235,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
KVM_DEV_TYPE_XIVE,
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
+ KVM_DEV_TYPE_ARM_PV_TIME,
+#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_MAX,
};
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index e2bb5bd60227..f182b2380345 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(struct kvm *kvm)
static void soft_timer_start(struct hrtimer *hrt, u64 ns)
{
hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_HARD);
}
static void soft_timer_cancel(struct hrtimer *hrt)
@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
ptimer->cntvoff = 0;
- hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
timer->bg_timer.function = kvm_bg_timer_expire;
- hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
- hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+ hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
vtimer->hrtimer.function = kvm_hrtimer_expire;
ptimer->hrtimer.function = kvm_hrtimer_expire;
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86c6aa1cb58e..8de4daf25097 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -40,6 +40,10 @@
#include <asm/kvm_coproc.h>
#include <asm/sections.h>
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_pmu.h>
+#include <kvm/arm_psci.h>
+
#ifdef REQUIRES_VIRT
__asm__(".arch_extension virt");
#endif
@@ -98,6 +102,26 @@ int kvm_arch_check_processor_compat(void)
return 0;
}
+int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+ struct kvm_enable_cap *cap)
+{
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_ARM_NISV_TO_USER:
+ r = 0;
+ kvm->arch.return_nisv_io_abort_to_user = true;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
/**
* kvm_arch_init_vm - initializes a VM data structure
@@ -197,6 +221,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2:
+ case KVM_CAP_ARM_NISV_TO_USER:
+ case KVM_CAP_ARM_INJECT_EXT_DABT:
r = 1;
break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -322,20 +348,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
/*
* If we're about to block (most likely because we've just hit a
* WFI), we need to sync back the state of the GIC CPU interface
- * so that we have the lastest PMR and group enables. This ensures
+ * so that we have the latest PMR and group enables. This ensures
* that kvm_arch_vcpu_runnable has up-to-date data to decide
* whether we have pending interrupts.
+ *
+ * For the same reason, we want to tell GICv4 that we need
+ * doorbells to be signalled, should an interrupt become pending.
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
+ vgic_v4_put(vcpu, true);
preempt_enable();
-
- kvm_vgic_v4_enable_doorbell(vcpu);
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- kvm_vgic_v4_disable_doorbell(vcpu);
+ preempt_disable();
+ vgic_v4_load(vcpu);
+ preempt_enable();
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -351,6 +381,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_arm_reset_debug_ptr(vcpu);
+ kvm_arm_pvtime_vcpu_init(&vcpu->arch);
+
return kvm_vgic_vcpu_init(vcpu);
}
@@ -380,11 +412,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
kvm_vcpu_load_sysregs(vcpu);
kvm_arch_vcpu_load_fp(vcpu);
kvm_vcpu_pmu_restore_guest(vcpu);
+ if (kvm_arm_is_pvtime_enabled(&vcpu->arch))
+ kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu);
if (single_task_running())
- vcpu_clear_wfe_traps(vcpu);
+ vcpu_clear_wfx_traps(vcpu);
else
- vcpu_set_wfe_traps(vcpu);
+ vcpu_set_wfx_traps(vcpu);
vcpu_ptrauth_setup_lazy(vcpu);
}
@@ -645,6 +679,9 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu)
* that a VCPU sees new virtual interrupts.
*/
kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu);
+
+ if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu))
+ kvm_update_stolen_time(vcpu);
}
}
@@ -1315,7 +1352,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
@@ -1349,7 +1386,7 @@ static void cpu_hyp_reinit(void)
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
- cpu_init_hyp_mode(NULL);
+ cpu_init_hyp_mode();
kvm_arm_init_debug();
diff --git a/virt/kvm/arm/hypercalls.c b/virt/kvm/arm/hypercalls.c
new file mode 100644
index 000000000000..550dfa3e53cd
--- /dev/null
+++ b/virt/kvm/arm/hypercalls.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_psci.h>
+
+int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
+{
+ u32 func_id = smccc_get_function(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+ u32 feature;
+ gpa_t gpa;
+
+ switch (func_id) {
+ case ARM_SMCCC_VERSION_FUNC_ID:
+ val = ARM_SMCCC_VERSION_1_1;
+ break;
+ case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
+ feature = smccc_get_arg1(vcpu);
+ switch (feature) {
+ case ARM_SMCCC_ARCH_WORKAROUND_1:
+ switch (kvm_arm_harden_branch_predictor()) {
+ case KVM_BP_HARDEN_UNKNOWN:
+ break;
+ case KVM_BP_HARDEN_WA_NEEDED:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_BP_HARDEN_NOT_REQUIRED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_ARCH_WORKAROUND_2:
+ switch (kvm_arm_have_ssbd()) {
+ case KVM_SSBD_FORCE_DISABLE:
+ case KVM_SSBD_UNKNOWN:
+ break;
+ case KVM_SSBD_KERNEL:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ case KVM_SSBD_FORCE_ENABLE:
+ case KVM_SSBD_MITIGATED:
+ val = SMCCC_RET_NOT_REQUIRED;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+ break;
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ val = kvm_hypercall_pv_features(vcpu);
+ break;
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ gpa = kvm_init_stolen_time(vcpu);
+ if (gpa != GPA_INVALID)
+ val = gpa;
+ break;
+ default:
+ return kvm_psci_call(vcpu);
+ }
+
+ smccc_set_retval(vcpu, val, 0, 0, 0);
+ return 1;
+}
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 6af5c91337f2..70d3b449692c 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -167,7 +167,14 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
if (ret)
return ret;
} else {
- kvm_err("load/store instruction decoding not implemented\n");
+ if (vcpu->kvm->arch.return_nisv_io_abort_to_user) {
+ run->exit_reason = KVM_EXIT_ARM_NISV;
+ run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu);
+ run->arm_nisv.fault_ipa = fault_ipa;
+ return 0;
+ }
+
+ kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n");
return -ENOSYS;
}
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 38b4c910b6c3..0b32a904a1bb 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -38,6 +38,11 @@ static unsigned long io_map_base;
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
+static bool is_iomap(unsigned long flags)
+{
+ return flags & KVM_S2PTE_FLAG_IS_IOMAP;
+}
+
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1698,6 +1703,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_pagesize = vma_kernel_pagesize(vma);
if (logging_active ||
+ (vma->vm_flags & VM_PFNMAP) ||
!fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
force_pte = true;
vma_pagesize = PAGE_SIZE;
@@ -1760,6 +1766,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
+ if (exec_fault && is_iomap(flags))
+ return -ENOEXEC;
+
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
@@ -1781,7 +1790,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
kvm_set_pfn_dirty(pfn);
- if (fault_status != FSC_PERM)
+ if (fault_status != FSC_PERM && !is_iomap(flags))
clean_dcache_guest_page(pfn, vma_pagesize);
if (exec_fault)
@@ -1948,9 +1957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- goto out_unlock;
+ ret = -ENOEXEC;
+ goto out;
}
/*
@@ -1992,6 +2000,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
+out:
+ if (ret == -ENOEXEC) {
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ }
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2302,15 +2315,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
break;
/*
- * Mapping a read-only VMA is only allowed if the
- * memory region is configured as read-only.
- */
- if (writable && !(vma->vm_flags & VM_WRITE)) {
- ret = -EPERM;
- break;
- }
-
- /*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c
index 87927f7e1ee7..17e2bdd4b76f 100644
--- a/virt/kvm/arm/psci.c
+++ b/virt/kvm/arm/psci.c
@@ -15,6 +15,7 @@
#include <asm/kvm_host.h>
#include <kvm/arm_psci.h>
+#include <kvm/arm_hypercalls.h>
/*
* This is an implementation of the Power State Coordination Interface
@@ -23,38 +24,6 @@
#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1)
-static u32 smccc_get_function(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 0);
-}
-
-static unsigned long smccc_get_arg1(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 1);
-}
-
-static unsigned long smccc_get_arg2(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 2);
-}
-
-static unsigned long smccc_get_arg3(struct kvm_vcpu *vcpu)
-{
- return vcpu_get_reg(vcpu, 3);
-}
-
-static void smccc_set_retval(struct kvm_vcpu *vcpu,
- unsigned long a0,
- unsigned long a1,
- unsigned long a2,
- unsigned long a3)
-{
- vcpu_set_reg(vcpu, 0, a0);
- vcpu_set_reg(vcpu, 1, a1);
- vcpu_set_reg(vcpu, 2, a2);
- vcpu_set_reg(vcpu, 3, a3);
-}
-
static unsigned long psci_affinity_mask(unsigned long affinity_level)
{
if (affinity_level <= 3)
@@ -373,7 +342,7 @@ static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu)
* Errors:
* -EINVAL: Unrecognized PSCI function
*/
-static int kvm_psci_call(struct kvm_vcpu *vcpu)
+int kvm_psci_call(struct kvm_vcpu *vcpu)
{
switch (kvm_psci_version(vcpu, vcpu->kvm)) {
case KVM_ARM_PSCI_1_0:
@@ -387,55 +356,6 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu)
};
}
-int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
-{
- u32 func_id = smccc_get_function(vcpu);
- u32 val = SMCCC_RET_NOT_SUPPORTED;
- u32 feature;
-
- switch (func_id) {
- case ARM_SMCCC_VERSION_FUNC_ID:
- val = ARM_SMCCC_VERSION_1_1;
- break;
- case ARM_SMCCC_ARCH_FEATURES_FUNC_ID:
- feature = smccc_get_arg1(vcpu);
- switch(feature) {
- case ARM_SMCCC_ARCH_WORKAROUND_1:
- switch (kvm_arm_harden_branch_predictor()) {
- case KVM_BP_HARDEN_UNKNOWN:
- break;
- case KVM_BP_HARDEN_WA_NEEDED:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_BP_HARDEN_NOT_REQUIRED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- case ARM_SMCCC_ARCH_WORKAROUND_2:
- switch (kvm_arm_have_ssbd()) {
- case KVM_SSBD_FORCE_DISABLE:
- case KVM_SSBD_UNKNOWN:
- break;
- case KVM_SSBD_KERNEL:
- val = SMCCC_RET_SUCCESS;
- break;
- case KVM_SSBD_FORCE_ENABLE:
- case KVM_SSBD_MITIGATED:
- val = SMCCC_RET_NOT_REQUIRED;
- break;
- }
- break;
- }
- break;
- default:
- return kvm_psci_call(vcpu);
- }
-
- smccc_set_retval(vcpu, val, 0, 0, 0);
- return 1;
-}
-
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
return 3; /* PSCI version and two workaround registers */
diff --git a/virt/kvm/arm/pvtime.c b/virt/kvm/arm/pvtime.c
new file mode 100644
index 000000000000..1e0f4c284888
--- /dev/null
+++ b/virt/kvm/arm/pvtime.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Arm Ltd.
+
+#include <linux/arm-smccc.h>
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_mmu.h>
+#include <asm/pvclock-abi.h>
+
+#include <kvm/arm_hypercalls.h>
+
+void kvm_update_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ u64 steal;
+ __le64 steal_le;
+ u64 offset;
+ int idx;
+ u64 base = vcpu->arch.steal.base;
+
+ if (base == GPA_INVALID)
+ return;
+
+ /* Let's do the local bookkeeping */
+ steal = vcpu->arch.steal.steal;
+ steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+ vcpu->arch.steal.steal = steal;
+
+ steal_le = cpu_to_le64(steal);
+ idx = srcu_read_lock(&kvm->srcu);
+ offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time);
+ kvm_put_guest(kvm, base + offset, steal_le, u64);
+ srcu_read_unlock(&kvm->srcu, idx);
+}
+
+long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
+{
+ u32 feature = smccc_get_arg1(vcpu);
+ long val = SMCCC_RET_NOT_SUPPORTED;
+
+ switch (feature) {
+ case ARM_SMCCC_HV_PV_TIME_FEATURES:
+ case ARM_SMCCC_HV_PV_TIME_ST:
+ val = SMCCC_RET_SUCCESS;
+ break;
+ }
+
+ return val;
+}
+
+gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu)
+{
+ struct pvclock_vcpu_stolen_time init_values = {};
+ struct kvm *kvm = vcpu->kvm;
+ u64 base = vcpu->arch.steal.base;
+ int idx;
+
+ if (base == GPA_INVALID)
+ return base;
+
+ /*
+ * Start counting stolen time from the time the guest requests
+ * the feature enabled.
+ */
+ vcpu->arch.steal.steal = 0;
+ vcpu->arch.steal.last_steal = current->sched_info.run_delay;
+
+ idx = srcu_read_lock(&kvm->srcu);
+ kvm_write_guest(kvm, base, &init_values, sizeof(init_values));
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ return base;
+}
+
+int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ struct kvm *kvm = vcpu->kvm;
+ u64 ipa;
+ int ret = 0;
+ int idx;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ if (get_user(ipa, user))
+ return -EFAULT;
+ if (!IS_ALIGNED(ipa, 64))
+ return -EINVAL;
+ if (vcpu->arch.steal.base != GPA_INVALID)
+ return -EEXIST;
+
+ /* Check the address is in a valid memslot */
+ idx = srcu_read_lock(&kvm->srcu);
+ if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT)))
+ ret = -EINVAL;
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ if (!ret)
+ vcpu->arch.steal.base = ipa;
+
+ return ret;
+}
+
+int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ u64 __user *user = (u64 __user *)attr->addr;
+ u64 ipa;
+
+ if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA)
+ return -ENXIO;
+
+ ipa = vcpu->arch.steal.base;
+
+ if (put_user(ipa, user))
+ return -EFAULT;
+ return 0;
+}
+
+int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->attr) {
+ case KVM_ARM_VCPU_PVTIME_IPA:
+ return 0;
+ }
+ return -ENXIO;
+}
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 6f50c429196d..a963b9d766b7 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -70,7 +70,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, vcpu_lock_idx = -1, ret;
+ int i, ret;
struct kvm_vcpu *vcpu;
if (irqchip_in_kernel(kvm))
@@ -86,17 +86,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run while we create the vgic.
- */
ret = -EBUSY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!mutex_trylock(&vcpu->mutex))
- goto out_unlock;
- vcpu_lock_idx = i;
- }
+ if (!lock_all_vcpus(kvm))
+ return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.has_run_once)
@@ -125,10 +117,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&vcpu->mutex);
- }
+ unlock_all_vcpus(kvm);
return ret;
}
@@ -177,6 +166,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
break;
default:
kfree(dist->spis);
+ dist->spis = NULL;
return -EINVAL;
}
}
@@ -203,6 +193,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+ atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);
/*
* Enable and configure all SGIs to be edge-triggered and
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 2be6b66b3856..98c7360d9fb7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,7 +360,10 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
if (ret)
return ret;
+ if (map.vpe)
+ atomic_dec(&map.vpe->vlpi_count);
map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ atomic_inc(&map.vpe->vlpi_count);
ret = its_map_vlpi(irq->host_irq, &map);
}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8d69f007dd0c..f45635a6f0ec 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -357,14 +357,14 @@ retry:
}
/**
- * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
* kvm lock and all vcpu lock must be held
*/
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- int last_byte_offset = -1;
struct vgic_irq *irq;
+ gpa_t last_ptr = ~(gpa_t)0;
int ret;
u8 val;
@@ -384,11 +384,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bit_nr = irq->intid % BITS_PER_BYTE;
ptr = pendbase + byte_offset;
- if (byte_offset != last_byte_offset) {
+ if (ptr != last_ptr) {
ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
- last_byte_offset = byte_offset;
+ last_ptr = ptr;
}
stored = val & (1U << bit_nr);
@@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
if (has_vhe())
__vgic_v3_activate_traps(vcpu);
+
+ WARN_ON(vgic_v4_load(vcpu));
}
void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
@@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
void vgic_v3_put(struct kvm_vcpu *vcpu)
{
+ WARN_ON(vgic_v4_put(vcpu, false));
+
vgic_v3_vmcr_sync(vcpu);
kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 477af6aebb97..46f875589c47 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
+ /* We got the message, no need to fire again */
+ if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+ disable_irq_nosync(irq);
+
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
@@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
{
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+ struct irq_desc *desc = irq_to_desc(vpe->irq);
+
+ if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
+ /*
+ * If blocking, a doorbell is required. Undo the nested
+ * disable_irq() calls...
+ */
+ while (need_db && irqd_irq_disabled(&desc->irq_data))
+ enable_irq(vpe->irq);
+
+ return its_schedule_vpe(vpe, false);
}
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_load(struct kvm_vcpu *vcpu)
{
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+ struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
int err;
- if (!vgic_supports_direct_msis(vcpu->kvm))
+ if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
/*
@@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* doc in drivers/irqchip/irq-gic-v4.c to understand how this
* turns into a VMOVP command at the ITS level.
*/
- err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+ err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
if (err)
return err;
- err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
+ /* Disabled the doorbell, as we're about to enter the guest */
+ disable_irq_nosync(vpe->irq);
+
+ err = its_schedule_vpe(vpe, true);
if (err)
return err;
@@ -226,9 +243,7 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
* Now that the VPE is resident, let's get rid of a potential
* doorbell interrupt that would still be pending.
*/
- err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
-
- return err;
+ return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
}
static struct vgic_its *vgic_get_its(struct kvm *kvm,
@@ -266,7 +281,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
mutex_lock(&its->its_lock);
- /* Perform then actual DevID/EventID -> LPI translation. */
+ /* Perform the actual DevID/EventID -> LPI translation. */
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
@@ -294,6 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
irq->hw = true;
irq->host_irq = virq;
+ atomic_inc(&map.vpe->vlpi_count);
out:
mutex_unlock(&its->its_lock);
@@ -327,6 +343,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
WARN_ON(!(irq->hw && irq->host_irq == virq));
if (irq->hw) {
+ atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
irq->hw = false;
ret = its_unmap_vlpi(virq);
}
@@ -335,21 +352,3 @@ out:
mutex_unlock(&its->its_lock);
return ret;
}
-
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- enable_irq(irq);
- }
-}
-
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
-{
- if (vgic_supports_direct_msis(vcpu->kvm)) {
- int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
- if (irq)
- disable_irq(irq);
- }
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 45a870cb63f5..99b02ca730a8 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -857,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
- WARN_ON(vgic_v4_sync_hwstate(vcpu));
-
/* An empty ap_list_head implies used_lrs == 0 */
if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
return;
@@ -882,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
/* Flush our emulation state into the GIC hardware before entering the guest. */
void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
{
- WARN_ON(vgic_v4_flush_hwstate(vcpu));
-
/*
* If there are no virtual interrupts active or pending for this
* VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 83066a81b16a..c7fefd6b1c80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,7 +316,5 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
bool vgic_supports_direct_msis(struct kvm *kvm);
int vgic_v4_init(struct kvm *kvm);
void vgic_v4_teardown(struct kvm *kvm);
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
#endif
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 13efc291b1c7..fef50464d56a 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3115,14 +3115,14 @@ struct kvm_device *kvm_device_from_filp(struct file *filp)
return filp->private_data;
}
-static struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
+static const struct kvm_device_ops *kvm_device_ops_table[KVM_DEV_TYPE_MAX] = {
#ifdef CONFIG_KVM_MPIC
[KVM_DEV_TYPE_FSL_MPIC_20] = &kvm_mpic_ops,
[KVM_DEV_TYPE_FSL_MPIC_42] = &kvm_mpic_ops,
#endif
};
-int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type)
+int kvm_register_device_ops(const struct kvm_device_ops *ops, u32 type)
{
if (type >= ARRAY_SIZE(kvm_device_ops_table))
return -ENOSPC;
@@ -3143,7 +3143,7 @@ void kvm_unregister_device_ops(u32 type)
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
- struct kvm_device_ops *ops = NULL;
+ const struct kvm_device_ops *ops = NULL;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;