summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2021-11-18 13:13:16 -0800
committerJakub Kicinski <kuba@kernel.org>2021-11-18 13:13:16 -0800
commit50fc24944a2a0ef5aab571dcac17f6c5f2613f56 (patch)
tree32477b48cc10073e948eb7b85877dc5a0746749d /arch/x86
parentbb8cecf8ba127abca8ccd102207a59c55fdae515 (diff)
parent8d0112ac6fd001f95aabb084ec2ccaa3637bc344 (diff)
downloadlinux-50fc24944a2a0ef5aab571dcac17f6c5f2613f56.tar.gz
linux-50fc24944a2a0ef5aab571dcac17f6c5f2613f56.tar.bz2
linux-50fc24944a2a0ef5aab571dcac17f6c5f2613f56.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/intel/core.c4
-rw-r--r--arch/x86/events/intel/lbr.c2
-rw-r--r--arch/x86/hyperv/hv_init.c12
-rw-r--r--arch/x86/include/asm/fpu/xcr.h12
-rw-r--r--arch/x86/include/asm/fpu/xstate.h7
-rw-r--r--arch/x86/include/asm/intel-family.h2
-rw-r--r--arch/x86/include/asm/kvm_host.h7
-rw-r--r--arch/x86/include/asm/kvm_para.h12
-rw-r--r--arch/x86/include/asm/mem_encrypt.h4
-rw-r--r--arch/x86/include/asm/paravirt.h6
-rw-r--r--arch/x86/include/asm/paravirt_types.h1
-rw-r--r--arch/x86/include/asm/processor.h5
-rw-r--r--arch/x86/include/asm/set_memory.h1
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/static_call.h1
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h1
-rw-r--r--arch/x86/kernel/cpu/cpuid-deps.c1
-rw-r--r--arch/x86/kernel/cpu/mce/intel.c5
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c20
-rw-r--r--arch/x86/kernel/fpu/xstate.h37
-rw-r--r--arch/x86/kernel/kvm.c109
-rw-r--r--arch/x86/kernel/paravirt.c1
-rw-r--r--arch/x86/kernel/smpboot.c18
-rw-r--r--arch/x86/kernel/static_call.c14
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kvm/cpuid.c93
-rw-r--r--arch/x86/kvm/hyperv.c8
-rw-r--r--arch/x86/kvm/lapic.c23
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu/mmu.c11
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c2
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/pmu.h4
-rw-r--r--arch/x86/kvm/svm/avic.c3
-rw-r--r--arch/x86/kvm/svm/pmu.c5
-rw-r--r--arch/x86/kvm/svm/sev.c317
-rw-r--r--arch/x86/kvm/svm/svm.c14
-rw-r--r--arch/x86/kvm/svm/svm.h30
-rw-r--r--arch/x86/kvm/vmx/nested.c264
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c7
-rw-r--r--arch/x86/kvm/vmx/vmx.c73
-rw-r--r--arch/x86/kvm/vmx/vmx.h43
-rw-r--r--arch/x86/kvm/x86.c157
-rw-r--r--arch/x86/kvm/x86.h12
-rw-r--r--arch/x86/kvm/xen.c22
-rw-r--r--arch/x86/mm/mem_encrypt.c72
-rw-r--r--arch/x86/mm/pat/set_memory.c6
-rw-r--r--arch/x86/xen/smp_pv.c12
48 files changed, 1034 insertions, 433 deletions
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 603964408d2d..42cf01ecdd13 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3048,8 +3048,10 @@ intel_vlbr_constraints(struct perf_event *event)
{
struct event_constraint *c = &vlbr_constraint;
- if (unlikely(constraint_match(c, event->hw.config)))
+ if (unlikely(constraint_match(c, event->hw.config))) {
+ event->hw.flags |= c->flags;
return c;
+ }
return NULL;
}
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 6b72e9b55c69..8043213b75a5 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -265,6 +265,8 @@ void intel_pmu_lbr_reset(void)
cpuc->last_task_ctx = NULL;
cpuc->last_log_id = 0;
+ if (!static_cpu_has(X86_FEATURE_ARCH_LBR) && cpuc->lbr_select)
+ wrmsrl(MSR_LBR_SELECT, 0);
}
/*
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 24f4a06ac46a..96eb7db31c8e 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -177,6 +177,9 @@ void set_hv_tscchange_cb(void (*cb)(void))
return;
}
+ if (!hv_vp_index)
+ return;
+
hv_reenlightenment_cb = cb;
/* Make sure callback is registered before we write to MSRs */
@@ -383,20 +386,13 @@ static void __init hv_get_partition_id(void)
*/
void __init hyperv_init(void)
{
- u64 guest_id, required_msrs;
+ u64 guest_id;
union hv_x64_msr_hypercall_contents hypercall_msr;
int cpuhp;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return;
- /* Absolutely required MSRs */
- required_msrs = HV_MSR_HYPERCALL_AVAILABLE |
- HV_MSR_VP_INDEX_AVAILABLE;
-
- if ((ms_hyperv.features & required_msrs) != required_msrs)
- return;
-
if (hv_common_init())
return;
diff --git a/arch/x86/include/asm/fpu/xcr.h b/arch/x86/include/asm/fpu/xcr.h
index 79f95d3787e2..9656a5bc6fea 100644
--- a/arch/x86/include/asm/fpu/xcr.h
+++ b/arch/x86/include/asm/fpu/xcr.h
@@ -3,6 +3,7 @@
#define _ASM_X86_FPU_XCR_H
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+#define XCR_XFEATURE_IN_USE_MASK 0x00000001
static inline u64 xgetbv(u32 index)
{
@@ -20,4 +21,15 @@ static inline void xsetbv(u32 index, u64 value)
asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}
+/*
+ * Return a mask of xfeatures which are currently being tracked
+ * by the processor as being in the initial configuration.
+ *
+ * Callers should check X86_FEATURE_XGETBV1.
+ */
+static inline u64 xfeatures_in_use(void)
+{
+ return xgetbv(XCR_XFEATURE_IN_USE_MASK);
+}
+
#endif /* _ASM_X86_FPU_XCR_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 0f8b90ab18c9..cd3dd170e23a 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -92,6 +92,13 @@
#define XFEATURE_MASK_FPSTATE (XFEATURE_MASK_USER_RESTORE | \
XFEATURE_MASK_SUPERVISOR_SUPPORTED)
+/*
+ * Features in this mask have space allocated in the signal frame, but may not
+ * have that space initialized when the feature is in its init state.
+ */
+#define XFEATURE_MASK_SIGFRAME_INITOPT (XFEATURE_MASK_XTILE | \
+ XFEATURE_MASK_USER_DYNAMIC)
+
extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void __init update_regset_xstate_info(unsigned int size,
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 27158436f322..5a0bcf8b78d7 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -108,6 +108,8 @@
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_FAM6_RAPTOR_LAKE 0xB7
+
/* "Small Core" Processors (Atom) */
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2acf37cc1991..6ac61f85e07b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -38,7 +38,6 @@
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
#define KVM_MAX_VCPUS 1024
-#define KVM_SOFT_MAX_VCPUS 710
/*
* In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
@@ -364,6 +363,7 @@ union kvm_mmu_extended_role {
unsigned int cr4_smap:1;
unsigned int cr4_smep:1;
unsigned int cr4_la57:1;
+ unsigned int efer_lma:1;
};
};
@@ -725,6 +725,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
+ u32 kvm_cpuid_base;
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -748,7 +749,7 @@ struct kvm_vcpu_arch {
u8 preempted;
u64 msr_val;
u64 last_steal;
- struct gfn_to_pfn_cache cache;
+ struct gfn_to_hva_cache cache;
} st;
u64 l1_tsc_offset;
@@ -1034,6 +1035,7 @@ struct kvm_x86_msr_filter {
#define APICV_INHIBIT_REASON_IRQWIN 3
#define APICV_INHIBIT_REASON_PIT_REINJ 4
#define APICV_INHIBIT_REASON_X2APIC 5
+#define APICV_INHIBIT_REASON_BLOCKIRQ 6
struct kvm_arch {
unsigned long n_used_mmu_pages;
@@ -1476,6 +1478,7 @@ struct kvm_x86_ops {
int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
+ int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
int (*get_msr_feature)(struct kvm_msr_entry *entry);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 69299878b200..56935ebb1dfe 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -83,6 +83,18 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
return ret;
}
+static inline long kvm_sev_hypercall3(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+ long ret;
+
+ asm volatile("vmmcall"
+ : "=a"(ret)
+ : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
+ : "memory");
+ return ret;
+}
+
#ifdef CONFIG_KVM_GUEST
void kvmclock_init(void);
void kvmclock_disable(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index 2d4f5c17d79c..e2c6f433ed10 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -44,6 +44,8 @@ void __init sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages,
+ bool enc);
void __init mem_encrypt_free_decrypted_mem(void);
@@ -78,6 +80,8 @@ static inline int __init
early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; }
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+static inline void __init
+early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {}
static inline void mem_encrypt_free_decrypted_mem(void) { }
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index cebec95a7124..21c4a694ca11 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -97,6 +97,12 @@ static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
PVOP_VCALL1(mmu.exit_mmap, mm);
}
+static inline void notify_page_enc_status_changed(unsigned long pfn,
+ int npages, bool enc)
+{
+ PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index fc1151e77569..a69012e1903f 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -168,6 +168,7 @@ struct pv_mmu_ops {
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
+ void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
#ifdef CONFIG_PARAVIRT_XXL
struct paravirt_callee_save read_cr2;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 191878a65c61..355d38c0cf60 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -806,11 +806,14 @@ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
#endif
+#define for_each_possible_hypervisor_cpuid_base(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
{
uint32_t base, eax, signature[3];
- for (base = 0x40000000; base < 0x40010000; base += 0x100) {
+ for_each_possible_hypervisor_cpuid_base(base) {
cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
if (!memcmp(sig, signature, 12) &&
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 43fa081a1adb..872617542bbc 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -83,6 +83,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 08b0e90623ad..81a0211a372d 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -126,6 +126,7 @@ static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
void cpu_disable_common(void);
void native_smp_prepare_boot_cpu(void);
+void smp_prepare_cpus_common(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index cbb67b6030f9..39ebe0511869 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -27,6 +27,7 @@
".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
insns " \n" \
+ ".byte 0x53, 0x43, 0x54 \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 5146bbab84d4..6e64b27b2c1e 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -8,6 +8,7 @@
* should be used to determine that a VM is running under KVM.
*/
#define KVM_CPUID_SIGNATURE 0x40000000
+#define KVM_SIGNATURE "KVMKVMKVM\0\0\0"
/* This CPUID returns two feature bitmaps in eax, edx. Before enabling
* a particular paravirtualization, the appropriate feature bit should
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
index cb2fdd130aae..c881bcafba7d 100644
--- a/arch/x86/kernel/cpu/cpuid-deps.c
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -76,6 +76,7 @@ static const struct cpuid_dep cpuid_deps[] = {
{ X86_FEATURE_SGX1, X86_FEATURE_SGX },
{ X86_FEATURE_SGX2, X86_FEATURE_SGX1 },
{ X86_FEATURE_XFD, X86_FEATURE_XSAVES },
+ { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 },
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
{}
};
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index acfd5d9f93c6..bb9a46a804bf 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -547,12 +547,13 @@ bool intel_filter_mce(struct mce *m)
{
struct cpuinfo_x86 *c = &boot_cpu_data;
- /* MCE errata HSD131, HSM142, HSW131, BDM48, and HSM142 */
+ /* MCE errata HSD131, HSM142, HSW131, BDM48, HSM142 and SKX37 */
if ((c->x86 == 6) &&
((c->x86_model == INTEL_FAM6_HASWELL) ||
(c->x86_model == INTEL_FAM6_HASWELL_L) ||
(c->x86_model == INTEL_FAM6_BROADWELL) ||
- (c->x86_model == INTEL_FAM6_HASWELL_G)) &&
+ (c->x86_model == INTEL_FAM6_HASWELL_G) ||
+ (c->x86_model == INTEL_FAM6_SKYLAKE_X)) &&
(m->bank == 0) &&
((m->status & 0xa0000000ffffffff) == 0x80000000000f0005))
return true;
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 4794b716ec79..ff55df60228f 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -163,12 +163,22 @@ static uint32_t __init ms_hyperv_platform(void)
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
- if (eax >= HYPERV_CPUID_MIN &&
- eax <= HYPERV_CPUID_MAX &&
- !memcmp("Microsoft Hv", hyp_signature, 12))
- return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
+ if (eax < HYPERV_CPUID_MIN || eax > HYPERV_CPUID_MAX ||
+ memcmp("Microsoft Hv", hyp_signature, 12))
+ return 0;
- return 0;
+ /* HYPERCALL and VP_INDEX MSRs are mandatory for all features. */
+ eax = cpuid_eax(HYPERV_CPUID_FEATURES);
+ if (!(eax & HV_MSR_HYPERCALL_AVAILABLE)) {
+ pr_warn("x86/hyperv: HYPERCALL MSR not available.\n");
+ return 0;
+ }
+ if (!(eax & HV_MSR_VP_INDEX_AVAILABLE)) {
+ pr_warn("x86/hyperv: VP_INDEX MSR not available.\n");
+ return 0;
+ }
+
+ return HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS;
}
static unsigned char hv_get_nmi_reason(void)
diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h
index e18210dff88c..86ea7c0fa2f6 100644
--- a/arch/x86/kernel/fpu/xstate.h
+++ b/arch/x86/kernel/fpu/xstate.h
@@ -4,6 +4,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/xstate.h>
+#include <asm/fpu/xcr.h>
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(u64, xfd_state);
@@ -199,6 +200,32 @@ static inline void os_xrstor_supervisor(struct fpstate *fpstate)
}
/*
+ * XSAVE itself always writes all requested xfeatures. Removing features
+ * from the request bitmap reduces the features which are written.
+ * Generate a mask of features which must be written to a sigframe. The
+ * unset features can be optimized away and not written.
+ *
+ * This optimization is user-visible. Only use for states where
+ * uninitialized sigframe contents are tolerable, like dynamic features.
+ *
+ * Users of buffers produced with this optimization must check XSTATE_BV
+ * to determine which features have been optimized out.
+ */
+static inline u64 xfeatures_need_sigframe_write(void)
+{
+ u64 xfeaures_to_write;
+
+ /* In-use features must be written: */
+ xfeaures_to_write = xfeatures_in_use();
+
+ /* Also write all non-optimizable sigframe features: */
+ xfeaures_to_write |= XFEATURE_MASK_USER_SUPPORTED &
+ ~XFEATURE_MASK_SIGFRAME_INITOPT;
+
+ return xfeaures_to_write;
+}
+
+/*
* Save xstate to user space xsave area.
*
* We don't use modified optimization because xrstor/xrstors might track
@@ -220,10 +247,16 @@ static inline int xsave_to_user_sigframe(struct xregs_state __user *buf)
*/
struct fpstate *fpstate = current->thread.fpu.fpstate;
u64 mask = fpstate->user_xfeatures;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
+ u32 lmask;
+ u32 hmask;
int err;
+ /* Optimize away writing unnecessary xfeatures: */
+ if (fpu_state_size_dynamic())
+ mask &= xfeatures_need_sigframe_write();
+
+ lmask = mask;
+ hmask = mask >> 32;
xfd_validate_state(fpstate, mask, false);
stac();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8863d1941f1b..59abbdad7729 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -28,6 +28,7 @@
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <linux/cc_platform.h>
+#include <linux/efi.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
@@ -41,6 +42,7 @@
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>
+#include <asm/e820/api.h>
DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -434,6 +436,8 @@ static void kvm_guest_cpu_offline(bool shutdown)
kvm_disable_steal_time();
if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+ if (kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL, 0);
kvm_pv_disable_apf();
if (!shutdown)
apf_task_wake_all();
@@ -548,6 +552,55 @@ static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
__send_ipi_mask(local_mask, vector);
}
+static int __init setup_efi_kvm_sev_migration(void)
+{
+ efi_char16_t efi_sev_live_migration_enabled[] = L"SevLiveMigrationEnabled";
+ efi_guid_t efi_variable_guid = AMD_SEV_MEM_ENCRYPT_GUID;
+ efi_status_t status;
+ unsigned long size;
+ bool enabled;
+
+ if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) ||
+ !kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL))
+ return 0;
+
+ if (!efi_enabled(EFI_BOOT))
+ return 0;
+
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) {
+ pr_info("%s : EFI runtime services are not enabled\n", __func__);
+ return 0;
+ }
+
+ size = sizeof(enabled);
+
+ /* Get variable contents into buffer */
+ status = efi.get_variable(efi_sev_live_migration_enabled,
+ &efi_variable_guid, NULL, &size, &enabled);
+
+ if (status == EFI_NOT_FOUND) {
+ pr_info("%s : EFI live migration variable not found\n", __func__);
+ return 0;
+ }
+
+ if (status != EFI_SUCCESS) {
+ pr_info("%s : EFI variable retrieval failed\n", __func__);
+ return 0;
+ }
+
+ if (enabled == 0) {
+ pr_info("%s: live migration disabled in EFI\n", __func__);
+ return 0;
+ }
+
+ pr_info("%s : live migration enabled in EFI\n", __func__);
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL, KVM_MIGRATION_READY);
+
+ return 1;
+}
+
+late_initcall(setup_efi_kvm_sev_migration);
+
/*
* Set the IPI entry points
*/
@@ -756,7 +809,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
return 0; /* So we don't blow up on old processors */
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
+ return hypervisor_cpuid_base(KVM_SIGNATURE, 0);
return 0;
}
@@ -806,8 +859,62 @@ static bool __init kvm_msi_ext_dest_id(void)
return kvm_para_has_feature(KVM_FEATURE_MSI_EXT_DEST_ID);
}
+static void kvm_sev_hc_page_enc_status(unsigned long pfn, int npages, bool enc)
+{
+ kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, pfn << PAGE_SHIFT, npages,
+ KVM_MAP_GPA_RANGE_ENC_STAT(enc) | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+}
+
static void __init kvm_init_platform(void)
{
+ if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT) &&
+ kvm_para_has_feature(KVM_FEATURE_MIGRATION_CONTROL)) {
+ unsigned long nr_pages;
+ int i;
+
+ pv_ops.mmu.notify_page_enc_status_changed =
+ kvm_sev_hc_page_enc_status;
+
+ /*
+ * Reset the host's shared pages list related to kernel
+ * specific page encryption status settings before we load a
+ * new kernel by kexec. Reset the page encryption status
+ * during early boot intead of just before kexec to avoid SMP
+ * races during kvm_pv_guest_cpu_reboot().
+ * NOTE: We cannot reset the complete shared pages list
+ * here as we need to retain the UEFI/OVMF firmware
+ * specific settings.
+ */
+
+ for (i = 0; i < e820_table->nr_entries; i++) {
+ struct e820_entry *entry = &e820_table->entries[i];
+
+ if (entry->type != E820_TYPE_RAM)
+ continue;
+
+ nr_pages = DIV_ROUND_UP(entry->size, PAGE_SIZE);
+
+ kvm_sev_hypercall3(KVM_HC_MAP_GPA_RANGE, entry->addr,
+ nr_pages,
+ KVM_MAP_GPA_RANGE_ENCRYPTED | KVM_MAP_GPA_RANGE_PAGE_SZ_4K);
+ }
+
+ /*
+ * Ensure that _bss_decrypted section is marked as decrypted in the
+ * shared pages list.
+ */
+ nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted,
+ PAGE_SIZE);
+ early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted,
+ nr_pages, 0);
+
+ /*
+ * If not booted using EFI, enable Live migration support.
+ */
+ if (!efi_enabled(EFI_BOOT))
+ wrmsrl(MSR_KVM_MIGRATION_CONTROL,
+ KVM_MIGRATION_READY);
+ }
kvmclock_init();
x86_platform.apic_post_init = kvm_apic_init;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7157c2df3bc2..7f7636aac620 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -337,6 +337,7 @@ struct paravirt_patch_template pv_ops = {
(void (*)(struct mmu_gather *, void *))tlb_remove_page,
.mmu.exit_mmap = paravirt_nop,
+ .mmu.notify_page_enc_status_changed = paravirt_nop,
#ifdef CONFIG_PARAVIRT_XXL
.mmu.read_cr2 = __PV_IS_CALLEE_SAVE(pv_native_read_cr2),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8241927addff..ac2909f0cab3 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1350,12 +1350,7 @@ static void __init smp_get_logical_apicid(void)
cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
-/*
- * Prepare for SMP bootup.
- * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
- * for common interface support.
- */
-void __init native_smp_prepare_cpus(unsigned int max_cpus)
+void __init smp_prepare_cpus_common(void)
{
unsigned int i;
@@ -1386,6 +1381,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_sched_topology(x86_topology);
set_cpu_sibling_map(0);
+}
+
+/*
+ * Prepare for SMP bootup.
+ * @max_cpus: configured maximum number of CPUs, It is a legacy parameter
+ * for common interface support.
+ */
+void __init native_smp_prepare_cpus(unsigned int max_cpus)
+{
+ smp_prepare_cpus_common();
+
init_freq_invariance(false, false);
smp_sanity_check();
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index ea028e736831..9c407a33a774 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -56,10 +56,15 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void
text_poke_bp(insn, code, size, emulate);
}
-static void __static_call_validate(void *insn, bool tail)
+static void __static_call_validate(void *insn, bool tail, bool tramp)
{
u8 opcode = *(u8 *)insn;
+ if (tramp && memcmp(insn+5, "SCT", 3)) {
+ pr_err("trampoline signature fail");
+ BUG();
+ }
+
if (tail) {
if (opcode == JMP32_INSN_OPCODE ||
opcode == RET_INSN_OPCODE)
@@ -74,7 +79,8 @@ static void __static_call_validate(void *insn, bool tail)
/*
* If we ever trigger this, our text is corrupt, we'll probably not live long.
*/
- WARN_ONCE(1, "unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+ pr_err("unexpected static_call insn opcode 0x%x at %pS\n", opcode, insn);
+ BUG();
}
static inline enum insn_type __sc_insn(bool null, bool tail)
@@ -97,12 +103,12 @@ void arch_static_call_transform(void *site, void *tramp, void *func, bool tail)
mutex_lock(&text_mutex);
if (tramp) {
- __static_call_validate(tramp, true);
+ __static_call_validate(tramp, true, true);
__static_call_transform(tramp, __sc_insn(!func, true), func);
}
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
- __static_call_validate(site, tail);
+ __static_call_validate(site, tail, false);
__static_call_transform(site, __sc_insn(!func, tail), func);
}
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index f14f69d7aa3c..cce1c89cb7df 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -106,7 +106,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
*/
local_irq_enable();
- BUG_ON(!vm86 || !vm86->user_vm86);
+ BUG_ON(!vm86);
set_flags(regs->pt.flags, VEFLAGS, X86_EFLAGS_VIF | vm86->veflags_mask);
user = vm86->user_vm86;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2d70edb0f323..07e9215e911d 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -99,11 +99,45 @@ static int kvm_check_cpuid(struct kvm_cpuid_entry2 *entries, int nent)
return 0;
}
-void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+static void kvm_update_kvm_cpuid_base(struct kvm_vcpu *vcpu)
{
- struct kvm_cpuid_entry2 *best;
+ u32 function;
+ struct kvm_cpuid_entry2 *entry;
+
+ vcpu->arch.kvm_cpuid_base = 0;
+
+ for_each_possible_hypervisor_cpuid_base(function) {
+ entry = kvm_find_cpuid_entry(vcpu, function, 0);
- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ if (entry) {
+ u32 signature[3];
+
+ signature[0] = entry->ebx;
+ signature[1] = entry->ecx;
+ signature[2] = entry->edx;
+
+ BUILD_BUG_ON(sizeof(signature) > sizeof(KVM_SIGNATURE));
+ if (!memcmp(signature, KVM_SIGNATURE, sizeof(signature))) {
+ vcpu->arch.kvm_cpuid_base = function;
+ break;
+ }
+ }
+ }
+}
+
+static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu)
+{
+ u32 base = vcpu->arch.kvm_cpuid_base;
+
+ if (!base)
+ return NULL;
+
+ return kvm_find_cpuid_entry(vcpu, base | KVM_CPUID_FEATURES, 0);
+}
+
+void kvm_update_pv_runtime(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best = kvm_find_kvm_cpuid_features(vcpu);
/*
* save the feature bitmap to avoid cpuid lookup for every PV
@@ -142,7 +176,7 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
cpuid_entry_has(best, X86_FEATURE_XSAVEC)))
best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
- best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
+ best = kvm_find_kvm_cpuid_features(vcpu);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
@@ -239,6 +273,26 @@ u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
}
+static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2,
+ int nent)
+{
+ int r;
+
+ r = kvm_check_cpuid(e2, nent);
+ if (r)
+ return r;
+
+ kvfree(vcpu->arch.cpuid_entries);
+ vcpu->arch.cpuid_entries = e2;
+ vcpu->arch.cpuid_nent = nent;
+
+ kvm_update_kvm_cpuid_base(vcpu);
+ kvm_update_cpuid_runtime(vcpu);
+ kvm_vcpu_after_set_cpuid(vcpu);
+
+ return 0;
+}
+
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@@ -275,18 +329,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
e2[i].padding[2] = 0;
}
- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- goto out_free_cpuid;
- }
-
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
out_free_cpuid:
kvfree(e);
@@ -310,20 +355,11 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
return PTR_ERR(e2);
}
- r = kvm_check_cpuid(e2, cpuid->nent);
- if (r) {
+ r = kvm_set_cpuid(vcpu, e2, cpuid->nent);
+ if (r)
kvfree(e2);
- return r;
- }
- kvfree(vcpu->arch.cpuid_entries);
- vcpu->arch.cpuid_entries = e2;
- vcpu->arch.cpuid_nent = cpuid->nent;
-
- kvm_update_cpuid_runtime(vcpu);
- kvm_vcpu_after_set_cpuid(vcpu);
-
- return 0;
+ return r;
}
int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
@@ -871,8 +907,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
}
break;
case KVM_CPUID_SIGNATURE: {
- static const char signature[12] = "KVMKVMKVM\0\0";
- const u32 *sigptr = (const u32 *)signature;
+ const u32 *sigptr = (const u32 *)KVM_SIGNATURE;
entry->eax = KVM_CPUID_FEATURES;
entry->ebx = sigptr[0];
entry->ecx = sigptr[1];
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4f15c0165c05..5e19e6e4c2ce 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1472,7 +1472,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
hv_vcpu->hv_vapic = data;
- if (kvm_lapic_enable_pv_eoi(vcpu, 0, 0))
+ if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
return 1;
break;
}
@@ -1490,7 +1490,7 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
return 1;
hv_vcpu->hv_vapic = data;
kvm_vcpu_mark_page_dirty(vcpu, gfn);
- if (kvm_lapic_enable_pv_eoi(vcpu,
+ if (kvm_lapic_set_pv_eoi(vcpu,
gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
sizeof(struct hv_vp_assist_page)))
return 1;
@@ -2022,7 +2022,7 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
bool longmode;
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (longmode)
kvm_rax_write(vcpu, result);
else {
@@ -2171,7 +2171,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_X86_64
- if (is_64_bit_mode(vcpu)) {
+ if (is_64_bit_hypercall(vcpu)) {
hc.param = kvm_rcx_read(vcpu);
hc.ingpa = kvm_rdx_read(vcpu);
hc.outgpa = kvm_r8_read(vcpu);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d6ac32f3f650..759952dd1222 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2856,25 +2856,30 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
return 0;
}
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
{
u64 addr = data & ~KVM_MSR_ENABLED;
struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
unsigned long new_len;
+ int ret;
if (!IS_ALIGNED(addr, 4))
return 1;
- vcpu->arch.pv_eoi.msr_val = data;
- if (!pv_eoi_enabled(vcpu))
- return 0;
+ if (data & KVM_MSR_ENABLED) {
+ if (addr == ghc->gpa && len <= ghc->len)
+ new_len = ghc->len;
+ else
+ new_len = len;
- if (addr == ghc->gpa && len <= ghc->len)
- new_len = ghc->len;
- else
- new_len = len;
+ ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ if (ret)
+ return ret;
+ }
+
+ vcpu->arch.pv_eoi.msr_val = data;
- return kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
+ return 0;
}
int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d7c25d0c1354..2b44e533fc8d 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -127,7 +127,7 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data);
-int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
+int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len);
void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 323b5057d08f..3be9beea838d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3191,17 +3191,17 @@ static int fast_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
new_spte |= PT_WRITABLE_MASK;
/*
- * Do not fix write-permission on the large spte. Since
- * we only dirty the first page into the dirty-bitmap in
+ * Do not fix write-permission on the large spte when
+ * dirty logging is enabled. Since we only dirty the
+ * first page into the dirty-bitmap in
* fast_pf_fix_direct_spte(), other pages are missed
* if its slot has dirty logging enabled.
*
* Instead, we let the slow page fault path create a
* normal spte to fix the access.
- *
- * See the comments in kvm_arch_commit_memory_region().
*/
- if (sp->role.level > PG_LEVEL_4K)
+ if (sp->role.level > PG_LEVEL_4K &&
+ kvm_slot_dirty_track_enabled(fault->slot))
break;
}
@@ -4682,6 +4682,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu,
/* PKEY and LA57 are active iff long mode is active. */
ext.cr4_pke = ____is_efer_lma(regs) && ____is_cr4_pke(regs);
ext.cr4_la57 = ____is_efer_lma(regs) && ____is_cr4_la57(regs);
+ ext.efer_lma = ____is_efer_lma(regs);
}
ext.valid = 1;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 7c5dd83e52de..a54c3491af42 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -897,7 +897,7 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
struct kvm_page_fault *fault,
struct tdp_iter *iter)
{
- struct kvm_mmu_page *sp = sptep_to_sp(iter->sptep);
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep));
u64 new_spte;
int ret = RET_PF_FIXED;
bool wrprot = false;
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 0772bad9165c..09873f6488f7 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -319,7 +319,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
}
/* check if idx is a valid index to access PMU */
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
return kvm_x86_ops.pmu_ops->is_valid_rdpmc_ecx(vcpu, idx);
}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 0e4f2b1fa9fb..59d6b76203d5 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -32,7 +32,7 @@ struct kvm_pmu_ops {
struct kvm_pmc *(*rdpmc_ecx_to_pmc)(struct kvm_vcpu *vcpu,
unsigned int idx, u64 *mask);
struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr);
- int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
+ bool (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx);
bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr);
int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
@@ -149,7 +149,7 @@ void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data);
-int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
+bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx);
bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr);
int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 8052d92069e0..affc0ea98d30 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -904,7 +904,8 @@ bool svm_check_apicv_inhibit_reasons(ulong bit)
BIT(APICV_INHIBIT_REASON_NESTED) |
BIT(APICV_INHIBIT_REASON_IRQWIN) |
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
- BIT(APICV_INHIBIT_REASON_X2APIC);
+ BIT(APICV_INHIBIT_REASON_X2APIC) |
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index fdf587f19c5f..871c426ec389 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -181,14 +181,13 @@ static struct kvm_pmc *amd_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
return get_gp_pmc_amd(pmu, base + pmc_idx, PMU_TYPE_COUNTER);
}
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool amd_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
idx &= ~(3u << 30);
- return (idx >= pmu->nr_arch_gp_counters);
+ return idx < pmu->nr_arch_gp_counters;
}
/* idx is the ECX register of RDPMC instruction */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 1964b9a174be..21ac0a5de4e0 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -120,16 +120,26 @@ static bool __sev_recycle_asids(int min_asid, int max_asid)
return true;
}
+static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ return misc_cg_try_charge(type, sev->misc_cg, 1);
+}
+
+static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
+{
+ enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+ misc_cg_uncharge(type, sev->misc_cg, 1);
+}
+
static int sev_asid_new(struct kvm_sev_info *sev)
{
int asid, min_asid, max_asid, ret;
bool retry = true;
- enum misc_res_type type;
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
WARN_ON(sev->misc_cg);
sev->misc_cg = get_current_misc_cg();
- ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+ ret = sev_misc_cg_try_charge(sev);
if (ret) {
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
@@ -162,7 +172,7 @@ again:
return asid;
e_uncharge:
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
return ret;
@@ -179,7 +189,6 @@ static void sev_asid_free(struct kvm_sev_info *sev)
{
struct svm_cpu_data *sd;
int cpu;
- enum misc_res_type type;
mutex_lock(&sev_bitmap_lock);
@@ -192,8 +201,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
mutex_unlock(&sev_bitmap_lock);
- type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
- misc_cg_uncharge(type, sev->misc_cg, 1);
+ sev_misc_cg_uncharge(sev);
put_misc_cg(sev->misc_cg);
sev->misc_cg = NULL;
}
@@ -229,7 +237,6 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- bool es_active = argp->id == KVM_SEV_ES_INIT;
int asid, ret;
if (kvm->created_vcpus)
@@ -239,7 +246,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (unlikely(sev->active))
return ret;
- sev->es_active = es_active;
+ sev->active = true;
+ sev->es_active = argp->id == KVM_SEV_ES_INIT;
asid = sev_asid_new(sev);
if (asid < 0)
goto e_no_asid;
@@ -249,8 +257,6 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
if (ret)
goto e_free;
- sev->active = true;
- sev->asid = asid;
INIT_LIST_HEAD(&sev->regions_list);
return 0;
@@ -260,6 +266,7 @@ e_free:
sev->asid = 0;
e_no_asid:
sev->es_active = false;
+ sev->active = false;
return ret;
}
@@ -590,7 +597,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
* traditional VMSA as it has been built so far (in prep
* for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
*/
- memcpy(svm->vmsa, save, sizeof(*save));
+ memcpy(svm->sev_es.vmsa, save, sizeof(*save));
return 0;
}
@@ -612,11 +619,11 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
* the VMSA memory content (i.e it will write the same memory region
* with the guest's key), so invalidate it first.
*/
- clflush_cache_range(svm->vmsa, PAGE_SIZE);
+ clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);
vmsa.reserved = 0;
vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
- vmsa.address = __sme_pa(svm->vmsa);
+ vmsa.address = __sme_pa(svm->sev_es.vmsa);
vmsa.len = PAGE_SIZE;
ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
if (ret)
@@ -1522,7 +1529,7 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}
-static bool cmd_allowed_from_miror(u32 cmd_id)
+static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
/*
* Allow mirrors VM to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
@@ -1536,6 +1543,201 @@ static bool cmd_allowed_from_miror(u32 cmd_id)
return false;
}
+static int sev_lock_for_migration(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ /*
+ * Bail if this VM is already involved in a migration to avoid deadlock
+ * between two VMs trying to migrate to/from each other.
+ */
+ if (atomic_cmpxchg_acquire(&sev->migration_in_progress, 0, 1))
+ return -EBUSY;
+
+ mutex_lock(&kvm->lock);
+
+ return 0;
+}
+
+static void sev_unlock_after_migration(struct kvm *kvm)
+{
+ struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+
+ mutex_unlock(&kvm->lock);
+ atomic_set_release(&sev->migration_in_progress, 0);
+}
+
+
+static int sev_lock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i, j;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (mutex_lock_killable(&vcpu->mutex))
+ goto out_unlock;
+ }
+
+ return 0;
+
+out_unlock:
+ kvm_for_each_vcpu(j, vcpu, kvm) {
+ if (i == j)
+ break;
+
+ mutex_unlock(&vcpu->mutex);
+ }
+ return -EINTR;
+}
+
+static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ mutex_unlock(&vcpu->mutex);
+ }
+}
+
+static void sev_migrate_from(struct kvm_sev_info *dst,
+ struct kvm_sev_info *src)
+{
+ dst->active = true;
+ dst->asid = src->asid;
+ dst->handle = src->handle;
+ dst->pages_locked = src->pages_locked;
+
+ src->asid = 0;
+ src->active = false;
+ src->handle = 0;
+ src->pages_locked = 0;
+
+ INIT_LIST_HEAD(&dst->regions_list);
+ list_replace_init(&src->regions_list, &dst->regions_list);
+}
+
+static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
+{
+ int i;
+ struct kvm_vcpu *dst_vcpu, *src_vcpu;
+ struct vcpu_svm *dst_svm, *src_svm;
+
+ if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ if (!src_vcpu->arch.guest_state_protected)
+ return -EINVAL;
+ }
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ src_svm = to_svm(src_vcpu);
+ dst_vcpu = kvm_get_vcpu(dst, i);
+ dst_svm = to_svm(dst_vcpu);
+
+ /*
+ * Transfer VMSA and GHCB state to the destination. Nullify and
+ * clear source fields as appropriate, the state now belongs to
+ * the destination.
+ */
+ memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
+ dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
+ dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
+ dst_vcpu->arch.guest_state_protected = true;
+
+ memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
+ src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
+ src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
+ src_vcpu->arch.guest_state_protected = false;
+ }
+ to_kvm_svm(src)->sev_info.es_active = false;
+ to_kvm_svm(dst)->sev_info.es_active = true;
+
+ return 0;
+}
+
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
+{
+ struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
+ struct kvm_sev_info *src_sev, *cg_cleanup_sev;
+ struct file *source_kvm_file;
+ struct kvm *source_kvm;
+ bool charged = false;
+ int ret;
+
+ ret = sev_lock_for_migration(kvm);
+ if (ret)
+ return ret;
+
+ if (sev_guest(kvm)) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ source_kvm_file = fget(source_fd);
+ if (!file_is_kvm(source_kvm_file)) {
+ ret = -EBADF;
+ goto out_fput;
+ }
+
+ source_kvm = source_kvm_file->private_data;
+ ret = sev_lock_for_migration(source_kvm);
+ if (ret)
+ goto out_fput;
+
+ if (!sev_guest(source_kvm)) {
+ ret = -EINVAL;
+ goto out_source;
+ }
+
+ src_sev = &to_kvm_svm(source_kvm)->sev_info;
+ dst_sev->misc_cg = get_current_misc_cg();
+ cg_cleanup_sev = dst_sev;
+ if (dst_sev->misc_cg != src_sev->misc_cg) {
+ ret = sev_misc_cg_try_charge(dst_sev);
+ if (ret)
+ goto out_dst_cgroup;
+ charged = true;
+ }
+
+ ret = sev_lock_vcpus_for_migration(kvm);
+ if (ret)
+ goto out_dst_cgroup;
+ ret = sev_lock_vcpus_for_migration(source_kvm);
+ if (ret)
+ goto out_dst_vcpu;
+
+ if (sev_es_guest(source_kvm)) {
+ ret = sev_es_migrate_from(kvm, source_kvm);
+ if (ret)
+ goto out_source_vcpu;
+ }
+ sev_migrate_from(dst_sev, src_sev);
+ kvm_vm_dead(source_kvm);
+ cg_cleanup_sev = src_sev;
+ ret = 0;
+
+out_source_vcpu:
+ sev_unlock_vcpus_for_migration(source_kvm);
+out_dst_vcpu:
+ sev_unlock_vcpus_for_migration(kvm);
+out_dst_cgroup:
+ /* Operates on the source on success, on the destination on failure. */
+ if (charged)
+ sev_misc_cg_uncharge(cg_cleanup_sev);
+ put_misc_cg(cg_cleanup_sev->misc_cg);
+ cg_cleanup_sev->misc_cg = NULL;
+out_source:
+ sev_unlock_after_migration(source_kvm);
+out_fput:
+ if (source_kvm_file)
+ fput(source_kvm_file);
+out_unlock:
+ sev_unlock_after_migration(kvm);
+ return ret;
+}
+
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
struct kvm_sev_cmd sev_cmd;
@@ -1554,7 +1756,7 @@ int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
/* Only the enc_context_owner handles some memory enc operations. */
if (is_mirroring_enc_context(kvm) &&
- !cmd_allowed_from_miror(sev_cmd.id)) {
+ !is_cmd_allowed_from_mirror(sev_cmd.id)) {
r = -EINVAL;
goto out;
}
@@ -1787,7 +1989,12 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
mutex_unlock(&source_kvm->lock);
mutex_lock(&kvm->lock);
- if (sev_guest(kvm)) {
+ /*
+ * Disallow out-of-band SEV/SEV-ES init if the target is already an
+ * SEV guest, or if vCPUs have been created. KVM relies on vCPUs being
+ * created after SEV/SEV-ES initialization, e.g. to init intercepts.
+ */
+ if (sev_guest(kvm) || kvm->created_vcpus) {
ret = -EINVAL;
goto e_mirror_unlock;
}
@@ -2038,16 +2245,16 @@ void sev_free_vcpu(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu);
if (vcpu->arch.guest_state_protected)
- sev_flush_guest_memory(svm, svm->vmsa, PAGE_SIZE);
- __free_page(virt_to_page(svm->vmsa));
+ sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
+ __free_page(virt_to_page(svm->sev_es.vmsa));
- if (svm->ghcb_sa_free)
- kfree(svm->ghcb_sa);
+ if (svm->sev_es.ghcb_sa_free)
+ kfree(svm->sev_es.ghcb_sa);
}
static void dump_ghcb(struct vcpu_svm *svm)
{
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
unsigned int nbits;
/* Re-use the dump_invalid_vmcb module parameter */
@@ -2073,7 +2280,7 @@ static void dump_ghcb(struct vcpu_svm *svm)
static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
/*
* The GHCB protocol so far allows for the following data
@@ -2093,7 +2300,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
struct vmcb_control_area *control = &svm->vmcb->control;
struct kvm_vcpu *vcpu = &svm->vcpu;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 exit_code;
/*
@@ -2140,7 +2347,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
struct ghcb *ghcb;
u64 exit_code = 0;
- ghcb = svm->ghcb;
+ ghcb = svm->sev_es.ghcb;
/* Only GHCB Usage code 0 is supported */
if (ghcb->ghcb_usage)
@@ -2258,33 +2465,34 @@ vmgexit_err:
void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- if (svm->ghcb_sa_free) {
+ if (svm->sev_es.ghcb_sa_free) {
/*
* The scratch area lives outside the GHCB, so there is a
* buffer that, depending on the operation performed, may
* need to be synced, then freed.
*/
- if (svm->ghcb_sa_sync) {
+ if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
- ghcb_get_sw_scratch(svm->ghcb),
- svm->ghcb_sa, svm->ghcb_sa_len);
- svm->ghcb_sa_sync = false;
+ ghcb_get_sw_scratch(svm->sev_es.ghcb),
+ svm->sev_es.ghcb_sa,
+ svm->sev_es.ghcb_sa_len);
+ svm->sev_es.ghcb_sa_sync = false;
}
- kfree(svm->ghcb_sa);
- svm->ghcb_sa = NULL;
- svm->ghcb_sa_free = false;
+ kfree(svm->sev_es.ghcb_sa);
+ svm->sev_es.ghcb_sa = NULL;
+ svm->sev_es.ghcb_sa_free = false;
}
- trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->ghcb);
+ trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);
sev_es_sync_to_ghcb(svm);
- kvm_vcpu_unmap(&svm->vcpu, &svm->ghcb_map, true);
- svm->ghcb = NULL;
+ kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
+ svm->sev_es.ghcb = NULL;
}
void pre_sev_run(struct vcpu_svm *svm, int cpu)
@@ -2314,7 +2522,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
- struct ghcb *ghcb = svm->ghcb;
+ struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;
void *scratch_va;
@@ -2350,7 +2558,7 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return false;
}
- scratch_va = (void *)svm->ghcb;
+ scratch_va = (void *)svm->sev_es.ghcb;
scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
} else {
/*
@@ -2380,12 +2588,12 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
* the vCPU next time (i.e. a read was requested so the data
* must be written back to the guest memory).
*/
- svm->ghcb_sa_sync = sync;
- svm->ghcb_sa_free = true;
+ svm->sev_es.ghcb_sa_sync = sync;
+ svm->sev_es.ghcb_sa_free = true;
}
- svm->ghcb_sa = scratch_va;
- svm->ghcb_sa_len = len;
+ svm->sev_es.ghcb_sa = scratch_va;
+ svm->sev_es.ghcb_sa_len = len;
return true;
}
@@ -2504,15 +2712,15 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
return -EINVAL;
}
- if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
/* Unable to map GHCB from guest */
vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
ghcb_gpa);
return -EINVAL;
}
- svm->ghcb = svm->ghcb_map.hva;
- ghcb = svm->ghcb_map.hva;
+ svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
+ ghcb = svm->sev_es.ghcb_map.hva;
trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
@@ -2535,7 +2743,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_read(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_MMIO_WRITE:
if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
@@ -2544,7 +2752,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
ret = kvm_sev_es_mmio_write(vcpu,
control->exit_info_1,
control->exit_info_2,
- svm->ghcb_sa);
+ svm->sev_es.ghcb_sa);
break;
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
@@ -2604,7 +2812,8 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
if (!setup_vmgexit_scratch(svm, in, bytes))
return -EINVAL;
- return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->ghcb_sa, count, in);
+ return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
+ count, in);
}
void sev_es_init_vmcb(struct vcpu_svm *svm)
@@ -2619,7 +2828,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm)
* VMCB page. Do not include the encryption mask on the VMSA physical
* address since hardware will access it using the guest key.
*/
- svm->vmcb->control.vmsa_pa = __pa(svm->vmsa);
+ svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);
/* Can't intercept CR register access, HV can't modify CR registers */
svm_clr_intercept(svm, INTERCEPT_CR0_READ);
@@ -2691,8 +2900,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
struct vcpu_svm *svm = to_svm(vcpu);
/* First SIPI: Use the values as initially set by the VMM */
- if (!svm->received_first_sipi) {
- svm->received_first_sipi = true;
+ if (!svm->sev_es.received_first_sipi) {
+ svm->sev_es.received_first_sipi = true;
return;
}
@@ -2701,8 +2910,8 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
* the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
* non-zero value.
*/
- if (!svm->ghcb)
+ if (!svm->sev_es.ghcb)
return;
- ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index b36ca4e476c2..5630c241d5f6 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1452,7 +1452,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm_switch_vmcb(svm, &svm->vmcb01);
if (vmsa_page)
- svm->vmsa = page_address(vmsa_page);
+ svm->sev_es.vmsa = page_address(vmsa_page);
svm->guest_state_loaded = false;
@@ -2835,11 +2835,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
static int svm_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
{
struct vcpu_svm *svm = to_svm(vcpu);
- if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->ghcb))
+ if (!err || !sev_es_guest(vcpu->kvm) || WARN_ON_ONCE(!svm->sev_es.ghcb))
return kvm_complete_insn_gp(vcpu, err);
- ghcb_set_sw_exit_info_1(svm->ghcb, 1);
- ghcb_set_sw_exit_info_2(svm->ghcb,
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 1);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb,
X86_TRAP_GP |
SVM_EVTINJ_TYPE_EXEPT |
SVM_EVTINJ_VALID);
@@ -3121,11 +3121,6 @@ static int invpcid_interception(struct kvm_vcpu *vcpu)
type = svm->vmcb->control.exit_info_2;
gva = svm->vmcb->control.exit_info_1;
- if (type > 3) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
return kvm_handle_invpcid(vcpu, type, gva);
}
@@ -4701,6 +4696,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.mem_enc_unreg_region = svm_unregister_enc_region,
.vm_copy_enc_context_from = svm_vm_copy_asid_from,
+ .vm_move_enc_context_from = svm_vm_migrate_from,
.can_emulate_instruction = svm_can_emulate_instruction,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5e9510d4574e..5faad3dc10e2 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -80,6 +80,7 @@ struct kvm_sev_info {
u64 ap_jump_table; /* SEV-ES AP Jump Table address */
struct kvm *enc_context_owner; /* Owner of copied encryption context */
struct misc_cg *misc_cg; /* For misc cgroup accounting */
+ atomic_t migration_in_progress;
};
struct kvm_svm {
@@ -123,6 +124,20 @@ struct svm_nested_state {
bool initialized;
};
+struct vcpu_sev_es_state {
+ /* SEV-ES support */
+ struct vmcb_save_area *vmsa;
+ struct ghcb *ghcb;
+ struct kvm_host_map ghcb_map;
+ bool received_first_sipi;
+
+ /* SEV-ES scratch area support */
+ void *ghcb_sa;
+ u32 ghcb_sa_len;
+ bool ghcb_sa_sync;
+ bool ghcb_sa_free;
+};
+
struct vcpu_svm {
struct kvm_vcpu vcpu;
/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
@@ -186,17 +201,7 @@ struct vcpu_svm {
DECLARE_BITMAP(write, MAX_DIRECT_ACCESS_MSRS);
} shadow_msr_intercept;
- /* SEV-ES support */
- struct vmcb_save_area *vmsa;
- struct ghcb *ghcb;
- struct kvm_host_map ghcb_map;
- bool received_first_sipi;
-
- /* SEV-ES scratch area support */
- void *ghcb_sa;
- u32 ghcb_sa_len;
- bool ghcb_sa_sync;
- bool ghcb_sa_free;
+ struct vcpu_sev_es_state sev_es;
bool guest_state_loaded;
};
@@ -242,7 +247,7 @@ static __always_inline bool sev_es_guest(struct kvm *kvm)
#ifdef CONFIG_KVM_AMD_SEV
struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- return sev_guest(kvm) && sev->es_active;
+ return sev->es_active && !WARN_ON_ONCE(!sev->active);
#else
return false;
#endif
@@ -558,6 +563,7 @@ int svm_register_enc_region(struct kvm *kvm,
int svm_unregister_enc_region(struct kvm *kvm,
struct kvm_enc_region *range);
int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
+int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd);
void pre_sev_run(struct vcpu_svm *svm, int cpu);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b4ee5e9f9e20..1e2f66951566 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -525,67 +525,19 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
}
/*
- * Check if MSR is intercepted for L01 MSR bitmap.
+ * For x2APIC MSRs, ignore the vmcs01 bitmap. L1 can enable x2APIC without L1
+ * itself utilizing x2APIC. All MSRs were previously set to be intercepted,
+ * only the "disable intercept" case needs to be handled.
*/
-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
+static void nested_vmx_disable_intercept_for_x2apic_msr(unsigned long *msr_bitmap_l1,
+ unsigned long *msr_bitmap_l0,
+ u32 msr, int type)
{
- unsigned long *msr_bitmap;
- int f = sizeof(unsigned long);
+ if (type & MSR_TYPE_R && !vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
+ vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
- if (!cpu_has_vmx_msr_bitmap())
- return true;
-
- msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-
- if (msr <= 0x1fff) {
- return !!test_bit(msr, msr_bitmap + 0x800 / f);
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
- }
-
- return true;
-}
-
-/*
- * If a msr is allowed by L0, we should check whether it is allowed by L1.
- * The corresponding bit will be cleared unless both of L0 and L1 allow it.
- */
-static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
- unsigned long *msr_bitmap_nested,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R &&
- !test_bit(msr, msr_bitmap_l1 + 0x000 / f))
- /* read-low */
- __clear_bit(msr, msr_bitmap_nested + 0x000 / f);
-
- if (type & MSR_TYPE_W &&
- !test_bit(msr, msr_bitmap_l1 + 0x800 / f))
- /* write-low */
- __clear_bit(msr, msr_bitmap_nested + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R &&
- !test_bit(msr, msr_bitmap_l1 + 0x400 / f))
- /* read-high */
- __clear_bit(msr, msr_bitmap_nested + 0x400 / f);
-
- if (type & MSR_TYPE_W &&
- !test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
- /* write-high */
- __clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
-
- }
+ if (type & MSR_TYPE_W && !vmx_test_msr_bitmap_write(msr_bitmap_l1, msr))
+ vmx_clear_msr_bitmap_write(msr_bitmap_l0, msr);
}
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
@@ -600,6 +552,34 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
}
}
+#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
+static inline \
+void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
+ unsigned long *msr_bitmap_l1, \
+ unsigned long *msr_bitmap_l0, u32 msr) \
+{ \
+ if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
+ vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
+ vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
+ else \
+ vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
+}
+BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
+BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
+
+static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
+ unsigned long *msr_bitmap_l1,
+ unsigned long *msr_bitmap_l0,
+ u32 msr, int types)
+{
+ if (types & MSR_TYPE_R)
+ nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
+ msr_bitmap_l0, msr);
+ if (types & MSR_TYPE_W)
+ nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
+ msr_bitmap_l0, msr);
+}
+
/*
* Merge L0's and L1's MSR bitmap, return false to indicate that
* we do not use the hardware.
@@ -607,10 +587,11 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
int msr;
unsigned long *msr_bitmap_l1;
- unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
- struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
+ unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+ struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
/* Nothing to do if the MSR bitmap is not in use. */
if (!cpu_has_vmx_msr_bitmap() ||
@@ -625,7 +606,7 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
/*
* To keep the control flow simple, pay eight 8-byte writes (sixteen
* 4-byte writes on 32-bit systems) up front to enable intercepts for
- * the x2APIC MSR range and selectively disable them below.
+ * the x2APIC MSR range and selectively toggle those relevant to L2.
*/
enable_x2apic_msr_intercepts(msr_bitmap_l0);
@@ -644,61 +625,44 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
}
}
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_TASKPRI),
MSR_TYPE_R | MSR_TYPE_W);
if (nested_cpu_has_vid(vmcs12)) {
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_EOI),
MSR_TYPE_W);
- nested_vmx_disable_intercept_for_msr(
+ nested_vmx_disable_intercept_for_x2apic_msr(
msr_bitmap_l1, msr_bitmap_l0,
X2APIC_MSR(APIC_SELF_IPI),
MSR_TYPE_W);
}
}
- /* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+ /*
+ * Always check vmcs01's bitmap to honor userspace MSR filters and any
+ * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
+ */
#ifdef CONFIG_X86_64
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_FS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_FS_BASE, MSR_TYPE_RW);
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_GS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_GS_BASE, MSR_TYPE_RW);
- nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
- MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
#endif
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
- /*
- * Checking the L0->L1 bitmap is trying to verify two things:
- *
- * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
- * ensures that we do not accidentally generate an L02 MSR bitmap
- * from the L12 MSR bitmap that is too permissive.
- * 2. That L1 or L2s have actually used the MSR. This avoids
- * unnecessarily merging of the bitmap if the MSR is unused. This
- * works properly because we only update the L01 MSR bitmap lazily.
- * So even if L0 should pass L1 these MSRs, the L01 bitmap is only
- * updated to reflect this when L1 (or its L2s) actually write to
- * the MSR.
- */
- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- MSR_IA32_SPEC_CTRL,
- MSR_TYPE_R | MSR_TYPE_W);
-
- if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
- nested_vmx_disable_intercept_for_msr(
- msr_bitmap_l1, msr_bitmap_l0,
- MSR_IA32_PRED_CMD,
- MSR_TYPE_W);
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_PRED_CMD, MSR_TYPE_W);
- kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
+ kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
return true;
}
@@ -706,33 +670,39 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- struct kvm_host_map map;
- struct vmcs12 *shadow;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- shadow = get_shadow_vmcs12(vcpu);
-
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
return;
- memcpy(shadow, map.hva, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ kvm_read_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
vmcs12->vmcs_link_pointer == INVALID_GPA)
return;
- kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
- get_shadow_vmcs12(vcpu), VMCS12_SIZE);
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE))
+ return;
+
+ kvm_write_guest_cached(vmx->vcpu.kvm, ghc, get_shadow_vmcs12(vcpu),
+ VMCS12_SIZE);
}
/*
@@ -2866,6 +2836,17 @@ static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
return 0;
}
+static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
+ struct vmcs12 *vmcs12)
+{
+#ifdef CONFIG_X86_64
+ if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
+ !!(vcpu->arch.efer & EFER_LMA)))
+ return -EINVAL;
+#endif
+ return 0;
+}
+
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
@@ -2890,18 +2871,16 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
return -EINVAL;
#ifdef CONFIG_X86_64
- ia32e = !!(vcpu->arch.efer & EFER_LMA);
+ ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
#else
ia32e = false;
#endif
if (ia32e) {
- if (CC(!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)) ||
- CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
+ if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
return -EINVAL;
} else {
- if (CC(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) ||
- CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
+ if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
CC((vmcs12->host_rip) >> 32))
return -EINVAL;
@@ -2946,9 +2925,9 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int r = 0;
- struct vmcs12 *shadow;
- struct kvm_host_map map;
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct gfn_to_hva_cache *ghc = &vmx->nested.shadow_vmcs12_cache;
+ struct vmcs_hdr hdr;
if (vmcs12->vmcs_link_pointer == INVALID_GPA)
return 0;
@@ -2956,17 +2935,21 @@ static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
return -EINVAL;
- if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
- return -EINVAL;
+ if (ghc->gpa != vmcs12->vmcs_link_pointer &&
+ CC(kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc,
+ vmcs12->vmcs_link_pointer, VMCS12_SIZE)))
+ return -EINVAL;
- shadow = map.hva;
+ if (CC(kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))))
+ return -EINVAL;
- if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
- CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
- r = -EINVAL;
+ if (CC(hdr.revision_id != VMCS12_REVISION) ||
+ CC(hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
+ return -EINVAL;
- kvm_vcpu_unmap(vcpu, &map, false);
- return r;
+ return 0;
}
/*
@@ -3571,6 +3554,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
if (nested_vmx_check_controls(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+ if (nested_vmx_check_address_space_size(vcpu, vmcs12))
+ return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
+
if (nested_vmx_check_host_state(vcpu, vmcs12))
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
@@ -5300,10 +5286,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
return 1;
if (vmx->nested.current_vmptr != vmptr) {
- struct kvm_host_map map;
- struct vmcs12 *new_vmcs12;
+ struct gfn_to_hva_cache *ghc = &vmx->nested.vmcs12_cache;
+ struct vmcs_hdr hdr;
- if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
+ if (ghc->gpa != vmptr &&
+ kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, vmptr, VMCS12_SIZE)) {
/*
* Reads from an unbacked page return all 1s,
* which means that the 32 bits located at the
@@ -5314,12 +5301,16 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
- new_vmcs12 = map.hva;
+ if (kvm_read_guest_offset_cached(vcpu->kvm, ghc, &hdr,
+ offsetof(struct vmcs12, hdr),
+ sizeof(hdr))) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
- if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
- (new_vmcs12->hdr.shadow_vmcs &&
+ if (hdr.revision_id != VMCS12_REVISION ||
+ (hdr.shadow_vmcs &&
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
- kvm_vcpu_unmap(vcpu, &map, false);
return nested_vmx_fail(vcpu,
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
}
@@ -5330,8 +5321,11 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu)
* Load VMCS12 from guest memory since it is not already
* cached.
*/
- memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
- kvm_vcpu_unmap(vcpu, &map, false);
+ if (kvm_read_guest_cached(vcpu->kvm, ghc, vmx->nested.cached_vmcs12,
+ VMCS12_SIZE)) {
+ return nested_vmx_fail(vcpu,
+ VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
+ }
set_current_vmptr(vmx, vmptr);
}
@@ -5379,7 +5373,7 @@ static int handle_invept(struct kvm_vcpu *vcpu)
struct {
u64 eptp, gpa;
} operand;
- int i, r;
+ int i, r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_EPT) ||
@@ -5392,7 +5386,8 @@ static int handle_invept(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
@@ -5459,7 +5454,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
u64 gla;
} operand;
u16 vpid02;
- int r;
+ int r, gpr_index;
if (!(vmx->nested.msrs.secondary_ctls_high &
SECONDARY_EXEC_ENABLE_VPID) ||
@@ -5472,7 +5467,8 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
return 1;
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
types = (vmx->nested.msrs.vpid_caps &
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b8e0d21b7c8a..1b7456b2177b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -118,16 +118,15 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
}
}
-/* returns 0 if idx's corresponding MSR exists; otherwise returns 1. */
-static int intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+static bool intel_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
{
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
bool fixed = idx & (1u << 30);
idx &= ~(3u << 30);
- return (!fixed && idx >= pmu->nr_arch_gp_counters) ||
- (fixed && idx >= pmu->nr_arch_fixed_counters);
+ return fixed ? idx < pmu->nr_arch_fixed_counters
+ : idx < pmu->nr_arch_gp_counters;
}
static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 76861b66bbcf..ba66c171d951 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -769,24 +769,13 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
/*
* Check if MSR is intercepted for currently loaded MSR bitmap.
*/
-static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
+static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
{
- unsigned long *msr_bitmap;
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
+ if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
return true;
- msr_bitmap = to_vmx(vcpu)->loaded_vmcs->msr_bitmap;
-
- if (msr <= 0x1fff) {
- return !!test_bit(msr, msr_bitmap + 0x800 / f);
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- return !!test_bit(msr, msr_bitmap + 0xc00 / f);
- }
-
- return true;
+ return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+ MSR_IA32_SPEC_CTRL);
}
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -3697,46 +3686,6 @@ void free_vpid(int vpid)
spin_unlock(&vmx_vpid_lock);
}
-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __clear_bit(msr, msr_bitmap + 0x000 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __clear_bit(msr, msr_bitmap + 0x800 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __set_bit(msr, msr_bitmap + 0x000 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
- int f = sizeof(unsigned long);
-
- if (msr <= 0x1fff)
- __set_bit(msr, msr_bitmap + 0x800 / f);
- else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
- __set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -5494,6 +5443,7 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
u64 pcid;
u64 gla;
} operand;
+ int gpr_index;
if (!guest_cpuid_has(vcpu, X86_FEATURE_INVPCID)) {
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5501,12 +5451,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
}
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
- type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
-
- if (type > 3) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
+ gpr_index = vmx_get_instr_info_reg2(vmx_instruction_info);
+ type = kvm_register_read(vcpu, gpr_index);
/* According to the Intel instruction reference, the memory operand
* is read even if it isn't needed (e.g., for type==all)
@@ -6749,7 +6695,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
* If the L02 MSR bitmap does not intercept the MSR, then we need to
* save it.
*/
- if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
@@ -7563,7 +7509,8 @@ static void hardware_unsetup(void)
static bool vmx_check_apicv_inhibit_reasons(ulong bit)
{
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
- BIT(APICV_INHIBIT_REASON_HYPERV);
+ BIT(APICV_INHIBIT_REASON_HYPERV) |
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
return supported & BIT(bit);
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index e7db42e3b0ce..4df2ac24ffc1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -142,6 +142,16 @@ struct nested_vmx {
struct vmcs12 *cached_shadow_vmcs12;
/*
+ * GPA to HVA cache for accessing vmcs12->vmcs_link_pointer
+ */
+ struct gfn_to_hva_cache shadow_vmcs12_cache;
+
+ /*
+ * GPA to HVA cache for VMCS12
+ */
+ struct gfn_to_hva_cache vmcs12_cache;
+
+ /*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
@@ -400,6 +410,34 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
+/*
+ * Note, early Intel manuals have the write-low and read-high bitmap offsets
+ * the wrong way round. The bitmaps control MSRs 0x00000000-0x00001fff and
+ * 0xc0000000-0xc0001fff. The former (low) uses bytes 0-0x3ff for reads and
+ * 0x800-0xbff for writes. The latter (high) uses 0x400-0x7ff for reads and
+ * 0xc00-0xfff for writes. MSRs not covered by either of the ranges always
+ * VM-Exit.
+ */
+#define __BUILD_VMX_MSR_BITMAP_HELPER(rtype, action, bitop, access, base) \
+static inline rtype vmx_##action##_msr_bitmap_##access(unsigned long *bitmap, \
+ u32 msr) \
+{ \
+ int f = sizeof(unsigned long); \
+ \
+ if (msr <= 0x1fff) \
+ return bitop##_bit(msr, bitmap + base / f); \
+ else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) \
+ return bitop##_bit(msr & 0x1fff, bitmap + (base + 0x400) / f); \
+ return (rtype)true; \
+}
+#define BUILD_VMX_MSR_BITMAP_HELPERS(ret_type, action, bitop) \
+ __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, read, 0x0) \
+ __BUILD_VMX_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 0x800)
+
+BUILD_VMX_MSR_BITMAP_HELPERS(bool, test, test)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, clear, __clear)
+BUILD_VMX_MSR_BITMAP_HELPERS(void, set, __set)
+
static inline u8 vmx_get_rvi(void)
{
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
@@ -522,4 +560,9 @@ static inline bool vmx_guest_state_valid(struct kvm_vcpu *vcpu)
void dump_vmcs(struct kvm_vcpu *vcpu);
+static inline int vmx_get_instr_info_reg2(u32 vmx_instr_info)
+{
+ return (vmx_instr_info >> 28) & 0xf;
+}
+
#endif /* __KVM_X86_VMX_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c1c4e2b05a63..5a403d92833f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3260,8 +3260,11 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
static void record_steal_time(struct kvm_vcpu *vcpu)
{
- struct kvm_host_map map;
- struct kvm_steal_time *st;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+ struct kvm_steal_time __user *st;
+ struct kvm_memslots *slots;
+ u64 steal;
+ u32 version;
if (kvm_xen_msr_enabled(vcpu->kvm)) {
kvm_xen_runstate_set_running(vcpu);
@@ -3271,47 +3274,86 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
- /* -EAGAIN is returned in atomic context so we can just return. */
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT,
- &map, &vcpu->arch.st.cache, false))
+ if (WARN_ON_ONCE(current->mm != vcpu->kvm->mm))
return;
- st = map.hva +
- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+ slots = kvm_memslots(vcpu->kvm);
+
+ if (unlikely(slots->generation != ghc->generation ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot)) {
+ gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS;
+
+ /* We rely on the fact that it fits in a single page. */
+ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS);
+
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot)
+ return;
+ }
+ st = (struct kvm_steal_time __user *)ghc->hva;
/*
* Doing a TLB flush here, on the guest's behalf, can avoid
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
- u8 st_preempted = xchg(&st->preempted, 0);
+ u8 st_preempted = 0;
+ int err = -EFAULT;
+
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
+
+ asm volatile("1: xchgb %0, %2\n"
+ "xor %1, %1\n"
+ "2:\n"
+ _ASM_EXTABLE_UA(1b, 2b)
+ : "+q" (st_preempted),
+ "+&r" (err),
+ "+m" (st->preempted));
+ if (err)
+ goto out;
+
+ user_access_end();
+
+ vcpu->arch.st.preempted = 0;
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
st_preempted & KVM_VCPU_FLUSH_TLB);
if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
+
+ if (!user_access_begin(st, sizeof(*st)))
+ goto dirty;
} else {
- st->preempted = 0;
- }
+ if (!user_access_begin(st, sizeof(*st)))
+ return;
- vcpu->arch.st.preempted = 0;
+ unsafe_put_user(0, &st->preempted, out);
+ vcpu->arch.st.preempted = 0;
+ }
- if (st->version & 1)
- st->version += 1; /* first time write, random junk */
+ unsafe_get_user(version, &st->version, out);
+ if (version & 1)
+ version += 1; /* first time write, random junk */
- st->version += 1;
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
smp_wmb();
- st->steal += current->sched_info.run_delay -
+ unsafe_get_user(steal, &st->steal, out);
+ steal += current->sched_info.run_delay -
vcpu->arch.st.last_steal;
vcpu->arch.st.last_steal = current->sched_info.run_delay;
+ unsafe_put_user(steal, &st->steal, out);
- smp_wmb();
+ version += 1;
+ unsafe_put_user(version, &st->version, out);
- st->version += 1;
-
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, false);
+ out:
+ user_access_end();
+ dirty:
+ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
@@ -3517,7 +3559,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_pv_has(vcpu, KVM_FEATURE_PV_EOI))
return 1;
- if (kvm_lapic_enable_pv_eoi(vcpu, data, sizeof(u8)))
+ if (kvm_lapic_set_pv_eoi(vcpu, data, sizeof(u8)))
return 1;
break;
@@ -4137,7 +4179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
break;
case KVM_CAP_NR_VCPUS:
- r = KVM_SOFT_MAX_VCPUS;
+ r = min_t(unsigned int, num_online_cpus(), KVM_MAX_VCPUS);
break;
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -4351,8 +4393,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
- struct kvm_host_map map;
- struct kvm_steal_time *st;
+ struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache;
+ struct kvm_steal_time __user *st;
+ struct kvm_memslots *slots;
+ static const u8 preempted = KVM_VCPU_PREEMPTED;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4360,16 +4404,23 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
- if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
- &vcpu->arch.st.cache, true))
+ /* This happens on process exit */
+ if (unlikely(current->mm != vcpu->kvm->mm))
+ return;
+
+ slots = kvm_memslots(vcpu->kvm);
+
+ if (unlikely(slots->generation != ghc->generation ||
+ kvm_is_error_hva(ghc->hva) || !ghc->memslot))
return;
- st = map.hva +
- offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
+ st = (struct kvm_steal_time __user *)ghc->hva;
+ BUILD_BUG_ON(sizeof(st->preempted) != sizeof(preempted));
- st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
+ if (!copy_to_user_nofault(&st->preempted, &preempted, sizeof(preempted)))
+ vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
- kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+ mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -5728,6 +5779,12 @@ split_irqchip_unlock:
if (kvm_x86_ops.vm_copy_enc_context_from)
r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
return r;
+ case KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM:
+ r = -EINVAL;
+ if (kvm_x86_ops.vm_move_enc_context_from)
+ r = kvm_x86_ops.vm_move_enc_context_from(
+ kvm, cap->args[0]);
+ return r;
case KVM_CAP_EXIT_HYPERCALL:
if (cap->args[0] & ~KVM_EXIT_HYPERCALL_VALID_MASK) {
r = -EINVAL;
@@ -7328,7 +7385,9 @@ static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
u32 pmc)
{
- return kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc);
+ if (kvm_pmu_is_valid_rdpmc_ecx(emul_to_vcpu(ctxt), pmc))
+ return 0;
+ return -EINVAL;
}
static int emulator_read_pmc(struct x86_emulate_ctxt *ctxt,
@@ -8789,7 +8848,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
trace_kvm_hypercall(nr, a0, a1, a2, a3);
- op_64_bit = is_64_bit_mode(vcpu);
+ op_64_bit = is_64_bit_hypercall(vcpu);
if (!op_64_bit) {
nr &= 0xFFFFFFFF;
a0 &= 0xFFFFFFFF;
@@ -9488,12 +9547,16 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
- if (to_hv_vcpu(vcpu))
+ if (to_hv_vcpu(vcpu)) {
bitmap_or((ulong *)eoi_exit_bitmap,
vcpu->arch.ioapic_handled_vectors,
to_hv_synic(vcpu)->vec_bitmap, 256);
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ return;
+ }
- static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
+ static_call(kvm_x86_load_eoi_exitmap)(
+ vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors);
}
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -9552,7 +9615,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
if (kvm_request_pending(vcpu)) {
- if (kvm_check_request(KVM_REQ_VM_BUGGED, vcpu)) {
+ if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
r = -EIO;
goto out;
}
@@ -10564,6 +10627,24 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
return ret;
}
+static void kvm_arch_vcpu_guestdbg_update_apicv_inhibit(struct kvm *kvm)
+{
+ bool inhibit = false;
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ down_write(&kvm->arch.apicv_update_lock);
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->guest_debug & KVM_GUESTDBG_BLOCKIRQ) {
+ inhibit = true;
+ break;
+ }
+ }
+ __kvm_request_apicv_update(kvm, !inhibit, APICV_INHIBIT_REASON_BLOCKIRQ);
+ up_write(&kvm->arch.apicv_update_lock);
+}
+
int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
struct kvm_guest_debug *dbg)
{
@@ -10616,6 +10697,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
static_call(kvm_x86_update_exception_bitmap)(vcpu);
+ kvm_arch_vcpu_guestdbg_update_apicv_inhibit(vcpu->kvm);
+
r = 0;
out:
@@ -10859,11 +10942,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
- struct gfn_to_pfn_cache *cache = &vcpu->arch.st.cache;
int idx;
- kvm_release_pfn(cache->pfn, cache->dirty, cache);
-
kvmclock_reset(vcpu);
static_call(kvm_x86_vcpu_free)(vcpu);
@@ -12275,7 +12355,8 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
return kvm_skip_emulated_instruction(vcpu);
default:
- BUG(); /* We have already checked above that type <= 3 */
+ kvm_inject_gp(vcpu, 0);
+ return 1;
}
}
EXPORT_SYMBOL_GPL(kvm_handle_invpcid);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index ea264c4502e4..997669ae9caa 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -153,12 +153,24 @@ static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu)
{
int cs_db, cs_l;
+ WARN_ON_ONCE(vcpu->arch.guest_state_protected);
+
if (!is_long_mode(vcpu))
return false;
static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
return cs_l;
}
+static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If running with protected guest state, the CS register is not
+ * accessible. The hypercall register values will have had to been
+ * provided in 64-bit mode, so assume the guest is in 64-bit.
+ */
+ return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu);
+}
+
static inline bool x86_exception_has_error_code(unsigned int vector)
{
static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 8f62baebd028..dff2bdf9507a 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -127,9 +127,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
state_entry_time = vx->runstate_entry_time;
state_entry_time |= XEN_RUNSTATE_UPDATE;
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
sizeof(state_entry_time));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -144,9 +144,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
*/
BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
offsetof(struct compat_vcpu_runstate_info, state));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
- BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
sizeof(vx->current_runstate));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -163,9 +163,9 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
- sizeof(((struct compat_vcpu_runstate_info *)0)->time));
- BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
+ sizeof_field(struct compat_vcpu_runstate_info, time));
+ BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
sizeof(vx->runstate_times));
if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
@@ -205,9 +205,9 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct vcpu_info, evtchn_upcall_pending));
BUILD_BUG_ON(sizeof(rc) !=
- sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));
+ sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
/*
* For efficiency, this mirrors the checks for using the valid
@@ -299,7 +299,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
break;
case KVM_XEN_ATTR_TYPE_SHARED_INFO:
- data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_gfn);
+ data->u.shared_info.gfn = kvm->arch.xen.shinfo_gfn;
r = 0;
break;
@@ -698,7 +698,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
kvm_hv_hypercall_enabled(vcpu))
return kvm_hv_hypercall(vcpu);
- longmode = is_64_bit_mode(vcpu);
+ longmode = is_64_bit_hypercall(vcpu);
if (!longmode) {
params[0] = (u32)kvm_rbx_read(vcpu);
params[1] = (u32)kvm_rcx_read(vcpu);
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 23d54b810f08..35487305d8af 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -229,28 +229,75 @@ void __init sev_setup_arch(void)
swiotlb_adjust_size(size);
}
-static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
- pgprot_t old_prot, new_prot;
- unsigned long pfn, pa, size;
- pte_t new_pte;
+ unsigned long pfn = 0;
+ pgprot_t prot;
switch (level) {
case PG_LEVEL_4K:
pfn = pte_pfn(*kpte);
- old_prot = pte_pgprot(*kpte);
+ prot = pte_pgprot(*kpte);
break;
case PG_LEVEL_2M:
pfn = pmd_pfn(*(pmd_t *)kpte);
- old_prot = pmd_pgprot(*(pmd_t *)kpte);
+ prot = pmd_pgprot(*(pmd_t *)kpte);
break;
case PG_LEVEL_1G:
pfn = pud_pfn(*(pud_t *)kpte);
- old_prot = pud_pgprot(*(pud_t *)kpte);
+ prot = pud_pgprot(*(pud_t *)kpte);
break;
default:
- return;
+ WARN_ONCE(1, "Invalid level for kpte\n");
+ return 0;
+ }
+
+ if (ret_prot)
+ *ret_prot = prot;
+
+ return pfn;
+}
+
+void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc)
+{
+#ifdef CONFIG_PARAVIRT
+ unsigned long sz = npages << PAGE_SHIFT;
+ unsigned long vaddr_end = vaddr + sz;
+
+ while (vaddr < vaddr_end) {
+ int psize, pmask, level;
+ unsigned long pfn;
+ pte_t *kpte;
+
+ kpte = lookup_address(vaddr, &level);
+ if (!kpte || pte_none(*kpte)) {
+ WARN_ONCE(1, "kpte lookup for vaddr\n");
+ return;
+ }
+
+ pfn = pg_level_to_pfn(level, kpte, NULL);
+ if (!pfn)
+ continue;
+
+ psize = page_level_size(level);
+ pmask = page_level_mask(level);
+
+ notify_page_enc_status_changed(pfn, psize >> PAGE_SHIFT, enc);
+
+ vaddr = (vaddr & pmask) + psize;
}
+#endif
+}
+
+static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
+{
+ pgprot_t old_prot, new_prot;
+ unsigned long pfn, pa, size;
+ pte_t new_pte;
+
+ pfn = pg_level_to_pfn(level, kpte, &old_prot);
+ if (!pfn)
+ return;
new_prot = old_prot;
if (enc)
@@ -286,12 +333,13 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
static int __init early_set_memory_enc_dec(unsigned long vaddr,
unsigned long size, bool enc)
{
- unsigned long vaddr_end, vaddr_next;
+ unsigned long vaddr_end, vaddr_next, start;
unsigned long psize, pmask;
int split_page_size_mask;
int level, ret;
pte_t *kpte;
+ start = vaddr;
vaddr_next = vaddr;
vaddr_end = vaddr + size;
@@ -346,6 +394,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr,
ret = 0;
+ notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc);
out:
__flush_tlb_all();
return ret;
@@ -361,6 +410,11 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size)
return early_set_memory_enc_dec(vaddr, size, true);
}
+void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc)
+{
+ notify_range_enc_status_changed(vaddr, npages, enc);
+}
+
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
{
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 934dc5b2df36..b4072115c8ef 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2023,6 +2023,12 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc)
*/
cpa_flush(&cpa, 0);
+ /*
+ * Notify hypervisor that a given memory range is mapped encrypted
+ * or decrypted.
+ */
+ notify_range_enc_status_changed(addr, numpages, enc);
+
return ret;
}
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 9e55bcbfcd33..6a8f3b53ab83 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -225,7 +225,6 @@ static void __init xen_pv_smp_prepare_boot_cpu(void)
static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
{
unsigned cpu;
- unsigned int i;
if (skip_ioapic_setup) {
char *m = (max_cpus == 0) ?
@@ -238,16 +237,9 @@ static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
}
xen_init_lock_cpu(0);
- smp_store_boot_cpu_info();
- cpu_data(0).x86_max_cores = 1;
+ smp_prepare_cpus_common();
- for_each_possible_cpu(i) {
- zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_die_map, i), GFP_KERNEL);
- zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL);
- }
- set_cpu_sibling_map(0);
+ cpu_data(0).x86_max_cores = 1;
speculative_store_bypass_ht_init();