summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/vmx/vmx.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/vmx/vmx.c')
-rw-r--r--arch/x86/kvm/vmx/vmx.c321
1 files changed, 157 insertions, 164 deletions
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index c9b49a09e6b5..9dba04b6b019 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -439,7 +439,7 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)
-asmlinkage void vmread_error(unsigned long field, bool fault)
+void vmread_error(unsigned long field, bool fault)
{
if (fault)
kvm_spurious_fault();
@@ -864,7 +864,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
return flags;
}
-static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+static __always_inline void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
vm_entry_controls_clearbit(vmx, entry);
@@ -922,7 +922,7 @@ skip_guest:
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->host.nr);
}
-static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+static __always_inline void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit,
unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
u64 guest_val, u64 host_val)
@@ -1652,17 +1652,25 @@ static void vmx_update_emulated_instruction(struct kvm_vcpu *vcpu)
/*
* Per the SDM, MTF takes priority over debug-trap exceptions besides
- * T-bit traps. As instruction emulation is completed (i.e. at the
- * instruction boundary), any #DB exception pending delivery must be a
- * debug-trap. Record the pending MTF state to be delivered in
+ * TSS T-bit traps and ICEBP (INT1). KVM doesn't emulate T-bit traps
+ * or ICEBP (in the emulator proper), and skipping of ICEBP after an
+ * intercepted #DB deliberately avoids single-step #DB and MTF updates
+ * as ICEBP is higher priority than both. As instruction emulation is
+ * completed at this point (i.e. KVM is at the instruction boundary),
+ * any #DB exception pending delivery must be a debug-trap of lower
+ * priority than MTF. Record the pending MTF state to be delivered in
* vmx_check_nested_events().
*/
if (nested_cpu_has_mtf(vmcs12) &&
(!vcpu->arch.exception.pending ||
- vcpu->arch.exception.nr == DB_VECTOR))
+ vcpu->arch.exception.vector == DB_VECTOR) &&
+ (!vcpu->arch.exception_vmexit.pending ||
+ vcpu->arch.exception_vmexit.vector == DB_VECTOR)) {
vmx->nested.mtf_pending = true;
- else
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ } else {
vmx->nested.mtf_pending = false;
+ }
}
static int vmx_skip_emulated_instruction(struct kvm_vcpu *vcpu)
@@ -1684,32 +1692,40 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
-static void vmx_queue_exception(struct kvm_vcpu *vcpu)
+static void vmx_inject_exception(struct kvm_vcpu *vcpu)
{
+ struct kvm_queued_exception *ex = &vcpu->arch.exception;
+ u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- unsigned nr = vcpu->arch.exception.nr;
- bool has_error_code = vcpu->arch.exception.has_error_code;
- u32 error_code = vcpu->arch.exception.error_code;
- u32 intr_info = nr | INTR_INFO_VALID_MASK;
- kvm_deliver_exception_payload(vcpu);
+ kvm_deliver_exception_payload(vcpu, ex);
- if (has_error_code) {
- vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+ if (ex->has_error_code) {
+ /*
+ * Despite the error code being architecturally defined as 32
+ * bits, and the VMCS field being 32 bits, Intel CPUs and thus
+ * VMX don't actually supporting setting bits 31:16. Hardware
+ * will (should) never provide a bogus error code, but AMD CPUs
+ * do generate error codes with bits 31:16 set, and so KVM's
+ * ABI lets userspace shove in arbitrary 32-bit values. Drop
+ * the upper bits to avoid VM-Fail, losing information that
+ * does't really exist is preferable to killing the VM.
+ */
+ vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, (u16)ex->error_code);
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
if (vmx->rmode.vm86_active) {
int inc_eip = 0;
- if (kvm_exception_is_soft(nr))
+ if (kvm_exception_is_soft(ex->vector))
inc_eip = vcpu->arch.event_exit_inst_len;
- kvm_inject_realmode_interrupt(vcpu, nr, inc_eip);
+ kvm_inject_realmode_interrupt(vcpu, ex->vector, inc_eip);
return;
}
WARN_ON_ONCE(vmx->emulation_required);
- if (kvm_exception_is_soft(nr)) {
+ if (kvm_exception_is_soft(ex->vector)) {
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
@@ -1930,9 +1946,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
* sanity checking and refuse to boot. Filter all unsupported
* features out.
*/
- if (!msr_info->host_initiated &&
- vmx->nested.enlightened_vmcs_enabled)
- nested_evmcs_filter_control_msr(msr_info->index,
+ if (!msr_info->host_initiated && guest_cpuid_has_evmcs(vcpu))
+ nested_evmcs_filter_control_msr(vcpu, msr_info->index,
&msr_info->data);
break;
case MSR_IA32_RTIT_CTL:
@@ -2494,6 +2509,30 @@ static bool cpu_has_sgx(void)
return cpuid_eax(0) >= 0x12 && (cpuid_eax(0x12) & BIT(0));
}
+/*
+ * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
+ * can't be used due to errata where VM Exit may incorrectly clear
+ * IA32_PERF_GLOBAL_CTRL[34:32]. Work around the errata by using the
+ * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
+ */
+static bool cpu_has_perf_global_ctrl_bug(void)
+{
+ if (boot_cpu_data.x86 == 0x6) {
+ switch (boot_cpu_data.x86_model) {
+ case INTEL_FAM6_NEHALEM_EP: /* AAK155 */
+ case INTEL_FAM6_NEHALEM: /* AAP115 */
+ case INTEL_FAM6_WESTMERE: /* AAT100 */
+ case INTEL_FAM6_WESTMERE_EP: /* BC86,AAY89,BD102 */
+ case INTEL_FAM6_NEHALEM_EX: /* BA97 */
+ return true;
+ default:
+ break;
+ }
+ }
+
+ return false;
+}
+
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
u32 msr, u32 *result)
{
@@ -2526,13 +2565,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
struct vmx_capability *vmx_cap)
{
u32 vmx_msr_low, vmx_msr_high;
- u32 min, opt, min2, opt2;
u32 _pin_based_exec_control = 0;
u32 _cpu_based_exec_control = 0;
u32 _cpu_based_2nd_exec_control = 0;
u64 _cpu_based_3rd_exec_control = 0;
u32 _vmexit_control = 0;
u32 _vmentry_control = 0;
+ u64 misc_msr;
int i;
/*
@@ -2552,64 +2591,17 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
};
memset(vmcs_conf, 0, sizeof(*vmcs_conf));
- min = CPU_BASED_HLT_EXITING |
-#ifdef CONFIG_X86_64
- CPU_BASED_CR8_LOAD_EXITING |
- CPU_BASED_CR8_STORE_EXITING |
-#endif
- CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_UNCOND_IO_EXITING |
- CPU_BASED_MOV_DR_EXITING |
- CPU_BASED_USE_TSC_OFFSETTING |
- CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING |
- CPU_BASED_INVLPG_EXITING |
- CPU_BASED_RDPMC_EXITING;
-
- opt = CPU_BASED_TPR_SHADOW |
- CPU_BASED_USE_MSR_BITMAPS |
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS |
- CPU_BASED_ACTIVATE_TERTIARY_CONTROLS;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
- &_cpu_based_exec_control) < 0)
+
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_CPU_BASED_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_CPU_BASED_VM_EXEC_CONTROL,
+ MSR_IA32_VMX_PROCBASED_CTLS,
+ &_cpu_based_exec_control))
return -EIO;
-#ifdef CONFIG_X86_64
- if (_cpu_based_exec_control & CPU_BASED_TPR_SHADOW)
- _cpu_based_exec_control &= ~CPU_BASED_CR8_LOAD_EXITING &
- ~CPU_BASED_CR8_STORE_EXITING;
-#endif
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
- min2 = 0;
- opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
- SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_WBINVD_EXITING |
- SECONDARY_EXEC_ENABLE_VPID |
- SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST |
- SECONDARY_EXEC_PAUSE_LOOP_EXITING |
- SECONDARY_EXEC_DESC |
- SECONDARY_EXEC_ENABLE_RDTSCP |
- SECONDARY_EXEC_ENABLE_INVPCID |
- SECONDARY_EXEC_APIC_REGISTER_VIRT |
- SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_SHADOW_VMCS |
- SECONDARY_EXEC_XSAVES |
- SECONDARY_EXEC_RDSEED_EXITING |
- SECONDARY_EXEC_RDRAND_EXITING |
- SECONDARY_EXEC_ENABLE_PML |
- SECONDARY_EXEC_TSC_SCALING |
- SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
- SECONDARY_EXEC_PT_USE_GPA |
- SECONDARY_EXEC_PT_CONCEAL_VMX |
- SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_BUS_LOCK_DETECTION |
- SECONDARY_EXEC_NOTIFY_VM_EXITING;
- if (cpu_has_sgx())
- opt2 |= SECONDARY_EXEC_ENCLS_EXITING;
- if (adjust_vmx_controls(min2, opt2,
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_SECONDARY_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_SECONDARY_VM_EXEC_CONTROL,
MSR_IA32_VMX_PROCBASED_CTLS2,
- &_cpu_based_2nd_exec_control) < 0)
+ &_cpu_based_2nd_exec_control))
return -EIO;
}
#ifndef CONFIG_X86_64
@@ -2627,13 +2619,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
rdmsr_safe(MSR_IA32_VMX_EPT_VPID_CAP,
&vmx_cap->ept, &vmx_cap->vpid);
- if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
- /* CR3 accesses and invlpg don't need to cause VM Exits when EPT
- enabled */
- _cpu_based_exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_INVLPG_EXITING);
- } else if (vmx_cap->ept) {
+ if (!(_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) &&
+ vmx_cap->ept) {
pr_warn_once("EPT CAP should not exist if not support "
"1-setting enable EPT VM-execution control\n");
@@ -2653,32 +2640,24 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmx_cap->vpid = 0;
}
- if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS) {
- u64 opt3 = TERTIARY_EXEC_IPI_VIRT;
+ if (!cpu_has_sgx())
+ _cpu_based_2nd_exec_control &= ~SECONDARY_EXEC_ENCLS_EXITING;
- _cpu_based_3rd_exec_control = adjust_vmx_controls64(opt3,
+ if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_TERTIARY_CONTROLS)
+ _cpu_based_3rd_exec_control =
+ adjust_vmx_controls64(KVM_OPTIONAL_VMX_TERTIARY_VM_EXEC_CONTROL,
MSR_IA32_VMX_PROCBASED_CTLS3);
- }
- min = VM_EXIT_SAVE_DEBUG_CONTROLS | VM_EXIT_ACK_INTR_ON_EXIT;
-#ifdef CONFIG_X86_64
- min |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
-#endif
- opt = VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_EXIT_LOAD_IA32_PAT |
- VM_EXIT_LOAD_IA32_EFER |
- VM_EXIT_CLEAR_BNDCFGS |
- VM_EXIT_PT_CONCEAL_PIP |
- VM_EXIT_CLEAR_IA32_RTIT_CTL;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS,
- &_vmexit_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_EXIT_CONTROLS,
+ KVM_OPTIONAL_VMX_VM_EXIT_CONTROLS,
+ MSR_IA32_VMX_EXIT_CTLS,
+ &_vmexit_control))
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
- &_pin_based_exec_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_PIN_BASED_VM_EXEC_CONTROL,
+ KVM_OPTIONAL_VMX_PIN_BASED_VM_EXEC_CONTROL,
+ MSR_IA32_VMX_PINBASED_CTLS,
+ &_pin_based_exec_control))
return -EIO;
if (cpu_has_broken_vmx_preemption_timer())
@@ -2687,15 +2666,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY))
_pin_based_exec_control &= ~PIN_BASED_POSTED_INTR;
- min = VM_ENTRY_LOAD_DEBUG_CONTROLS;
- opt = VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
- VM_ENTRY_LOAD_IA32_PAT |
- VM_ENTRY_LOAD_IA32_EFER |
- VM_ENTRY_LOAD_BNDCFGS |
- VM_ENTRY_PT_CONCEAL_PIP |
- VM_ENTRY_LOAD_IA32_RTIT_CTL;
- if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS,
- &_vmentry_control) < 0)
+ if (adjust_vmx_controls(KVM_REQUIRED_VMX_VM_ENTRY_CONTROLS,
+ KVM_OPTIONAL_VMX_VM_ENTRY_CONTROLS,
+ MSR_IA32_VMX_ENTRY_CTLS,
+ &_vmentry_control))
return -EIO;
for (i = 0; i < ARRAY_SIZE(vmcs_entry_exit_pairs); i++) {
@@ -2715,30 +2689,6 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
_vmexit_control &= ~x_ctrl;
}
- /*
- * Some cpus support VM_{ENTRY,EXIT}_IA32_PERF_GLOBAL_CTRL but they
- * can't be used due to an errata where VM Exit may incorrectly clear
- * IA32_PERF_GLOBAL_CTRL[34:32]. Workaround the errata by using the
- * MSR load mechanism to switch IA32_PERF_GLOBAL_CTRL.
- */
- if (boot_cpu_data.x86 == 0x6) {
- switch (boot_cpu_data.x86_model) {
- case 26: /* AAK155 */
- case 30: /* AAP115 */
- case 37: /* AAT100 */
- case 44: /* BC86,AAY89,BD102 */
- case 46: /* BA97 */
- _vmentry_control &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
- _vmexit_control &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
- pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
- "does not work properly. Using workaround\n");
- break;
- default:
- break;
- }
- }
-
-
rdmsr(MSR_IA32_VMX_BASIC, vmx_msr_low, vmx_msr_high);
/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
@@ -2755,6 +2705,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
if (((vmx_msr_high >> 18) & 15) != 6)
return -EIO;
+ rdmsrl(MSR_IA32_VMX_MISC, misc_msr);
+
vmcs_conf->size = vmx_msr_high & 0x1fff;
vmcs_conf->basic_cap = vmx_msr_high & ~0x1fff;
@@ -2766,11 +2718,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
vmcs_conf->cpu_based_3rd_exec_ctrl = _cpu_based_3rd_exec_control;
vmcs_conf->vmexit_ctrl = _vmexit_control;
vmcs_conf->vmentry_ctrl = _vmentry_control;
-
-#if IS_ENABLED(CONFIG_HYPERV)
- if (enlightened_vmcs)
- evmcs_sanitize_exec_ctrls(vmcs_conf);
-#endif
+ vmcs_conf->misc = misc_msr;
return 0;
}
@@ -3037,10 +2985,15 @@ int vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
return 0;
vcpu->arch.efer = efer;
+#ifdef CONFIG_X86_64
if (efer & EFER_LMA)
vm_entry_controls_setbit(vmx, VM_ENTRY_IA32E_MODE);
else
vm_entry_controls_clearbit(vmx, VM_ENTRY_IA32E_MODE);
+#else
+ if (KVM_BUG_ON(efer & EFER_LMA, vcpu->kvm))
+ return 1;
+#endif
vmx_setup_uret_msrs(vmx);
return 0;
@@ -4327,18 +4280,37 @@ static u32 vmx_vmentry_ctrl(void)
if (vmx_pt_mode_is_system())
vmentry_ctrl &= ~(VM_ENTRY_PT_CONCEAL_PIP |
VM_ENTRY_LOAD_IA32_RTIT_CTL);
- /* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
- return vmentry_ctrl &
- ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VM_ENTRY_LOAD_IA32_EFER);
+ /*
+ * IA32e mode, and loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically.
+ */
+ vmentry_ctrl &= ~(VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL |
+ VM_ENTRY_LOAD_IA32_EFER |
+ VM_ENTRY_IA32E_MODE);
+
+ if (cpu_has_perf_global_ctrl_bug())
+ vmentry_ctrl &= ~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
+
+ return vmentry_ctrl;
}
static u32 vmx_vmexit_ctrl(void)
{
u32 vmexit_ctrl = vmcs_config.vmexit_ctrl;
+ /*
+ * Not used by KVM and never set in vmcs01 or vmcs02, but emulated for
+ * nested virtualization and thus allowed to be set in vmcs12.
+ */
+ vmexit_ctrl &= ~(VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER |
+ VM_EXIT_SAVE_VMX_PREEMPTION_TIMER);
+
if (vmx_pt_mode_is_system())
vmexit_ctrl &= ~(VM_EXIT_PT_CONCEAL_PIP |
VM_EXIT_CLEAR_IA32_RTIT_CTL);
+
+ if (cpu_has_perf_global_ctrl_bug())
+ vmexit_ctrl &= ~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
+
/* Loading of EFER and PERF_GLOBAL_CTRL are toggled dynamically */
return vmexit_ctrl &
~(VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | VM_EXIT_LOAD_IA32_EFER);
@@ -4376,20 +4348,38 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
{
u32 exec_control = vmcs_config.cpu_based_exec_ctrl;
+ /*
+ * Not used by KVM, but fully supported for nesting, i.e. are allowed in
+ * vmcs12 and propagated to vmcs02 when set in vmcs12.
+ */
+ exec_control &= ~(CPU_BASED_RDTSC_EXITING |
+ CPU_BASED_USE_IO_BITMAPS |
+ CPU_BASED_MONITOR_TRAP_FLAG |
+ CPU_BASED_PAUSE_EXITING);
+
+ /* INTR_WINDOW_EXITING and NMI_WINDOW_EXITING are toggled dynamically */
+ exec_control &= ~(CPU_BASED_INTR_WINDOW_EXITING |
+ CPU_BASED_NMI_WINDOW_EXITING);
+
if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
exec_control &= ~CPU_BASED_MOV_DR_EXITING;
- if (!cpu_need_tpr_shadow(&vmx->vcpu)) {
+ if (!cpu_need_tpr_shadow(&vmx->vcpu))
exec_control &= ~CPU_BASED_TPR_SHADOW;
+
#ifdef CONFIG_X86_64
+ if (exec_control & CPU_BASED_TPR_SHADOW)
+ exec_control &= ~(CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING);
+ else
exec_control |= CPU_BASED_CR8_STORE_EXITING |
CPU_BASED_CR8_LOAD_EXITING;
#endif
- }
- if (!enable_ept)
- exec_control |= CPU_BASED_CR3_STORE_EXITING |
- CPU_BASED_CR3_LOAD_EXITING |
- CPU_BASED_INVLPG_EXITING;
+ /* No need to intercept CR3 access or INVPLG when using EPT. */
+ if (enable_ept)
+ exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
+ CPU_BASED_CR3_STORE_EXITING |
+ CPU_BASED_INVLPG_EXITING);
if (kvm_mwait_in_guest(vmx->vcpu.kvm))
exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING);
@@ -5155,8 +5145,10 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu)
* instruction. ICEBP generates a trap-like #DB, but
* despite its interception control being tied to #DB,
* is an instruction intercept, i.e. the VM-Exit occurs
- * on the ICEBP itself. Note, skipping ICEBP also
- * clears STI and MOVSS blocking.
+ * on the ICEBP itself. Use the inner "skip" helper to
+ * avoid single-step #DB and MTF updates, as ICEBP is
+ * higher priority. Note, skipping ICEBP still clears
+ * STI and MOVSS blocking.
*
* For all other #DBs, set vmcs.PENDING_DBG_EXCEPTIONS.BS
* if single-step is enabled in RFLAGS and STI or MOVSS
@@ -5638,7 +5630,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- trace_kvm_page_fault(gpa, exit_qualification);
+ trace_kvm_page_fault(vcpu, gpa, exit_qualification);
/* Is it a read fault? */
error_code = (exit_qualification & EPT_VIOLATION_ACC_READ)
@@ -5710,7 +5702,7 @@ static bool vmx_emulation_required_with_pending_exception(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
return vmx->emulation_required && !vmx->rmode.vm86_active &&
- (vcpu->arch.exception.pending || vcpu->arch.exception.injected);
+ (kvm_is_exception_pending(vcpu) || vcpu->arch.exception.injected);
}
static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
@@ -7430,7 +7422,7 @@ static int __init vmx_check_processor_compat(void)
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
return -EIO;
if (nested)
- nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept);
+ nested_vmx_setup_ctls_msrs(&vmcs_conf, vmx_cap.ept);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
@@ -8070,7 +8062,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.patch_hypercall = vmx_patch_hypercall,
.inject_irq = vmx_inject_irq,
.inject_nmi = vmx_inject_nmi,
- .queue_exception = vmx_queue_exception,
+ .inject_exception = vmx_inject_exception,
.cancel_injection = vmx_cancel_injection,
.interrupt_allowed = vmx_interrupt_allowed,
.nmi_allowed = vmx_nmi_allowed,
@@ -8227,6 +8219,10 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
return -EIO;
+ if (cpu_has_perf_global_ctrl_bug())
+ pr_warn_once("kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL "
+ "does not work properly. Using workaround\n");
+
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
@@ -8341,11 +8337,9 @@ static __init int hardware_setup(void)
if (enable_preemption_timer) {
u64 use_timer_freq = 5000ULL * 1000 * 1000;
- u64 vmx_msr;
- rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
cpu_preemption_timer_multi =
- vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ vmcs_config.misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
if (tsc_khz)
use_timer_freq = (u64)tsc_khz * 1000;
@@ -8381,8 +8375,7 @@ static __init int hardware_setup(void)
setup_default_sgx_lepubkeyhash();
if (nested) {
- nested_vmx_setup_ctls_msrs(&vmcs_config.nested,
- vmx_capability.ept);
+ nested_vmx_setup_ctls_msrs(&vmcs_config, vmx_capability.ept);
r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers);
if (r)