summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2022-03-03 20:20:17 -0800
committerPaolo Bonzini <pbonzini@redhat.com>2022-04-29 12:49:49 -0400
commit6819af7597d87d40769b47bb377472877a6b56c0 (patch)
treeb374084b3096f02be7755060ce7cdd5debd3eefd /arch/x86/kvm
parent25cc05652cd6be7349c84abbea3886b5483330cd (diff)
downloadlinux-6819af7597d87d40769b47bb377472877a6b56c0.tar.gz
linux-6819af7597d87d40769b47bb377472877a6b56c0.tar.bz2
linux-6819af7597d87d40769b47bb377472877a6b56c0.zip
KVM: x86: Clean up and document nested #PF workaround
Replace the per-vendor hack-a-fix for KVM's #PF => #PF => #DF workaround with an explicit, common workaround in kvm_inject_emulated_page_fault(). Aside from being a hack, the current approach is brittle and incomplete, e.g. nSVM's KVM_SET_NESTED_STATE fails to set ->inject_page_fault(), and nVMX fails to apply the workaround when VMX is intercepting #PF due to allow_smaller_maxphyaddr=1. Signed-off-by: Sean Christopherson <seanjc@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/svm/nested.c18
-rw-r--r--arch/x86/kvm/vmx/nested.c15
-rw-r--r--arch/x86/kvm/x86.c21
3 files changed, 35 insertions, 19 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index caa691229b71..bed5e1692cef 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -55,24 +55,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
-static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+static bool nested_svm_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb;
- WARN_ON(!is_guest_mode(vcpu));
+ WARN_ON(!is_guest_mode(vcpu));
if (vmcb12_is_intercept(&svm->nested.ctl,
INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
- !svm->nested.nested_run_pending) {
- vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
+ !WARN_ON_ONCE(svm->nested.nested_run_pending)) {
+ vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
vmcb->control.exit_code_hi = 0;
vmcb->control.exit_info_1 = fault->error_code;
vmcb->control.exit_info_2 = fault->address;
nested_svm_vmexit(svm);
- } else {
- kvm_inject_page_fault(vcpu, fault);
+ return true;
}
+
+ return false;
}
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
@@ -751,9 +753,6 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
if (ret)
return ret;
- if (!npt_enabled)
- vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
-
if (!from_vmrun)
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
@@ -1659,6 +1658,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
struct kvm_x86_nested_ops svm_nested_ops = {
.leave_nested = svm_leave_nested,
.check_events = svm_check_nested_events,
+ .handle_page_fault_workaround = nested_svm_handle_page_fault_workaround,
.triple_fault = nested_svm_triple_fault,
.get_nested_state_pages = svm_get_nested_state_pages,
.get_state = svm_get_nested_state,
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 838ac7ab5950..a6688663da4d 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -476,24 +476,23 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
return 0;
}
-
-static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
- struct x86_exception *fault)
+static bool nested_vmx_handle_page_fault_workaround(struct kvm_vcpu *vcpu,
+ struct x86_exception *fault)
{
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
WARN_ON(!is_guest_mode(vcpu));
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
- !to_vmx(vcpu)->nested.nested_run_pending) {
+ !WARN_ON_ONCE(to_vmx(vcpu)->nested.nested_run_pending)) {
vmcs12->vm_exit_intr_error_code = fault->error_code;
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
fault->address);
- } else {
- kvm_inject_page_fault(vcpu, fault);
+ return true;
}
+ return false;
}
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
@@ -2614,9 +2613,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
}
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
-
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
vmcs12->guest_ia32_perf_global_ctrl))) {
@@ -6830,6 +6826,7 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
struct kvm_x86_nested_ops vmx_nested_ops = {
.leave_nested = vmx_leave_nested,
.check_events = vmx_check_nested_events,
+ .handle_page_fault_workaround = nested_vmx_handle_page_fault_workaround,
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.triple_fault = nested_vmx_triple_fault,
.get_state = vmx_get_nested_state,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1bc67995cc8a..b2b17728a7a4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -748,6 +748,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
}
EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
+/* Returns true if the page fault was immediately morphed into a VM-Exit. */
bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault)
{
@@ -766,8 +767,26 @@ bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address,
fault_mmu->root.hpa);
+ /*
+ * A workaround for KVM's bad exception handling. If KVM injected an
+ * exception into L2, and L2 encountered a #PF while vectoring the
+ * injected exception, manually check to see if L1 wants to intercept
+ * #PF, otherwise queuing the #PF will lead to #DF or a lost exception.
+ * In all other cases, defer the check to nested_ops->check_events(),
+ * which will correctly handle priority (this does not). Note, other
+ * exceptions, e.g. #GP, are theoretically affected, #PF is simply the
+ * most problematic, e.g. when L0 and L1 are both intercepting #PF for
+ * shadow paging.
+ *
+ * TODO: Rewrite exception handling to track injected and pending
+ * (VM-Exit) exceptions separately.
+ */
+ if (unlikely(vcpu->arch.exception.injected && is_guest_mode(vcpu)) &&
+ kvm_x86_ops.nested_ops->handle_page_fault_workaround(vcpu, fault))
+ return true;
+
fault_mmu->inject_page_fault(vcpu, fault);
- return fault->nested_page_fault;
+ return false;
}
EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault);