diff options
Diffstat (limited to 'arch/x86/kvm/svm/nested.c')
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 91 |
1 files changed, 66 insertions, 25 deletions
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 5e8d8443154e..21d03e3a5dfd 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -98,13 +98,18 @@ static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) WARN_ON(mmu_is_nested(vcpu)); vcpu->arch.mmu = &vcpu->arch.guest_mmu; + + /* + * The NPT format depends on L1's CR4 and EFER, which is in vmcb01. Note, + * when called via KVM_SET_NESTED_STATE, that state may _not_ match current + * vCPU state. CR0.WP is explicitly ignored, while CR0.PG is required. + */ kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4, svm->vmcb01.ptr->save.efer, svm->nested.ctl.nested_cr3); vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; - reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -380,33 +385,47 @@ static inline bool nested_npt_enabled(struct vcpu_svm *svm) return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE; } +static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu) +{ + /* + * TODO: optimize unconditional TLB flush/MMU sync. A partial list of + * things to fix before this can be conditional: + * + * - Flush TLBs for both L1 and L2 remote TLB flush + * - Honor L1's request to flush an ASID on nested VMRUN + * - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*] + * - Don't crush a pending TLB flush in vmcb02 on nested VMRUN + * - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST + * + * [*] Unlike nested EPT, SVM's ASID management can invalidate nested + * NPT guest-physical mappings on VMRUN. + */ + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); +} + /* * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true * if we are emulating VM-Entry into a guest with NPT enabled. */ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, - bool nested_npt) + bool nested_npt, bool reload_pdptrs) { if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) return -EINVAL; - if (!nested_npt && is_pae_paging(vcpu) && - (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) { - if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) - return -EINVAL; - } + if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) && + CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) + return -EINVAL; - /* - * TODO: optimize unconditional TLB flush/MMU sync here and in - * kvm_init_shadow_npt_mmu(). - */ if (!nested_npt) - kvm_mmu_new_pgd(vcpu, cr3, false, false); + kvm_mmu_new_pgd(vcpu, cr3); vcpu->arch.cr3 = cr3; kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); - kvm_init_mmu(vcpu, false); + /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ + kvm_init_mmu(vcpu); return 0; } @@ -481,6 +500,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) { const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + struct kvm_vcpu *vcpu = &svm->vcpu; /* * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, @@ -505,10 +525,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) /* nested_cr3. */ if (nested_npt_enabled(svm)) - nested_svm_init_mmu_context(&svm->vcpu); + nested_svm_init_mmu_context(vcpu); - svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = - svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; + svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset = + vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; svm->vmcb->control.int_ctl = (svm->nested.ctl.int_ctl & ~mask) | @@ -523,8 +543,10 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count; svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh; + nested_svm_transition_tlb_flush(vcpu); + /* Enter Guest-Mode */ - enter_guest_mode(&svm->vcpu); + enter_guest_mode(vcpu); /* * Merge guest and host intercepts - must be called with vcpu in @@ -576,7 +598,7 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, nested_vmcb02_prepare_save(svm, vmcb12); ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, - nested_npt_enabled(svm)); + nested_npt_enabled(svm), true); if (ret) return ret; @@ -596,8 +618,6 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) struct kvm_host_map map; u64 vmcb12_gpa; - ++vcpu->stat.nested_run; - if (is_smm(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -803,9 +823,11 @@ int nested_svm_vmexit(struct vcpu_svm *svm) kvm_vcpu_unmap(vcpu, &map, true); + nested_svm_transition_tlb_flush(vcpu); + nested_svm_uninit_mmu_context(vcpu); - rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false); + rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true); if (rc) return 1; @@ -1228,8 +1250,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, &user_kvm_nested_state->data.svm[0]; struct vmcb_control_area *ctl; struct vmcb_save_area *save; + unsigned long cr0; int ret; - u32 cr0; BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) > KVM_STATE_NESTED_SVM_VMCB_SIZE); @@ -1302,6 +1324,19 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, goto out_free; /* + * While the nested guest CR3 is already checked and set by + * KVM_SET_SREGS, it was set when nested state was yet loaded, + * thus MMU might not be initialized correctly. + * Set it again to fix this. + */ + + ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, + nested_npt_enabled(svm), false); + if (WARN_ON_ONCE(ret)) + goto out_free; + + + /* * All checks done, we can enter guest mode. Userspace provides * vmcb12.control, which will be combined with L1 and stored into * vmcb02, and the L1 save state which we store in vmcb01. @@ -1358,9 +1393,15 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) if (WARN_ON(!is_guest_mode(vcpu))) return true; - if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, - nested_npt_enabled(svm))) - return false; + if (!vcpu->arch.pdptrs_from_userspace && + !nested_npt_enabled(svm) && is_pae_paging(vcpu)) + /* + * Reload the guest's PDPTRs since after a migration + * the guest CR3 might be restored prior to setting the nested + * state which can lead to a load of wrong PDPTRs. + */ + if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))) + return false; if (!nested_svm_vmrun_msrpm(svm)) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; |