Diffstat (limited to 'arch/x86/kvm')
-rw-r--r-- | arch/x86/kvm/cpuid.c       |  4
-rw-r--r-- | arch/x86/kvm/cpuid.h       |  8
-rw-r--r-- | arch/x86/kvm/mmu.c         | 16
-rw-r--r-- | arch/x86/kvm/mmu.h         |  4
-rw-r--r-- | arch/x86/kvm/paging_tmpl.h |  7
-rw-r--r-- | arch/x86/kvm/svm.c         |  1
-rw-r--r-- | arch/x86/kvm/vmx.c         |  1
-rw-r--r-- | arch/x86/kvm/x86.c         | 59
8 files changed, 82 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 59b69f6a2844..1d08ad3582d0 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -16,6 +16,8 @@
 #include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
+#include <asm/i387.h> /* For use_eager_fpu.  Ugh! */
+#include <asm/fpu-internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/xsave.h>
 #include "cpuid.h"
@@ -95,6 +97,8 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
         if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
                 best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
+        vcpu->arch.eager_fpu = guest_cpuid_has_mpx(vcpu);
+
         /*
          * The existing code assumes virtual address is 48-bit in the canonical
          * address checks; exit if it is ever changed.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index c3b1ad9fca81..496b3695d3d3 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -117,4 +117,12 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
         best = kvm_find_cpuid_entry(vcpu, 7, 0);
         return best && (best->ebx & bit(X86_FEATURE_RTM));
 }
+
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+        struct kvm_cpuid_entry2 *best;
+
+        best = kvm_find_cpuid_entry(vcpu, 7, 0);
+        return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
 #endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d43867c33bc4..44a7d2515497 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3736,8 +3736,8 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
         }
 }
 
-void update_permission_bitmask(struct kvm_vcpu *vcpu,
-                struct kvm_mmu *mmu, bool ept)
+static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+                                      struct kvm_mmu *mmu, bool ept)
 {
         unsigned bit, byte, pfec;
         u8 map;
@@ -3918,6 +3918,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 {
         bool smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+        bool smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
         struct kvm_mmu *context = &vcpu->arch.mmu;
 
         MMU_WARN_ON(VALID_PAGE(context->root_hpa));
@@ -3936,6 +3937,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
         context->base_role.cr0_wp = is_write_protection(vcpu);
         context->base_role.smep_andnot_wp
                 = smep && !is_write_protection(vcpu);
+        context->base_role.smap_andnot_wp
+                = smap && !is_write_protection(vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
@@ -4207,12 +4210,18 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
                        const u8 *new, int bytes)
 {
         gfn_t gfn = gpa >> PAGE_SHIFT;
-        union kvm_mmu_page_role mask = { .word = 0 };
         struct kvm_mmu_page *sp;
         LIST_HEAD(invalid_list);
         u64 entry, gentry, *spte;
         int npte;
         bool remote_flush, local_flush, zap_page;
+        union kvm_mmu_page_role mask = (union kvm_mmu_page_role) {
+                .cr0_wp = 1,
+                .cr4_pae = 1,
+                .nxe = 1,
+                .smep_andnot_wp = 1,
+                .smap_andnot_wp = 1,
+        };
 
         /*
          * If we don't have indirect shadow pages, it means no page is
@@ -4238,7 +4247,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
         ++vcpu->kvm->stat.mmu_pte_write;
         kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE);
 
-        mask.cr0_wp = mask.cr4_pae = mask.nxe = 1;
         for_each_gfn_indirect_valid_sp(vcpu->kvm, sp, gfn) {
                 if (detect_write_misaligned(sp, gpa, bytes) ||
                       detect_write_flooding(sp)) {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7d65637c851..0ada65ecddcf 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -71,8 +71,6 @@ enum {
 int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
-void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-                bool ept);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
@@ -166,6 +164,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
         int index = (pfec >> 1) +
                     (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
 
+        WARN_ON(pfec & PFERR_RSVD_MASK);
+
         return (mmu->permissions[index] >> pte_access) & 1;
 }
 
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index fd49c867b25a..6e6d115fe9b5 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -718,6 +718,13 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
                                               mmu_is_nested(vcpu));
                 if (likely(r != RET_MMIO_PF_INVALID))
                         return r;
+
+                /*
+                 * page fault with PFEC.RSVD = 1 is caused by shadow
+                 * page fault, should not be used to walk guest page
+                 * table.
+                 */
+                error_code &= ~PFERR_RSVD_MASK;
         };
 
         r = mmu_topup_memory_caches(vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8650f6..9afa233b5482 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -4381,6 +4381,7 @@ static struct kvm_x86_ops svm_x86_ops = {
         .cache_reg = svm_cache_reg,
         .get_rflags = svm_get_rflags,
         .set_rflags = svm_set_rflags,
+        .fpu_activate = svm_fpu_activate,
         .fpu_deactivate = svm_fpu_deactivate,
 
         .tlb_flush = svm_flush_tlb,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f7b61687bd79..2d73807f0d31 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -10185,6 +10185,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
         .cache_reg = vmx_cache_reg,
         .get_rflags = vmx_get_rflags,
         .set_rflags = vmx_set_rflags,
+        .fpu_activate = vmx_fpu_activate,
         .fpu_deactivate = vmx_fpu_deactivate,
 
         .tlb_flush = vmx_flush_tlb,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ed31c31b2485..ea306adbbc13 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -702,8 +702,9 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr);
 int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
 {
         unsigned long old_cr4 = kvm_read_cr4(vcpu);
-        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE |
-                                   X86_CR4_PAE | X86_CR4_SMEP;
+        unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE |
+                                   X86_CR4_SMEP | X86_CR4_SMAP;
+
         if (cr4 & CR4_RESERVED_BITS)
                 return 1;
 
@@ -744,9 +745,6 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
             (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
                 kvm_mmu_reset_context(vcpu);
 
-        if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
-                update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
-
         if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
                 kvm_update_cpuid(vcpu);
 
@@ -1669,12 +1667,28 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
                 &guest_hv_clock, sizeof(guest_hv_clock))))
                 return 0;
 
-        /*
-         * The interface expects us to write an even number signaling that the
-         * update is finished. Since the guest won't see the intermediate
-         * state, we just increase by 2 at the end.
+        /* This VCPU is paused, but it's legal for a guest to read another
+         * VCPU's kvmclock, so we really have to follow the specification where
+         * it says that version is odd if data is being modified, and even after
+         * it is consistent.
+         *
+         * Version field updates must be kept separate.  This is because
+         * kvm_write_guest_cached might use a "rep movs" instruction, and
+         * writes within a string instruction are weakly ordered.  So there
+         * are three writes overall.
+         *
+         * As a small optimization, only write the version field in the first
+         * and third write.  The vcpu->pv_time cache is still valid, because the
+         * version field is the first in the struct.
          */
-        vcpu->hv_clock.version = guest_hv_clock.version + 2;
+        BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
+
+        vcpu->hv_clock.version = guest_hv_clock.version + 1;
+        kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                                &vcpu->hv_clock,
+                                sizeof(vcpu->hv_clock.version));
+
+        smp_wmb();
 
         /* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
         pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1695,6 +1709,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
                                 &vcpu->hv_clock,
                                 sizeof(vcpu->hv_clock));
+
+        smp_wmb();
+
+        vcpu->hv_clock.version++;
+        kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+                                &vcpu->hv_clock,
+                                sizeof(vcpu->hv_clock.version));
         return 0;
 }
 
@@ -6174,6 +6195,8 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
                 return;
 
         page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+        if (is_error_page(page))
+                return;
         kvm_x86_ops->set_apic_access_page_addr(vcpu, page_to_phys(page));
 
         /*
@@ -7037,7 +7060,9 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
         fpu_save_init(&vcpu->arch.guest_fpu);
         __kernel_fpu_end();
         ++vcpu->stat.fpu_reload;
-        kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+        if (!vcpu->arch.eager_fpu)
+                kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
+
         trace_kvm_fpu(0);
 }
 
@@ -7053,11 +7078,21 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                                 unsigned int id)
 {
+        struct kvm_vcpu *vcpu;
+
         if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0)
                 printk_once(KERN_WARNING
                 "kvm: SMP vm created on host with unstable TSC; "
                 "guest TSC will not be reliable\n");
-        return kvm_x86_ops->vcpu_create(kvm, id);
+
+        vcpu = kvm_x86_ops->vcpu_create(kvm, id);
+
+        /*
+         * Activate fpu unconditionally in case the guest needs eager FPU.  It will be
+         * deactivated soon if it doesn't.
+         */
+        kvm_x86_ops->fpu_activate(vcpu);
+        return vcpu;
 }
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
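Note on the kvmclock hunks above: the new comment in kvm_guest_time_update() describes a seqcount-style publication protocol. The host makes the guest-visible version odd before touching the payload, orders the three kvm_write_guest_cached() calls with smp_wmb(), and finishes by making the version even again. Purely as an illustration of that contract, the sketch below shows the matching guest-side retry loop in C. The struct mirrors the pvclock_vcpu_time_info layout (version first, which is what the BUILD_BUG_ON asserts), but read_time_info() is a hypothetical helper written for this note, not the kernel's actual pvclock reader.

#include <stdint.h>

/* Assumed layout, mirroring the pvclock ABI; version must stay first. */
struct pvclock_vcpu_time_info {
        uint32_t version;               /* odd => host update in progress */
        uint32_t pad0;
        uint64_t tsc_timestamp;
        uint64_t system_time;
        uint32_t tsc_to_system_mul;
        int8_t   tsc_shift;
        uint8_t  flags;
        uint8_t  pad[2];
};

/* Illustrative guest-side reader: copy a consistent snapshot of the
 * shared time info published by the host. */
static struct pvclock_vcpu_time_info
read_time_info(const struct pvclock_vcpu_time_info *src)
{
        struct pvclock_vcpu_time_info dst;
        uint32_t version;

        do {
                /* Sample the version; an odd value means the host is
                 * mid-update, so the copy below would be torn. */
                version = src->version;
                __sync_synchronize();   /* pairs with the host's smp_wmb() */
                dst = *src;
                __sync_synchronize();
                /* Retry if the host bumped the version while we copied. */
        } while ((version & 1) || version != src->version);

        return dst;
}

Without the intermediate odd version and the barriers between the three host writes, a guest copying the structure concurrently could observe a torn payload under an even version, which is exactly the window the patch closes.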