diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 8 | ||||
-rw-r--r-- | arch/x86/Makefile | 8 | ||||
-rw-r--r-- | arch/x86/boot/compressed/misc.h | 4 | ||||
-rw-r--r-- | arch/x86/include/asm/cpufeature.h | 5 | ||||
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 10 | ||||
-rw-r--r-- | arch/x86/include/asm/realmode.h | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/resctrl/monitor.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/hyperv.c | 9 | ||||
-rw-r--r-- | arch/x86/kvm/mmu.c | 54 | ||||
-rw-r--r-- | arch/x86/kvm/mmutrace.h | 4 | ||||
-rw-r--r-- | arch/x86/kvm/svm.c | 32 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/nested.c | 5 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 19 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 1 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 59 | ||||
-rw-r--r-- | arch/x86/mm/mmap.c | 2 | ||||
-rw-r--r-- | arch/x86/platform/efi/quirks.c | 2 | ||||
-rw-r--r-- | arch/x86/realmode/init.c | 11 |
18 files changed, 158 insertions, 84 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..5ad92419be19 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2217,14 +2217,8 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING If unsure, leave at the default value. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y depends on SMP - ---help--- - Say Y here to allow turning CPUs off and on. CPUs can be - controlled through /sys/devices/system/cpu. - ( Note: power management support will enable this option - automatically on SMP systems. ) - Say N if you want to disable CPU hotplug. config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d8b9d8ca4f8..a587805c6687 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -219,8 +219,12 @@ ifdef CONFIG_RETPOLINE # Additionally, avoid generating expensive indirect jumps which # are subject to retpolines for small number of switch cases. # clang turns off jump table generation by default when under - # retpoline builds, however, gcc does not for x86. - KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) + # retpoline builds, however, gcc does not for x86. This has + # only been fixed starting from gcc stable version 8.4.0 and + # onwards, but not for older ones. See gcc bug #86952. + ifndef CONFIG_CC_IS_CLANG + KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables) + endif endif archscripts: scripts_basic diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index fd13655e0f9b..d2f184165934 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -120,8 +120,6 @@ static inline void console_init(void) void set_sev_encryption_mask(void); -#endif - /* acpi.c */ #ifdef CONFIG_ACPI acpi_physical_address get_rsdp_addr(void); @@ -135,3 +133,5 @@ int count_immovable_mem_regions(void); #else static inline int count_immovable_mem_regions(void) { return 0; } #endif + +#endif /* BOOT_COMPRESSED_MISC_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce95b8cbd229..0e56ff7e4848 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -112,8 +112,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ - (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ - x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, \ + (unsigned long __percpu *)&cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a5db4475e72d..159b5988292f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -253,14 +253,14 @@ struct kvm_mmu_memory_cache { * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used * by indirect shadow page can not be more than 15 bits. * - * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, + * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access, * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; - unsigned cr4_pae:1; + unsigned gpte_is_8_bytes:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; @@ -350,6 +350,7 @@ struct kvm_mmu_page { }; struct kvm_pio_request { + unsigned long linear_rip; unsigned long count; int in; int port; @@ -568,6 +569,7 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; u64 ia32_xss; u64 microcode_version; + u64 arch_capabilities; /* * Paging state of the vcpu @@ -1192,6 +1194,8 @@ struct kvm_x86_ops { int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); + + bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { @@ -1252,7 +1256,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 63b3393bd98e..c53682303c9c 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -77,7 +77,11 @@ static inline size_t real_mode_size_needed(void) return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); } -void set_real_mode_mem(phys_addr_t mem, size_t size); +static inline void set_real_mode_mem(phys_addr_t mem) +{ + real_mode_header = (struct real_mode_header *) __va(mem); +} + void reserve_real_mode(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f33f11f69078..1573a0a6b525 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -501,11 +501,8 @@ out_unlock: void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms) { unsigned long delay = msecs_to_jiffies(delay_ms); - struct rdt_resource *r; int cpu; - r = &rdt_resources_all[RDT_RESOURCE_L3]; - cpu = cpumask_any(&dom->cpu_mask); dom->cqm_work_cpu = cpu; diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 27c43525a05f..421899f6ad7b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, new_config.enable = 0; stimer->config.as_uint64 = new_config.as_uint64; - stimer_mark_pending(stimer, false); + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } @@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, stimer->config.enable = 0; else if (stimer->config.auto_enable) stimer->config.enable = 1; - stimer_mark_pending(stimer, false); + + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7837ab001d80..eee455a8a612 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -182,7 +182,7 @@ struct kvm_shadow_walk_iterator { static const union kvm_mmu_page_role mmu_base_role_mask = { .cr0_wp = 1, - .cr4_pae = 1, + .gpte_is_8_bytes = 1, .nxe = 1, .smep_andnot_wp = 1, .smap_andnot_wp = 1, @@ -2205,6 +2205,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); + #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ @@ -2215,12 +2216,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, for_each_valid_sp(_kvm, _sp, _gfn) \ if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else +static inline bool is_ept_sp(struct kvm_mmu_page *sp) +{ + return sp->role.cr0_wp && sp->role.smap_andnot_wp; +} + /* @sp->gfn should be write-protected at the call site */ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { - if (sp->role.cr4_pae != !!is_pae(vcpu) - || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { + if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) || + vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return false; } @@ -2423,7 +2429,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.level = level; role.direct = direct; if (role.direct) - role.cr4_pae = 0; + role.gpte_is_8_bytes = true; role.access = access; if (!vcpu->arch.mmu->direct_map && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { @@ -4794,7 +4800,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, role.base.access = ACC_ALL; role.base.nxe = !!is_nx(vcpu); - role.base.cr4_pae = !!is_pae(vcpu); role.base.cr0_wp = is_write_protection(vcpu); role.base.smm = is_smm(vcpu); role.base.guest_mode = is_guest_mode(vcpu); @@ -4815,6 +4820,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) role.base.ad_disabled = (shadow_accessed_mask == 0); role.base.level = kvm_x86_ops->get_tdp_level(vcpu); role.base.direct = true; + role.base.gpte_is_8_bytes = true; return role; } @@ -4879,6 +4885,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) role.base.smap_andnot_wp = role.ext.cr4_smap && !is_write_protection(vcpu); role.base.direct = !is_paging(vcpu); + role.base.gpte_is_8_bytes = !!is_pae(vcpu); if (!is_long_mode(vcpu)) role.base.level = PT32E_ROOT_LEVEL; @@ -4918,18 +4925,26 @@ static union kvm_mmu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly) { - union kvm_mmu_role role; + union kvm_mmu_role role = {0}; - /* Base role is inherited from root_mmu */ - role.base.word = vcpu->arch.root_mmu.mmu_role.base.word; - role.ext = kvm_calc_mmu_role_ext(vcpu); + /* SMM flag is inherited from root_mmu */ + role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; role.base.level = PT64_ROOT_4LEVEL; + role.base.gpte_is_8_bytes = true; role.base.direct = false; role.base.ad_disabled = !accessed_dirty; role.base.guest_mode = true; role.base.access = ACC_ALL; + /* + * WP=1 and NOT_WP=1 is an impossible combination, use WP and the + * SMAP variation to denote shadow EPT entries. + */ + role.base.cr0_wp = true; + role.base.smap_andnot_wp = true; + + role.ext = kvm_calc_mmu_role_ext(vcpu); role.ext.execonly = execonly; return role; @@ -5179,7 +5194,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, gpa, bytes, sp->role.word); offset = offset_in_page(gpa); - pte_size = sp->role.cr4_pae ? 8 : 4; + pte_size = sp->role.gpte_is_8_bytes ? 8 : 4; /* * Sometimes, the OS only writes the last one bytes to update status @@ -5203,7 +5218,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) page_offset = offset_in_page(gpa); level = sp->role.level; *nspte = 1; - if (!sp->role.cr4_pae) { + if (!sp->role.gpte_is_8_bytes) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map @@ -5393,10 +5408,12 @@ emulate: * This can happen if a guest gets a page-fault on data access but the HW * table walker is not able to read the instruction page (e.g instruction * page is not present in memory). In those cases we simply restart the - * guest. + * guest, with the exception of AMD Erratum 1096 which is unrecoverable. */ - if (unlikely(insn && !insn_len)) - return 1; + if (unlikely(insn && !insn_len)) { + if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu)) + return 1; + } er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); @@ -5509,7 +5526,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_with_address(kvm, + start_gfn, + iterator.gfn - start_gfn + 1); flush = false; } cond_resched_lock(&kvm->mmu_lock); @@ -5517,7 +5536,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, } if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_with_address(kvm, start_gfn, + end_gfn - start_gfn + 1); flush = false; } @@ -6011,7 +6031,7 @@ out: /* * Calculate mmu pages needed for kvm. */ -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) +unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) { unsigned int nr_mmu_pages; unsigned int nr_pages = 0; diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9f6c855a0043..dd30dccd2ad5 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -29,10 +29,10 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \ + trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ __entry->gfn, role.level, \ - role.cr4_pae ? " pae" : "", \ + role.gpte_is_8_bytes ? 8 : 4, \ role.quadrant, \ role.direct ? " direct" : "", \ access_str[role.access], \ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b5b128a0a051..426039285fd1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7098,6 +7098,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, return -ENODEV; } +static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + bool is_user, smap; + + is_user = svm_get_cpl(vcpu) == 3; + smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + + /* + * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * + * In non SEV guest, hypervisor will be able to read the guest + * memory to decode the instruction pointer when insn_len is zero + * so we return true to indicate that decoding is possible. + * + * But in the SEV guest, the guest memory is encrypted with the + * guest specific key and hypervisor will not be able to decode the + * instruction pointer so we will not able to workaround it. Lets + * print the error and request to kill the guest. + */ + if (is_user && smap) { + if (!sev_guest(vcpu->kvm)) + return true; + + pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + + return false; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7231,6 +7261,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .nested_enable_evmcs = nested_enable_evmcs, .nested_get_evmcs_version = nested_get_evmcs_version, + + .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f24a2c225070..153e539c29c9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2585,6 +2585,11 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || !nested_cr3_valid(vcpu, vmcs12->host_cr3)) return -EINVAL; + + if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) || + is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) + return -EINVAL; + /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the * IA32_EFER MSR must be 0 in the field for that register. In addition, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c73375e01ab8..ab432a930ae8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1683,12 +1683,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->spec_ctrl; break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) - return 1; - msr_info->data = to_vmx(vcpu)->arch_capabilities; - break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -1895,11 +1889,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; - vmx->arch_capabilities = data; - break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -4088,8 +4077,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vmx->arch_capabilities = kvm_get_arch_capabilities(); - vm_exit_controls_init(vmx, vmx_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ @@ -7409,6 +7396,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + return 0; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7711,6 +7703,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_nested_state = NULL, .get_vmcs12_pages = NULL, .nested_enable_evmcs = NULL, + .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, }; static void vmx_cleanup_l1d_flush(void) diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1554cb45b393..a1e00d0a2482 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -190,7 +190,6 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif - u64 arch_capabilities; u64 spec_ctrl; u32 vm_entry_controls_shadow; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65e4559eef2f..099b851dabaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1125,7 +1125,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, @@ -1158,6 +1158,7 @@ static u32 emulated_msrs[] = { MSR_IA32_TSC_ADJUST, MSR_IA32_TSCDEADLINE, + MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, @@ -2443,6 +2444,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr_info->host_initiated) vcpu->arch.microcode_version = data; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vcpu->arch.arch_capabilities = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2747,6 +2753,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = vcpu->arch.microcode_version; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = vcpu->arch.arch_capabilities; + break; case MSR_IA32_TSC: msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; break; @@ -6523,14 +6535,27 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +static int complete_fast_pio_out(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pio.count = 0; + + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) + return 1; + + return kvm_skip_emulated_instruction(vcpu); +} + static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); - /* do not return to emulator after return from userspace */ - vcpu->arch.pio.count = 0; + + if (!ret) { + vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); + vcpu->arch.complete_userspace_io = complete_fast_pio_out; + } return ret; } @@ -6541,6 +6566,11 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) /* We should only ever be called with arch.pio.count equal to 1 */ BUG_ON(vcpu->arch.pio.count != 1); + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { + vcpu->arch.pio.count = 0; + return 1; + } + /* For size less than 4 we merge, else we zero extend */ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0; @@ -6553,7 +6583,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) vcpu->arch.pio.port, &val, 1); kvm_register_write(vcpu, VCPU_REGS_RAX, val); - return 1; + return kvm_skip_emulated_instruction(vcpu); } static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, @@ -6572,6 +6602,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, return ret; } + vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_in; return 0; @@ -6579,16 +6610,13 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) { - int ret = kvm_skip_emulated_instruction(vcpu); + int ret; - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ if (in) - return kvm_fast_pio_in(vcpu, size, port) && ret; + ret = kvm_fast_pio_in(vcpu, size, port); else - return kvm_fast_pio_out(vcpu, size, port) && ret; + ret = kvm_fast_pio_out(vcpu, size, port); + return ret && kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_fast_pio); @@ -8733,6 +8761,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); @@ -9429,13 +9458,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int nr_mmu_pages = 0; - if (!kvm->arch.n_requested_mmu_pages) - nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); - - if (nr_mmu_pages) - kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); + kvm_mmu_change_mmu_pages(kvm, + kvm_mmu_calculate_default_mmu_pages(kvm)); /* * Dirty logging tracks sptes in 4k granularity, meaning that large diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index db3165714521..dc726e07d8ba 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -230,7 +230,7 @@ bool mmap_address_hint_valid(unsigned long addr, unsigned long len) /* Can we access it for direct reading/writing? Must be RAM: */ int valid_phys_addr_range(phys_addr_t addr, size_t count) { - return addr + count <= __pa(high_memory); + return addr + count - 1 <= __pa(high_memory - 1); } /* Can we access it through mmap? Must be a valid physical address: */ diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 458a0e2bcc57..a25a9fd987a9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -449,7 +449,7 @@ void __init efi_free_boot_services(void) */ rm_size = real_mode_size_needed(); if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { - set_real_mode_mem(start, rm_size); + set_real_mode_mem(start); start += rm_size; size -= rm_size; } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index d10105825d57..7dce39c8c034 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -15,15 +15,6 @@ u32 *trampoline_cr4_features; /* Hold the pgd entry used on booting additional CPUs */ pgd_t trampoline_pgd_entry; -void __init set_real_mode_mem(phys_addr_t mem, size_t size) -{ - void *base = __va(mem); - - real_mode_header = (struct real_mode_header *) base; - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - base, (unsigned long long)mem, size); -} - void __init reserve_real_mode(void) { phys_addr_t mem; @@ -42,7 +33,7 @@ void __init reserve_real_mode(void) } memblock_reserve(mem, size); - set_real_mode_mem(mem, size); + set_real_mode_mem(mem); } static void __init setup_real_mode(void) |