diff options
author | Paolo Bonzini <pbonzini@redhat.com> | 2024-04-02 12:26:15 -0400 |
---|---|---|
committer | Paolo Bonzini <pbonzini@redhat.com> | 2024-04-02 12:26:15 -0400 |
commit | 52b761b48f8e23399fafe3834a173c990357b8de (patch) | |
tree | 06f7fe191b277dde269ecb11ff3e089da6f2aff9 /arch/x86/kernel | |
parent | 0d1756482e66f326eb65fe08eed24ce2efabb168 (diff) | |
parent | d96c66ab9fb3ad8b243669cf6b41e68d0f7f9ecd (diff) | |
download | linux-stable-52b761b48f8e23399fafe3834a173c990357b8de.tar.gz linux-stable-52b761b48f8e23399fafe3834a173c990357b8de.tar.bz2 linux-stable-52b761b48f8e23399fafe3834a173c990357b8de.zip |
Merge tag 'kvmarm-fixes-6.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 fixes for 6.9, part #1
- Ensure perf events programmed to count during guest execution
are actually enabled before entering the guest in the nVHE
configuration.
- Restore out-of-range handler for stage-2 translation faults.
- Several fixes to stage-2 TLB invalidations to avoid stale
translations, possibly including partial walk caches.
- Fix early handling of architectural VHE-only systems to ensure E2H is
appropriately set.
- Correct a format specifier warning in the arch_timer selftest.
- Make the KVM banner message correctly handle all of the possible
configurations.
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 93 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/topology_common.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/e820.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/fpu/xstate.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/head64.c | 18 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 11 | ||||
-rw-r--r-- | arch/x86/kernel/mpparse.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 10 | ||||
-rw-r--r-- | arch/x86/kernel/smpboot.c | 32 |
12 files changed, 114 insertions, 128 deletions
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ba8cf5e9ce56..5c1e6d6be267 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2307,6 +2307,8 @@ void arch_smt_update(void) void __init arch_cpu_finalize_init(void) { + struct cpuinfo_x86 *c = this_cpu_ptr(&cpu_info); + identify_boot_cpu(); select_idle_routine(); @@ -2345,6 +2347,13 @@ void __init arch_cpu_finalize_init(void) fpu__init_system(); fpu__init_cpu(); + /* + * Ensure that access to the per CPU representation has the initial + * boot CPU configuration. + */ + *c = boot_cpu_data; + c->initialized = true; + alternative_instructions(); if (IS_ENABLED(CONFIG_X86_64)) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 303fef824167..e0fd57a8ba84 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -45,70 +45,70 @@ bool hyperv_paravisor_present __ro_after_init; EXPORT_SYMBOL_GPL(hyperv_paravisor_present); #if IS_ENABLED(CONFIG_HYPERV) -static inline unsigned int hv_get_nested_reg(unsigned int reg) +static inline unsigned int hv_get_nested_msr(unsigned int reg) { - if (hv_is_sint_reg(reg)) - return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0; + if (hv_is_sint_msr(reg)) + return reg - HV_X64_MSR_SINT0 + HV_X64_MSR_NESTED_SINT0; switch (reg) { - case HV_REGISTER_SIMP: - return HV_REGISTER_NESTED_SIMP; - case HV_REGISTER_SIEFP: - return HV_REGISTER_NESTED_SIEFP; - case HV_REGISTER_SVERSION: - return HV_REGISTER_NESTED_SVERSION; - case HV_REGISTER_SCONTROL: - return HV_REGISTER_NESTED_SCONTROL; - case HV_REGISTER_EOM: - return HV_REGISTER_NESTED_EOM; + case HV_X64_MSR_SIMP: + return HV_X64_MSR_NESTED_SIMP; + case HV_X64_MSR_SIEFP: + return HV_X64_MSR_NESTED_SIEFP; + case HV_X64_MSR_SVERSION: + return HV_X64_MSR_NESTED_SVERSION; + case HV_X64_MSR_SCONTROL: + return HV_X64_MSR_NESTED_SCONTROL; + case HV_X64_MSR_EOM: + return HV_X64_MSR_NESTED_EOM; default: return reg; } } -u64 hv_get_non_nested_register(unsigned int reg) +u64 hv_get_non_nested_msr(unsigned int reg) { u64 value; - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) hv_ivm_msr_read(reg, &value); else rdmsrl(reg, value); return value; } -EXPORT_SYMBOL_GPL(hv_get_non_nested_register); +EXPORT_SYMBOL_GPL(hv_get_non_nested_msr); -void hv_set_non_nested_register(unsigned int reg, u64 value) +void hv_set_non_nested_msr(unsigned int reg, u64 value) { - if (hv_is_synic_reg(reg) && ms_hyperv.paravisor_present) { + if (hv_is_synic_msr(reg) && ms_hyperv.paravisor_present) { hv_ivm_msr_write(reg, value); /* Write proxy bit via wrmsl instruction */ - if (hv_is_sint_reg(reg)) + if (hv_is_sint_msr(reg)) wrmsrl(reg, value | 1 << 20); } else { wrmsrl(reg, value); } } -EXPORT_SYMBOL_GPL(hv_set_non_nested_register); +EXPORT_SYMBOL_GPL(hv_set_non_nested_msr); -u64 hv_get_register(unsigned int reg) +u64 hv_get_msr(unsigned int reg) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - return hv_get_non_nested_register(reg); + return hv_get_non_nested_msr(reg); } -EXPORT_SYMBOL_GPL(hv_get_register); +EXPORT_SYMBOL_GPL(hv_get_msr); -void hv_set_register(unsigned int reg, u64 value) +void hv_set_msr(unsigned int reg, u64 value) { if (hv_nested) - reg = hv_get_nested_reg(reg); + reg = hv_get_nested_msr(reg); - hv_set_non_nested_register(reg, value); + hv_set_non_nested_msr(reg, value); } -EXPORT_SYMBOL_GPL(hv_set_register); +EXPORT_SYMBOL_GPL(hv_set_msr); static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); @@ -352,13 +352,24 @@ static void __init reduced_hw_init(void) x86_init.irqs.pre_vector_init = x86_init_noop; } +int hv_get_hypervisor_version(union hv_hypervisor_version_info *info) +{ + unsigned int hv_max_functions; + + hv_max_functions = cpuid_eax(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS); + if (hv_max_functions < HYPERV_CPUID_VERSION) { + pr_err("%s: Could not detect Hyper-V version\n", __func__); + return -ENODEV; + } + + cpuid(HYPERV_CPUID_VERSION, &info->eax, &info->ebx, &info->ecx, &info->edx); + + return 0; +} + static void __init ms_hyperv_init_platform(void) { int hv_max_functions_eax; - int hv_host_info_eax; - int hv_host_info_ebx; - int hv_host_info_ecx; - int hv_host_info_edx; #ifdef CONFIG_PARAVIRT pv_info.name = "Hyper-V"; @@ -409,21 +420,6 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running on a nested hypervisor\n"); } - /* - * Extract host information. - */ - if (hv_max_functions_eax >= HYPERV_CPUID_VERSION) { - hv_host_info_eax = cpuid_eax(HYPERV_CPUID_VERSION); - hv_host_info_ebx = cpuid_ebx(HYPERV_CPUID_VERSION); - hv_host_info_ecx = cpuid_ecx(HYPERV_CPUID_VERSION); - hv_host_info_edx = cpuid_edx(HYPERV_CPUID_VERSION); - - pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", - hv_host_info_ebx >> 16, hv_host_info_ebx & 0xFFFF, - hv_host_info_eax, hv_host_info_edx & 0xFFFFFF, - hv_host_info_ecx, hv_host_info_edx >> 24); - } - if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { x86_platform.calibrate_tsc = hv_get_tsc_khz; @@ -456,7 +452,7 @@ static void __init ms_hyperv_init_platform(void) /* To be supported: more work is required. */ ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; - /* HV_REGISTER_CRASH_CTL is unsupported. */ + /* HV_MSR_CRASH_CTL is unsupported. */ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; /* Don't trust Hyper-V's TLB-flushing hypercalls. */ @@ -648,6 +644,7 @@ const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .init.x2apic_available = ms_hyperv_x2apic_available, .init.msi_ext_dest_id = ms_hyperv_msi_ext_dest_id, .init.init_platform = ms_hyperv_init_platform, + .init.guest_late_init = ms_hyperv_late_init, #ifdef CONFIG_AMD_MEM_ENCRYPT .runtime.sev_es_hcall_prepare = hv_sev_es_hcall_prepare, .runtime.sev_es_hcall_finish = hv_sev_es_hcall_finish, diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index 3259b1d4fefe..aaca8d235dc2 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -415,6 +415,17 @@ void __init topology_init_possible_cpus(void) unsigned int total = assigned + disabled; u32 apicid, firstid; + /* + * If there was no APIC registered, then fake one so that the + * topology bitmap is populated. That ensures that the code below + * is valid and the various query interfaces can be used + * unconditionally. This does not affect the actual APIC code in + * any way because either the local APIC address has not been + * registered or the local APIC was disabled on the command line. + */ + if (topo_info.boot_cpu_apic_id == BAD_APICID) + topology_register_boot_apic(0); + if (!restrict_to_up()) { if (WARN_ON_ONCE(assigned > nr_cpu_ids)) { disabled += assigned - nr_cpu_ids; diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c index a50ae8d63d1c..9a6069e7133c 100644 --- a/arch/x86/kernel/cpu/topology_common.c +++ b/arch/x86/kernel/cpu/topology_common.c @@ -140,7 +140,7 @@ static void parse_topology(struct topo_scan *tscan, bool early) } } -static void topo_set_ids(struct topo_scan *tscan) +static void topo_set_ids(struct topo_scan *tscan, bool early) { struct cpuinfo_x86 *c = tscan->c; u32 apicid = c->topo.apicid; @@ -148,8 +148,10 @@ static void topo_set_ids(struct topo_scan *tscan) c->topo.pkg_id = topo_shift_apicid(apicid, TOPO_PKG_DOMAIN); c->topo.die_id = topo_shift_apicid(apicid, TOPO_DIE_DOMAIN); - c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); - c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + if (!early) { + c->topo.logical_pkg_id = topology_get_logical_id(apicid, TOPO_PKG_DOMAIN); + c->topo.logical_die_id = topology_get_logical_id(apicid, TOPO_DIE_DOMAIN); + } /* Package relative core ID */ c->topo.core_id = (apicid & topo_domain_mask(TOPO_PKG_DOMAIN)) >> @@ -187,7 +189,7 @@ void cpu_parse_topology(struct cpuinfo_x86 *c) tscan.dom_shifts[dom], x86_topo_system.dom_shifts[dom]); } - topo_set_ids(&tscan); + topo_set_ids(&tscan, false); } void __init cpu_init_topology(struct cpuinfo_x86 *c) @@ -208,7 +210,7 @@ void __init cpu_init_topology(struct cpuinfo_x86 *c) x86_topo_system.dom_size[dom] = 1U << sft; } - topo_set_ids(&tscan); + topo_set_ids(&tscan, true); /* * AMD systems have Nodes per package which cannot be mapped to diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b66f540de054..6f1b379e3b38 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1016,17 +1016,6 @@ void __init e820__reserve_setup_data(void) e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - /* - * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by - * kexec and do not need to be reserved. - */ - if (data->type != SETUP_EFI && - data->type != SETUP_IMA && - data->type != SETUP_RNG_SEED) - e820__range_update_kexec(pa_data, - sizeof(*data) + data->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - if (data->type == SETUP_INDIRECT) { len += data->len; early_memunmap(data, sizeof(*data)); @@ -1038,12 +1027,9 @@ void __init e820__reserve_setup_data(void) indirect = (struct setup_indirect *)data->data; - if (indirect->type != SETUP_INDIRECT) { + if (indirect->type != SETUP_INDIRECT) e820__range_update(indirect->addr, indirect->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - e820__range_update_kexec(indirect->addr, indirect->len, - E820_TYPE_RAM, E820_TYPE_RESERVED_KERN); - } } pa_data = pa_next; @@ -1051,7 +1037,6 @@ void __init e820__reserve_setup_data(void) } e820__update_table(e820_table); - e820__update_table(e820_table_kexec); pr_info("extended physical RAM map:\n"); e820__print_table("reserve setup_data"); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 117e74c44e75..33a214b1a4ce 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -178,10 +178,11 @@ void fpu__init_cpu_xstate(void) * Must happen after CR4 setup and before xsetbv() to allow KVM * lazy passthrough. Write independent of the dynamic state static * key as that does not work on the boot CPU. This also ensures - * that any stale state is wiped out from XFD. + * that any stale state is wiped out from XFD. Reset the per CPU + * xfd cache too. */ if (cpu_feature_enabled(X86_FEATURE_XFD)) - wrmsrl(MSR_IA32_XFD, init_fpstate.xfd); + xfd_set_state(init_fpstate.xfd); /* * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features diff --git a/arch/x86/kernel/fpu/xstate.h b/arch/x86/kernel/fpu/xstate.h index 3518fb26d06b..19ca623ffa2a 100644 --- a/arch/x86/kernel/fpu/xstate.h +++ b/arch/x86/kernel/fpu/xstate.h @@ -148,20 +148,26 @@ static inline void xfd_validate_state(struct fpstate *fpstate, u64 mask, bool rs #endif #ifdef CONFIG_X86_64 +static inline void xfd_set_state(u64 xfd) +{ + wrmsrl(MSR_IA32_XFD, xfd); + __this_cpu_write(xfd_state, xfd); +} + static inline void xfd_update_state(struct fpstate *fpstate) { if (fpu_state_size_dynamic()) { u64 xfd = fpstate->xfd; - if (__this_cpu_read(xfd_state) != xfd) { - wrmsrl(MSR_IA32_XFD, xfd); - __this_cpu_write(xfd_state, xfd); - } + if (__this_cpu_read(xfd_state) != xfd) + xfd_set_state(xfd); } } extern int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu); #else +static inline void xfd_set_state(u64 xfd) { } + static inline void xfd_update_state(struct fpstate *fpstate) { } static inline int __xfd_enable_feature(u64 which, struct fpu_guest *guest_fpu) { diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 212e8e06aeba..a817ed0724d1 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -81,6 +81,13 @@ static inline bool check_la57_support(void) if (!(native_read_cr4() & X86_CR4_LA57)) return false; + RIP_REL_REF(__pgtable_l5_enabled) = 1; + RIP_REL_REF(pgdir_shift) = 48; + RIP_REL_REF(ptrs_per_p4d) = 512; + RIP_REL_REF(page_offset_base) = __PAGE_OFFSET_BASE_L5; + RIP_REL_REF(vmalloc_base) = __VMALLOC_BASE_L5; + RIP_REL_REF(vmemmap_base) = __VMEMMAP_BASE_L5; + return true; } @@ -175,7 +182,7 @@ unsigned long __head __startup_64(unsigned long physaddr, p4d = (p4dval_t *)&RIP_REL_REF(level4_kernel_pgt); p4d[MAX_PTRS_PER_P4D - 1] += load_delta; - pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE_NOENC; + pgd[pgd_index(__START_KERNEL_map)] = (pgdval_t)p4d | _PAGE_TABLE; } RIP_REL_REF(level3_kernel_pgt)[PTRS_PER_PUD - 2].pud += load_delta; @@ -431,15 +438,6 @@ asmlinkage __visible void __init __noreturn x86_64_start_kernel(char * real_mode (__START_KERNEL & PGDIR_MASK))); BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); - if (check_la57_support()) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - page_offset_base = __PAGE_OFFSET_BASE_L5; - vmalloc_base = __VMALLOC_BASE_L5; - vmemmap_base = __VMEMMAP_BASE_L5; - } - cr4_init_shadow(); /* Kill off the identity-map trampoline */ diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 091b3ab76a18..d0e49bd7c6f3 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -373,7 +373,16 @@ out: kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - if (is_endbr(*(u32 *)addr)) { + u32 insn; + + /* + * Since 'addr' is not guaranteed to be safe to access, use + * copy_from_kernel_nofault() to read the instruction: + */ + if (copy_from_kernel_nofault(&insn, (void *)addr, sizeof(u32))) + return NULL; + + if (is_endbr(insn)) { *on_func_entry = !offset || offset == 4; if (*on_func_entry) offset = 4; diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 1ccd30c8246f..e89171b0347a 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -197,12 +197,12 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) if (!smp_check_mpc(mpc, oem, str)) return 0; - /* Initialize the lapic mapping */ - if (!acpi_lapic) - register_lapic_address(mpc->lapic); - - if (early) + if (early) { + /* Initialize the lapic mapping */ + if (!acpi_lapic) + register_lapic_address(mpc->lapic); return 1; + } /* Now process the configuration blocks. */ while (count < mpc->length) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3e1e96efadfe..ef206500ed6f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1206,16 +1206,6 @@ void __init i386_reserve_resources(void) #endif /* CONFIG_X86_32 */ -#ifndef CONFIG_SMP -void __init smp_prepare_boot_cpu(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} -#endif - static struct notifier_block kernel_offset_notifier = { .notifier_call = dump_kernel_offset }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe355c89f6c1..76bb65045c64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -313,14 +313,6 @@ static void notrace start_secondary(void *unused) cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } -static void __init smp_store_boot_cpu_info(void) -{ - struct cpuinfo_x86 *c = &cpu_data(0); - - *c = boot_cpu_data; - c->initialized = true; -} - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -1039,29 +1031,15 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_die_cpumask(0)); } -static void __init smp_cpu_index_default(void) -{ - int i; - struct cpuinfo_x86 *c; - - for_each_possible_cpu(i) { - c = &cpu_data(i); - /* mark all to hotplug */ - c->cpu_index = nr_cpu_ids; - } -} - void __init smp_prepare_cpus_common(void) { unsigned int i; - smp_cpu_index_default(); - - /* - * Setup boot CPU information - */ - smp_store_boot_cpu_info(); /* Final full version of the data */ - mb(); + /* Mark all except the boot CPU as hotpluggable */ + for_each_possible_cpu(i) { + if (i) + per_cpu(cpu_info.cpu_index, i) = nr_cpu_ids; + } for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); |