From 735a6dd02222d8d070c7bb748f25895239ca8c92 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 26 Feb 2020 15:16:15 -0800 Subject: x86/pkeys: Manually set X86_FEATURE_OSPKE to preserve existing changes Explicitly set X86_FEATURE_OSPKE via set_cpu_cap() instead of calling get_cpu_cap() to pull the feature bit from CPUID after enabling CR4.PKE. Invoking get_cpu_cap() effectively wipes out any {set,clear}_cpu_cap() changes that were made between this_cpu->c_init() and setup_pku(), as all non-synthetic feature words are reinitialized from the CPU's CPUID values. Blasting away capability updates manifests most visibility when running on a VMX capable CPU, but with VMX disabled by BIOS. To indicate that VMX is disabled, init_ia32_feat_ctl() clears X86_FEATURE_VMX, using clear_cpu_cap() instead of setup_clear_cpu_cap() so that KVM can report which CPU is misconfigured (KVM needs to probe every CPU anyways). Restoring X86_FEATURE_VMX from CPUID causes KVM to think VMX is enabled, ultimately leading to an unexpected #GP when KVM attempts to do VMXON. Arguably, init_ia32_feat_ctl() should use setup_clear_cpu_cap() and let KVM figure out a different way to report the misconfigured CPU, but VMX is not the only feature bit that is affected, i.e. there is precedent that tweaking feature bits via {set,clear}_cpu_cap() after ->c_init() is expected to work. Most notably, x86_init_rdrand()'s clearing of X86_FEATURE_RDRAND when RDRAND malfunctions is also overwritten. Fixes: 0697694564c8 ("x86/mm/pkeys: Actually enable Memory Protection Keys in the CPU") Reported-by: Jacob Keller Signed-off-by: Sean Christopherson Signed-off-by: Borislav Petkov Acked-by: Dave Hansen Tested-by: Jacob Keller Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200226231615.13664-1-sean.j.christopherson@intel.com --- arch/x86/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 52c9bfbbdb2a..4cdb123ff66a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -445,7 +445,7 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c) * cpuid bit to be set. We need to ensure that we * update that bit in this CPU's "cpu_info". */ - get_cpu_cap(c); + set_cpu_cap(c, X86_FEATURE_OSPKE); } #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -- cgit v1.2.3 From a262bca3aba03f0696995beb223c610e47533db3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 18 Feb 2020 09:08:23 +0800 Subject: KVM: Introduce pv check helpers Introduce some pv check helpers for consistency. Suggested-by: Vitaly Kuznetsov Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d817f255aed8..7bc0fff3f8e6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -425,7 +425,27 @@ static void __init sev_map_percpu_data(void) } } +static bool pv_tlb_flush_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + #ifdef CONFIG_SMP + +static bool pv_ipi_supported(void) +{ + return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI); +} + +static bool pv_sched_yield_supported(void) +{ + return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && + kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); +} + #define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG) static void __send_ipi_mask(const struct cpumask *mask, int vector) @@ -619,9 +639,7 @@ static void __init kvm_guest_init(void) pv_ops.time.steal_clock = kvm_steal_clock; } - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; } @@ -632,9 +650,7 @@ static void __init kvm_guest_init(void) #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; - if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_sched_yield_supported()) { smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi; pr_info("KVM setup pv sched yield\n"); } @@ -700,7 +716,7 @@ static uint32_t __init kvm_detect(void) static void __init kvm_apic_init(void) { #if defined(CONFIG_SMP) - if (kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI)) + if (pv_ipi_supported()) kvm_setup_pv_ipi(); #endif } @@ -739,9 +755,7 @@ static __init int kvm_setup_pv_tlb_flush(void) if (!kvm_para_available() || nopv) return 0; - if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_REALTIME) && - kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { + if (pv_tlb_flush_supported()) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); -- cgit v1.2.3 From 8a9442f49c72bde43f982e53b74526ac37d3565b Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 18 Feb 2020 09:08:24 +0800 Subject: KVM: Pre-allocate 1 cpumask variable per cpu for both pv tlb and pv ipis Nick Desaulniers Reported: When building with: $ make CC=clang arch/x86/ CFLAGS=-Wframe-larger-than=1000 The following warning is observed: arch/x86/kernel/kvm.c:494:13: warning: stack frame size of 1064 bytes in function 'kvm_send_ipi_mask_allbutself' [-Wframe-larger-than=] static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) ^ Debugging with: https://github.com/ClangBuiltLinux/frame-larger-than via: $ python3 frame_larger_than.py arch/x86/kernel/kvm.o \ kvm_send_ipi_mask_allbutself points to the stack allocated `struct cpumask newmask` in `kvm_send_ipi_mask_allbutself`. The size of a `struct cpumask` is potentially large, as it's CONFIG_NR_CPUS divided by BITS_PER_LONG for the target architecture. CONFIG_NR_CPUS for X86_64 can be as high as 8192, making a single instance of a `struct cpumask` 1024 B. This patch fixes it by pre-allocate 1 cpumask variable per cpu and use it for both pv tlb and pv ipis.. Reported-by: Nick Desaulniers Acked-by: Nick Desaulniers Reviewed-by: Vitaly Kuznetsov Cc: Peter Zijlstra Cc: Nick Desaulniers Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kernel/kvm.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7bc0fff3f8e6..6efe0410fb72 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -432,6 +432,8 @@ static bool pv_tlb_flush_supported(void) kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)); } +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask); + #ifdef CONFIG_SMP static bool pv_ipi_supported(void) @@ -510,12 +512,12 @@ static void kvm_send_ipi_mask(const struct cpumask *mask, int vector) static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) { unsigned int this_cpu = smp_processor_id(); - struct cpumask new_mask; + struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); const struct cpumask *local_mask; - cpumask_copy(&new_mask, mask); - cpumask_clear_cpu(this_cpu, &new_mask); - local_mask = &new_mask; + cpumask_copy(new_mask, mask); + cpumask_clear_cpu(this_cpu, new_mask); + local_mask = new_mask; __send_ipi_mask(local_mask, vector); } @@ -595,7 +597,6 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -static DEFINE_PER_CPU(cpumask_var_t, __pv_tlb_mask); static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) @@ -603,7 +604,7 @@ static void kvm_flush_tlb_others(const struct cpumask *cpumask, u8 state; int cpu; struct kvm_steal_time *src; - struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_tlb_mask); + struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask); cpumask_copy(flushmask, cpumask); /* @@ -642,6 +643,7 @@ static void __init kvm_guest_init(void) if (pv_tlb_flush_supported()) { pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others; pv_ops.mmu.tlb_remove_table = tlb_remove_table; + pr_info("KVM setup pv remote TLB flush\n"); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) @@ -748,24 +750,31 @@ static __init int activate_jump_labels(void) } arch_initcall(activate_jump_labels); -static __init int kvm_setup_pv_tlb_flush(void) +static __init int kvm_alloc_cpumask(void) { int cpu; + bool alloc = false; if (!kvm_para_available() || nopv) return 0; - if (pv_tlb_flush_supported()) { + if (pv_tlb_flush_supported()) + alloc = true; + +#if defined(CONFIG_SMP) + if (pv_ipi_supported()) + alloc = true; +#endif + + if (alloc) for_each_possible_cpu(cpu) { - zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), + zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu), GFP_KERNEL, cpu_to_node(cpu)); } - pr_info("KVM setup pv remote TLB flush\n"); - } return 0; } -arch_initcall(kvm_setup_pv_tlb_flush); +arch_initcall(kvm_alloc_cpumask); #ifdef CONFIG_PARAVIRT_SPINLOCKS -- cgit v1.2.3 From 99bcd4a6e5b8ba201fdd252f1054689884899fee Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 18 Feb 2020 16:47:12 +0100 Subject: x86/ioperm: Add new paravirt function update_io_bitmap() Commit 111e7b15cf10f6 ("x86/ioperm: Extend IOPL config to control ioperm() as well") reworked the iopl syscall to use I/O bitmaps. Unfortunately this broke Xen PV domains using that syscall as there is currently no I/O bitmap support in PV domains. Add I/O bitmap support via a new paravirt function update_io_bitmap which Xen PV domains can use to update their I/O bitmaps via a hypercall. Fixes: 111e7b15cf10f6 ("x86/ioperm: Extend IOPL config to control ioperm() as well") Reported-by: Jan Beulich Signed-off-by: Juergen Gross Signed-off-by: Thomas Gleixner Tested-by: Jan Beulich Reviewed-by: Jan Beulich Cc: # 5.5 Link: https://lkml.kernel.org/r/20200218154712.25490-1-jgross@suse.com --- arch/x86/kernel/paravirt.c | 5 +++++ arch/x86/kernel/process.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 789f5e4f89de..c131ba4e70ef 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -30,6 +30,7 @@ #include #include #include +#include /* * nop stub, which must not clobber anything *including the stack* to @@ -341,6 +342,10 @@ struct paravirt_patch_template pv_ops = { .cpu.iret = native_iret, .cpu.swapgs = native_swapgs, +#ifdef CONFIG_X86_IOPL_IOPERM + .cpu.update_io_bitmap = native_tss_update_io_bitmap, +#endif + .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 839b5244e3b7..3053c85e0e42 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -374,7 +374,7 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) /** * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode */ -void tss_update_io_bitmap(void) +void native_tss_update_io_bitmap(void) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); struct thread_struct *t = ¤t->thread; -- cgit v1.2.3