From 17270717e80de33a884ad328fea5f407d87f6d6a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 21:43:31 -0500 Subject: x86/head: Remove confusing comment This comment is actively wrong and confusing. It refers to the registers' stack offsets after the pt_regs has been constructed on the stack, but this code is *before* that. At this point the stack just has the standard iret frame, for which no comment should be needed. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Jiri Slaby Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/a3c267b770fc56c9b86df9c11c552848248aace2.1505764066.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 513cbb012ecc..3b04e4c99389 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -270,10 +270,6 @@ bad_address: __INIT ENTRY(early_idt_handler_array) - # 104(%rsp) %rflags - # 96(%rsp) %cs - # 88(%rsp) %rip - # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 -- cgit v1.2.3 From a8b88e84d124bc92c4808e72b8b8c0e0bb538630 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 21:43:32 -0500 Subject: x86/head: Remove unused 'bad_address' code It's no longer possible for this code to be executed, so remove it. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Jiri Slaby Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/32a46fe92d2083700599b36872b26e7dfd7b7965.1505764066.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 3b04e4c99389..afb0a1e22d41 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -265,9 +265,6 @@ ENDPROC(start_cpu0) .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS __FINITDATA -bad_address: - jmp bad_address - __INIT ENTRY(early_idt_handler_array) i = 0 -- cgit v1.2.3 From 015a2ea5478680fc5216d56b7ff306f2a74efaf9 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 21:43:33 -0500 Subject: x86/head: Fix head ELF function annotations These functions aren't callable C-type functions, so don't annotate them as such. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Jiri Slaby Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/36eb182738c28514f8bf95e403d89b6413a88883.1505764066.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index afb0a1e22d41..edacd579d504 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -234,7 +234,7 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq .Lafter_lret: -ENDPROC(secondary_startup_64) +END(secondary_startup_64) #include "verify_cpu.S" @@ -277,7 +277,7 @@ ENTRY(early_idt_handler_array) i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handler_array) +END(early_idt_handler_array) early_idt_handler_common: /* @@ -320,7 +320,7 @@ early_idt_handler_common: 20: decl early_recursion_flag(%rip) jmp restore_regs_and_iret -ENDPROC(early_idt_handler_common) +END(early_idt_handler_common) __INITDATA -- cgit v1.2.3 From e93db75a0054b23a874a12c63376753544f3fe9e Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 21:43:34 -0500 Subject: x86/boot: Annotate verify_cpu() as a callable function verify_cpu() is a callable function. Annotate it as such. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Jiri Slaby Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/293024b8a080832075312f38c07ccc970fc70292.1505764066.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/verify_cpu.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index 014ea59aa153..3d3c2f71f617 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -33,7 +33,7 @@ #include #include -verify_cpu: +ENTRY(verify_cpu) pushf # Save caller passed flags push $0 # Kill any dangerous flags popf @@ -139,3 +139,4 @@ verify_cpu: popf # Restore caller passed flags xorl %eax, %eax ret +ENDPROC(verify_cpu) -- cgit v1.2.3 From 2704fbb672d0d9a19414907fda7949283dcef6a1 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Sep 2017 21:43:37 -0500 Subject: x86/head: Add unwind hint annotations Jiri Slaby reported an ORC issue when unwinding from an idle task. The stack was: ffffffff811083c2 do_idle+0x142/0x1e0 ffffffff8110861d cpu_startup_entry+0x5d/0x60 ffffffff82715f58 start_kernel+0x3ff/0x407 ffffffff827153e8 x86_64_start_kernel+0x14e/0x15d ffffffff810001bf secondary_startup_64+0x9f/0xa0 The ORC unwinder errored out at secondary_startup_64 because the head code isn't annotated yet so there wasn't a corresponding ORC entry. Fix that and any other head-related unwinding issues by adding unwind hints to the head code. Reported-by: Jiri Slaby Tested-by: Jiri Slaby Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Boris Ostrovsky Cc: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/78ef000a2f68f545d6eef44ee912edceaad82ccf.1505764066.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/head_64.S | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fd0a7895b63f..d8e2b700d1db 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -26,7 +26,6 @@ KASAN_SANITIZE_dumpstack.o := n KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n -OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y OBJECT_FILES_NON_STANDARD_test_nx.o := y diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index edacd579d504..42e32c2e51bb 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -49,6 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .code64 .globl startup_64 startup_64: + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table @@ -88,6 +89,7 @@ startup_64: addq $(early_top_pgt - __START_KERNEL_map), %rax jmp 1f ENTRY(secondary_startup_64) + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. @@ -132,6 +134,7 @@ ENTRY(secondary_startup_64) movq $1f, %rax jmp *%rax 1: + UNWIND_HINT_EMPTY /* Check if nx is implemented */ movl $0x80000001, %eax @@ -246,6 +249,7 @@ END(secondary_startup_64) */ ENTRY(start_cpu0) movq initial_stack(%rip), %rsp + UNWIND_HINT_EMPTY jmp .Ljump_to_C_code ENDPROC(start_cpu0) #endif @@ -270,13 +274,18 @@ ENTRY(early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 - pushq $0 # Dummy error code, to make stack frame uniform + UNWIND_HINT_IRET_REGS + pushq $0 # Dummy error code, to make stack frame uniform + .else + UNWIND_HINT_IRET_REGS offset=8 .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common + UNWIND_HINT_IRET_REGS i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr + UNWIND_HINT_IRET_REGS offset=16 END(early_idt_handler_array) early_idt_handler_common: @@ -305,6 +314,7 @@ early_idt_handler_common: pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS cmpq $14,%rsi /* Page fault? */ jnz 10f @@ -427,7 +437,7 @@ ENTRY(phys_base) EXPORT_SYMBOL(phys_base) #include "../../x86/xen/xen-head.S" - + __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -- cgit v1.2.3 From 11af847446ed0d131cf24d16a7ef3d5ea7a49554 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 13 Oct 2017 15:02:00 -0500 Subject: x86/unwind: Rename unwinder config options to 'CONFIG_UNWINDER_*' Rename the unwinder config options from: CONFIG_ORC_UNWINDER CONFIG_FRAME_POINTER_UNWINDER CONFIG_GUESS_UNWINDER to: CONFIG_UNWINDER_ORC CONFIG_UNWINDER_FRAME_POINTER CONFIG_UNWINDER_GUESS ... in order to give them a more logical config namespace. Suggested-by: Ingo Molnar Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/73972fc7e2762e91912c6b9584582703d6f1b8cc.1507924831.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index fd0a7895b63f..6209ab6deb50 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -127,9 +127,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o obj-$(CONFIG_TRACING) += tracepoint.o obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o -obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o -obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o -obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o +obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o +obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o +obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o ### # 64 bit specific files -- cgit v1.2.3 From 0b00de857a648dafe7020878c7a27cf776f5edf4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 13 Oct 2017 14:56:42 -0700 Subject: x86/cpuid: Add generic table for CPUID dependencies Some CPUID features depend on other features. Currently it's possible to to clear dependent features, but not clear the base features, which can cause various interesting problems. This patch implements a generic table to describe dependencies between CPUID features, to be used by all code that clears CPUID. Some subsystems (like XSAVE) had an own implementation of this, but it's better to do it all in a single place for everyone. Then clear_cpu_cap and setup_clear_cpu_cap always look up this table and clear all dependencies too. This is intended to be a practical table: only for features that make sense to clear. If someone for example clears FPU, or other features that are essentially part of the required base feature set, not much is going to work. Handling that is right now out of scope. We're only handling features which can be usefully cleared. Signed-off-by: Andi Kleen Reviewed-by: Thomas Gleixner Cc: Jonathan McDowell Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171013215645.23166-3-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/Makefile | 1 + arch/x86/kernel/cpu/cpuid-deps.c | 113 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 arch/x86/kernel/cpu/cpuid-deps.c (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index e17942c131c8..de260fae1017 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -22,6 +22,7 @@ obj-y += rdrand.o obj-y += match.o obj-y += bugs.o obj-$(CONFIG_CPU_FREQ) += aperfmperf.o +obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c new file mode 100644 index 000000000000..e48eb7313120 --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -0,0 +1,113 @@ +/* Declare dependencies between CPUIDs */ +#include +#include +#include +#include + +struct cpuid_dep { + unsigned int feature; + unsigned int depends; +}; + +/* + * Table of CPUID features that depend on others. + * + * This only includes dependencies that can be usefully disabled, not + * features part of the base set (like FPU). + * + * Note this all is not __init / __initdata because it can be + * called from cpu hotplug. It shouldn't do anything in this case, + * but it's difficult to tell that to the init reference checker. + */ +const static struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + {} +}; + +static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit) +{ + clear_bit32(bit, c->x86_capability); +} + +static inline void __setup_clear_cpu_cap(unsigned int bit) +{ + clear_cpu_cap(&boot_cpu_data, bit); + set_bit32(bit, cpu_caps_cleared); +} + +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) +{ + if (!c) + __setup_clear_cpu_cap(feature); + else + __clear_cpu_cap(c, feature); +} + +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + bool changed; + DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8); + const struct cpuid_dep *d; + + clear_feature(c, feature); + + /* Collect all features to disable, handling dependencies */ + memset(disable, 0, sizeof(disable)); + __set_bit(feature, disable); + + /* Loop until we get a stable state. */ + do { + changed = false; + for (d = cpuid_deps; d->feature; d++) { + if (!test_bit(d->depends, disable)) + continue; + if (__test_and_set_bit(d->feature, disable)) + continue; + + changed = true; + clear_feature(c, d->feature); + } + } while (changed); +} + +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + do_clear_cpu_cap(c, feature); +} + +void setup_clear_cpu_cap(unsigned int feature) +{ + do_clear_cpu_cap(NULL, feature); +} -- cgit v1.2.3 From 0c2a3913d6f50503f7c59d83a6219e39508cc898 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 13 Oct 2017 14:56:43 -0700 Subject: x86/fpu: Parse clearcpuid= as early XSAVE argument With a followon patch we want to make clearcpuid affect the XSAVE configuration. But xsave is currently initialized before arguments are parsed. Move the clearcpuid= parsing into the special early xsave argument parsing code. Since clearcpuid= contains a = we need to keep the old __setup around as a dummy, otherwise it would end up as a environment variable in init's environment. Signed-off-by: Andi Kleen Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171013215645.23166-4-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 16 +++++++--------- arch/x86/kernel/fpu/init.c | 11 +++++++++++ 2 files changed, 18 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c9176bae7fd8..03bb004bb15e 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(")\n"); } -static __init int setup_disablecpuid(char *arg) +/* + * clearcpuid= was already parsed in fpu__init_parse_early_param. + * But we need to keep a dummy __setup around otherwise it would + * show up as an environment variable for init. + */ +static __init int setup_clearcpuid(char *arg) { - int bit; - - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; } -__setup("clearcpuid=", setup_disablecpuid); +__setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7affb7e3d9a5..6abd83572b01 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { + char arg[32]; + char *argptr = arg; + int bit; + if (cmdline_find_option_bool(boot_command_line, "no387")) setup_clear_cpu_cap(X86_FEATURE_FPU); @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) if (cmdline_find_option_bool(boot_command_line, "noxsaves")) setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, + sizeof(arg)) && + get_option(&argptr, &bit) && + bit >= 0 && + bit < NCAPINTS * 32) + setup_clear_cpu_cap(bit); } /* -- cgit v1.2.3 From ccb18db2ab9d923df07e7495123fe5fb02329713 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 13 Oct 2017 14:56:44 -0700 Subject: x86/fpu: Make XSAVE check the base CPUID features before enabling Before enabling XSAVE, not only check the XSAVE specific CPUID bits, but also the base CPUID features of the respective XSAVE feature. This allows to disable individual XSAVE states using the existing clearcpuid= option, which can be useful for performance testing and debugging, and also in general avoids inconsistencies. Signed-off-by: Andi Kleen Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171013215645.23166-5-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f1d5476c9022..fb581292975b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -15,6 +15,7 @@ #include #include +#include /* * Although we spell it out in here, the Processor Trace @@ -36,6 +37,19 @@ static const char *xfeature_names[] = "unknown xstate feature" , }; +static short xsave_cpuid_features[] __initdata = { + X86_FEATURE_FPU, + X86_FEATURE_XMM, + X86_FEATURE_AVX, + X86_FEATURE_MPX, + X86_FEATURE_MPX, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_INTEL_PT, + X86_FEATURE_PKU, +}; + /* * Mask of xstate features supported by the CPU and the kernel: */ @@ -726,6 +740,7 @@ void __init fpu__init_system_xstate(void) unsigned int eax, ebx, ecx, edx; static int on_boot_cpu __initdata = 1; int err; + int i; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -759,6 +774,14 @@ void __init fpu__init_system_xstate(void) goto out_disable; } + /* + * Clear XSAVE features that are disabled in the normal CPUID. + */ + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { + if (!boot_cpu_has(xsave_cpuid_features[i])) + xfeatures_mask &= ~BIT(i); + } + xfeatures_mask &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ -- cgit v1.2.3 From 73e3a7d2a7c3be29a5a22b85026f6cfa5664267f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 13 Oct 2017 14:56:45 -0700 Subject: x86/fpu: Remove the explicit clearing of XSAVE dependent features Clearing a CPU feature with setup_clear_cpu_cap() clears all features which depend on it. Expressing feature dependencies in one place is easier to maintain than keeping functions like fpu__xstate_clear_all_cpu_caps() up to date. The features which depend on XSAVE have their dependency expressed in the dependency table, so its sufficient to clear X86_FEATURE_XSAVE. Remove the explicit clearing of XSAVE dependent features. Signed-off-by: Andi Kleen Reviewed-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171013215645.23166-6-andi@firstfloor.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fb581292975b..87a57b7642d3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -73,26 +73,6 @@ unsigned int fpu_user_xstate_size; void fpu__xstate_clear_all_cpu_caps(void) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); - setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); - setup_clear_cpu_cap(X86_FEATURE_AVX512BW); - setup_clear_cpu_cap(X86_FEATURE_AVX512VL); - setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_XGETBV1); - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); - setup_clear_cpu_cap(X86_FEATURE_PKU); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* -- cgit v1.2.3 From 57b8b1a1856adaa849d02d547411a553a531022b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 18 Oct 2017 19:39:35 +0200 Subject: x86/cpuid: Prevent out of bound access in do_clear_cpu_cap() do_clear_cpu_cap() allocates a bitmap to keep track of disabled feature dependencies. That bitmap is sized NCAPINTS * BITS_PER_INIT. The possible 'features' which can be handed in are larger than this, because after the capabilities the bug 'feature' bits occupy another 32bit. Not really obvious... So clearing any of the misfeature bits, as 32bit does for the F00F bug, accesses that bitmap out of bounds thereby corrupting the stack. Size the bitmap proper and add a sanity check to catch accidental out of bound access. Fixes: 0b00de857a64 ("x86/cpuid: Add generic table for CPUID dependencies") Reported-by: kernel test robot Signed-off-by: Thomas Gleixner Cc: Andi Kleen Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20171018022023.GA12058@yexl-desktop --- arch/x86/kernel/cpu/cpuid-deps.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index e48eb7313120..c1d49842a411 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -75,11 +75,17 @@ static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) __clear_cpu_cap(c, feature); } +/* Take the capabilities and the BUG bits into account */ +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) + static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) { - bool changed; - DECLARE_BITMAP(disable, NCAPINTS * sizeof(u32) * 8); + DECLARE_BITMAP(disable, MAX_FEATURE_BITS); const struct cpuid_dep *d; + bool changed; + + if (WARN_ON(feature >= MAX_FEATURE_BITS)) + return; clear_feature(c, feature); -- cgit v1.2.3 From da20ab35180780e4a6eadc804544f1fa967f3567 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 18 Oct 2017 10:21:07 -0700 Subject: x86/entry: Use SYSCALL_DEFINE() macros for sys_modify_ldt() We do not have tracepoints for sys_modify_ldt() because we define it directly instead of using the normal SYSCALL_DEFINEx() macros. However, there is a reason sys_modify_ldt() does not use the macros: it has an 'int' return type instead of 'unsigned long'. This is a bug, but it's a bug cemented in the ABI. What does this mean? If we return -EINVAL from a function that returns 'int', we have 0x00000000ffffffea in %rax. But, if we return -EINVAL from a function returning 'unsigned long', we end up with 0xffffffffffffffea in %rax, which is wrong. To work around this and maintain the 'int' behavior while using the SYSCALL_DEFINEx() macros, so we add a cast to 'unsigned int' in both implementations of sys_modify_ldt(). Signed-off-by: Dave Hansen Reviewed-by: Andy Lutomirski Reviewed-by: Brian Gerst Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171018172107.1A79C532@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/ldt.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index f0e64db18ac8..0402d44deb4d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -294,8 +295,8 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { int ret = -ENOSYS; @@ -313,5 +314,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, ret = write_ldt(ptr, bytecount, 0); break; } - return ret; + /* + * The SYSCALL_DEFINE() macros give us an 'unsigned long' + * return type, but tht ABI for sys_modify_ldt() expects + * 'int'. This cast gives us an int-sized value in %rax + * for the return code. The 'unsigned' is necessary so + * the compiler does not try to sign-extend the negative + * return codes into the high half of the register when + * taking the value from int->long. + */ + return (unsigned int)ret; } -- cgit v1.2.3 From 82c62fa0c49aa305104013cee4468772799bb391 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 20 Oct 2017 11:21:35 -0500 Subject: x86/asm: Don't use the confusing '.ifeq' directive I find the '.ifeq ' directive to be confusing. Reading it quickly seems to suggest its opposite meaning, or that it's missing an argument. Improve readability by replacing all of its x86 uses with '.if == 0'. Signed-off-by: Josh Poimboeuf Cc: Andrei Vagin Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/757da028e802c7e98d23fbab8d234b1063e161cf.1508516398.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_32.S | 2 +- arch/x86/kernel/head_64.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9ed3074d0d27..6e50f87765e5 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -401,7 +401,7 @@ ENTRY(early_idt_handler_array) # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 42e32c2e51bb..311db1a73c11 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -273,7 +273,7 @@ ENDPROC(start_cpu0) ENTRY(early_idt_handler_array) i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 UNWIND_HINT_IRET_REGS pushq $0 # Dummy error code, to make stack frame uniform .else -- cgit v1.2.3 From c128dbfa0f879f8ce7b79054037889b0b2240728 Mon Sep 17 00:00:00 2001 From: Gayatri Kammela Date: Mon, 30 Oct 2017 18:20:29 -0700 Subject: x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features Add a few new SSE/AVX/AVX512 instruction groups/features for enumeration in /proc/cpuinfo: AVX512_VBMI2, GFNI, VAES, VPCLMULQDQ, AVX512_VNNI, AVX512_BITALG. CPUID.(EAX=7,ECX=0):ECX[bit 6] AVX512_VBMI2 CPUID.(EAX=7,ECX=0):ECX[bit 8] GFNI CPUID.(EAX=7,ECX=0):ECX[bit 9] VAES CPUID.(EAX=7,ECX=0):ECX[bit 10] VPCLMULQDQ CPUID.(EAX=7,ECX=0):ECX[bit 11] AVX512_VNNI CPUID.(EAX=7,ECX=0):ECX[bit 12] AVX512_BITALG Detailed information of CPUID bits for these features can be found in the Intel Architecture Instruction Set Extensions and Future Features Programming Interface document (refer to Table 1-1. and Table 1-2.). A copy of this document is available at https://bugzilla.kernel.org/show_bug.cgi?id=197239 Signed-off-by: Gayatri Kammela Acked-by: Thomas Gleixner Cc: Andi Kleen Cc: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Yang Zhong Cc: bp@alien8.de Link: http://lkml.kernel.org/r/1509412829-23380-1-git-send-email-gayatri.kammela@intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/cpuid-deps.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index c1d49842a411..c21f22d836ad 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -50,6 +50,12 @@ const static struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, -- cgit v1.2.3 From b0ce5b8c95c83a7b98c679b117e3d6ae6f97154b Mon Sep 17 00:00:00 2001 From: Ricardo Neri Date: Fri, 27 Oct 2017 13:25:29 -0700 Subject: x86/boot: Relocate definition of the initial state of CR0 Both head_32.S and head_64.S utilize the same value to initialize the control register CR0. Also, other parts of the kernel might want to access this initial definition (e.g., emulation code for User-Mode Instruction Prevention uses this state to provide a sane dummy value for CR0 when emulating the smsw instruction). Thus, relocate this definition to a header file from which it can be conveniently accessed. Suggested-by: Borislav Petkov Signed-off-by: Ricardo Neri Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Andy Lutomirski Cc: "Michael S. Tsirkin" Cc: Peter Zijlstra Cc: Dave Hansen Cc: ricardo.neri@intel.com Cc: linux-mm@kvack.org Cc: Paul Gortmaker Cc: Huang Rui Cc: Shuah Khan Cc: linux-arch@vger.kernel.org Cc: Jonathan Corbet Cc: Jiri Slaby Cc: "Ravi V. Shankar" Cc: Denys Vlasenko Cc: Chris Metcalf Cc: Brian Gerst Cc: Josh Poimboeuf Cc: Chen Yucong Cc: Vlastimil Babka Cc: Dave Hansen Cc: Andy Lutomirski Cc: Masami Hiramatsu Cc: Paolo Bonzini Cc: Andrew Morton Cc: Linus Torvalds Link: https://lkml.kernel.org/r/1509135945-13762-3-git-send-email-ricardo.neri-calderon@linux.intel.com --- arch/x86/kernel/head_32.S | 3 --- arch/x86/kernel/head_64.S | 3 --- 2 files changed, 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9ed3074d0d27..c3cfc655f551 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -211,9 +211,6 @@ ENTRY(startup_32_smp) #endif .Ldefault_entry: -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $(CR0_STATE & ~X86_CR0_PG),%eax movl %eax,%cr0 diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 513cbb012ecc..5e1bfdd86b5b 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -149,9 +149,6 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 -- cgit v1.2.3 From 26c4ef9c49d8a0341f6d97ce2cfdd55d1236ed29 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:58:59 -0700 Subject: x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths These code paths will diverge soon. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/dccf8c7b3750199b4b30383c812d4e2931811509.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/head_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 189bf42dfa2b..08f067faa264 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -326,7 +326,7 @@ early_idt_handler_common: 20: decl early_recursion_flag(%rip) - jmp restore_regs_and_iret + jmp restore_regs_and_return_to_kernel END(early_idt_handler_common) __INITDATA -- cgit v1.2.3 From bd7dc5a6afac719d8ce4092391eef2c7e83c2a75 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:09 -0700 Subject: x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out of native_load_sp0() This causes the MSR_IA32_SYSENTER_CS write to move out of the paravirt callback. This shouldn't affect Xen PV: Xen already ignores MSR_IA32_SYSENTER_ESP writes. In any event, Xen doesn't support vm86() in a useful way. Note to any potential backporters: This patch won't break lguest, as lguest didn't have any SYSENTER support at all. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/75cf09fe03ae778532d0ca6c65aa58e66bc2f90c.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 4 +++- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vm86_32.c | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 11966251cd42..0936ed3da6b6 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Reload esp0 and cpu_current_top_of_stack. This changes - * current_thread_info(). + * current_thread_info(). Refresh the SYSENTER configuration in + * case prev or next is vm86. */ load_sp0(tss, next); + refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 302e7b2572d1..a6ff6d1a0110 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -464,7 +464,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ this_cpu_write(current_task, next_p); - /* Reload esp0 and ss1. This changes current_thread_info(). */ + /* Reload sp0. */ load_sp0(tss, next); /* diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 7924a5356c8a..5bc1c3ab6287 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -54,6 +54,7 @@ #include #include #include +#include /* * Known problems: @@ -149,6 +150,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; load_sp0(tss, &tsk->thread); + refresh_sysenter_cs(&tsk->thread); vm86->saved_sp0 = 0; put_cpu(); @@ -368,8 +370,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) /* make room for real-mode segments */ tsk->thread.sp0 += 16; - if (static_cpu_has(X86_FEATURE_SEP)) + if (static_cpu_has(X86_FEATURE_SEP)) { tsk->thread.sysenter_cs = 0; + refresh_sysenter_cs(&tsk->thread); + } load_sp0(tss, &tsk->thread); put_cpu(); -- cgit v1.2.3 From da51da189a24bb9b7e2d5a123be096e51a4695a5 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:10 -0700 Subject: x86/entry/64: Pass SP0 directly to load_sp0() load_sp0() had an odd signature: void load_sp0(struct tss_struct *tss, struct thread_struct *thread); Simplify it to: void load_sp0(unsigned long sp0); Also simplify a few get_cpu()/put_cpu() sequences to preempt_disable()/preempt_enable(). Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2655d8b42ed940aa384fe18ee1129bbbcf730a08.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vm86_32.c | 14 ++++++-------- 4 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 03bb004bb15e..4e7fb9c3bfa5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1570,7 +1570,7 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - load_sp0(t, ¤t->thread); + load_sp0(current->thread.sp0); set_tss_desc(cpu, t); load_TR_desc(); load_mm_ldt(&init_mm); @@ -1625,7 +1625,7 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - load_sp0(t, thread); + load_sp0(thread->sp0); set_tss_desc(cpu, t); load_TR_desc(); load_mm_ldt(&init_mm); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0936ed3da6b6..40b85870e429 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ - load_sp0(tss, next); + load_sp0(next->sp0); refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a6ff6d1a0110..2124304fb77a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); /* Reload sp0. */ - load_sp0(tss, next); + load_sp0(next->sp0); /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 5bc1c3ab6287..0f1d92cd20ad 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -94,7 +94,6 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86plus_struct __user *user; struct vm86 *vm86 = current->thread.vm86; @@ -146,13 +145,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) do_exit(SIGSEGV); } - tss = &per_cpu(cpu_tss, get_cpu()); + preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tss, &tsk->thread); + load_sp0(tsk->thread.sp0); refresh_sysenter_cs(&tsk->thread); vm86->saved_sp0 = 0; - put_cpu(); + preempt_enable(); memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs)); @@ -238,7 +237,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg) static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) { - struct tss_struct *tss; struct task_struct *tsk = current; struct vm86 *vm86 = tsk->thread.vm86; struct kernel_vm86_regs vm86regs; @@ -366,8 +364,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) vm86->saved_sp0 = tsk->thread.sp0; lazy_save_gs(vm86->regs32.gs); - tss = &per_cpu(cpu_tss, get_cpu()); /* make room for real-mode segments */ + preempt_disable(); tsk->thread.sp0 += 16; if (static_cpu_has(X86_FEATURE_SEP)) { @@ -375,8 +373,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) refresh_sysenter_cs(&tsk->thread); } - load_sp0(tss, &tsk->thread); - put_cpu(); + load_sp0(tsk->thread.sp0); + preempt_enable(); if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); -- cgit v1.2.3 From 20bb83443ea79087b5e5f8dab4e9d80bb9bf7acb Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:13 -0700 Subject: x86/entry/64: Stop initializing TSS.sp0 at boot In my quest to get rid of thread_struct::sp0, I want to clean up or remove all of its readers. Two of them are in cpu_init() (32-bit and 64-bit), and they aren't needed. This is because we never enter userspace at all on the threads that CPUs are initialized in. Poison the initial TSS.sp0 and stop initializing it on CPU init. The comment text mostly comes from Dave Hansen. Thanks! Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/ee4a00540ad28c6cff475fbcc7769a4460acc861.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 13 ++++++++++--- arch/x86/kernel/process.c | 8 +++++++- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 4e7fb9c3bfa5..cdf79ab628c2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1570,9 +1570,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - load_sp0(current->thread.sp0); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); clear_all_debug_regs(); @@ -1594,7 +1598,6 @@ void cpu_init(void) int cpu = smp_processor_id(); struct task_struct *curr = current; struct tss_struct *t = &per_cpu(cpu_tss, cpu); - struct thread_struct *thread = &curr->thread; wait_for_master_cpu(cpu); @@ -1625,9 +1628,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - load_sp0(thread->sp0); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bd6b85fac666..ff8a9acbcf8b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -48,7 +48,13 @@ */ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .x86_tss = { - .sp0 = TOP_OF_INIT_STACK, + /* + * .sp0 is only used when entering ring 0 from a lower + * privilege level. Since the init task never runs anything + * but ring 0 code, there is no need for a valid value here. + * Poison it. + */ + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, -- cgit v1.2.3 From 46f5a10a721ce8dce8cc8fe55279b49e1c6b3288 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:14 -0700 Subject: x86/entry/64: Remove all remaining direct thread_struct::sp0 reads The only remaining readers in context switch code or vm86(), and they all just want to update TSS.sp0 to match the current task. Replace them all with a new helper update_sp0(). Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/2d231687f4ff288c9d9e98d7861b7df374246ac3.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/x86/kernel/vm86_32.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 40b85870e429..45bf0c5f93e1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -287,7 +287,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ - load_sp0(next->sp0); + update_sp0(next_p); refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 2124304fb77a..45e380958392 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -465,7 +465,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) this_cpu_write(current_task, next_p); /* Reload sp0. */ - load_sp0(next->sp0); + update_sp0(next_p); /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 0f1d92cd20ad..a7b44c75c642 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -148,7 +148,7 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) preempt_disable(); tsk->thread.sp0 = vm86->saved_sp0; tsk->thread.sysenter_cs = __KERNEL_CS; - load_sp0(tsk->thread.sp0); + update_sp0(tsk); refresh_sysenter_cs(&tsk->thread); vm86->saved_sp0 = 0; preempt_enable(); @@ -373,7 +373,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus) refresh_sysenter_cs(&tsk->thread); } - load_sp0(tsk->thread.sp0); + update_sp0(tsk); preempt_enable(); if (vm86->flags & VM86_SCREEN_BITMAP) -- cgit v1.2.3 From cd493a6deb8b78eca280d05f7fa73fd69403ae29 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:15 -0700 Subject: x86/entry/32: Fix cpu_current_top_of_stack initialization at boot cpu_current_top_of_stack's initialization forgot about TOP_OF_KERNEL_STACK_PADDING. This bug didn't matter because the idle threads never enter user mode. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/e5e370a7e6e4fddd1c4e4cf619765d96bb874b21.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ad59edd84de7..06c18fe1c09e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); - per_cpu(cpu_current_top_of_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif -- cgit v1.2.3 From d375cf1530595e33961a8844192cddab913650e3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:16 -0700 Subject: x86/entry/64: Remove thread_struct::sp0 On x86_64, we can easily calculate sp0 when needed instead of storing it in thread_struct. On x86_32, a similar cleanup would be possible, but it would require cleaning up the vm86 code first, and that can wait for a later cleanup series. Signed-off-by: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/719cd9c66c548c4350d98a90f050aee8b17f8919.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 45e380958392..eeeb34f85c25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct inactive_task_frame *frame; struct task_struct *me = current; - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; -- cgit v1.2.3 From 3383642c2f9d4f5b4fa37436db4a109a1a10018c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 2 Nov 2017 00:59:17 -0700 Subject: x86/traps: Use a new on_thread_stack() helper to clean up an assertion Let's keep the stack-related logic together rather than open-coding a comparison in an assertion in the traps code. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/856b15bee1f55017b8f79d3758b0d51c48a08cf8.1509609304.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 67db4f43309e..42a9c4458f5d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * will catch asm bugs and any attempt to use ist_preempt_enable * from double_fault. */ - BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer) >= THREAD_SIZE); + BUG_ON(!on_thread_stack()); preempt_enable_no_resched(); } -- cgit v1.2.3