Diffstat (limited to 'arch')
78 files changed, 4063 insertions, 3321 deletions
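
The crypto glue conversions in the hunks below (camellia, cast5/6, serpent, twofish, sha*) all follow one pattern: the open-coded xgetbv(XCR_XFEATURE_ENABLED_MASK) checks are replaced by the new cpu_has_xfeatures() helper, and FPU-touching kernel code keeps using kernel_fpu_begin()/kernel_fpu_end(), both declared in the new <asm/fpu/api.h>. Below is a minimal, illustrative sketch of that pattern, based only on the API as it appears in this diff; the module name, function names and message text are made up for illustration and are not part of the patch. Header visibility of the XSTATE_* bits is assumed to follow the converted glue files, which get by with <asm/fpu/api.h>.

	/*
	 * Illustrative sketch (not part of the patch): probe xstate support
	 * with cpu_has_xfeatures() and wrap SIMD work in
	 * kernel_fpu_begin()/kernel_fpu_end(), as the converted glue code does.
	 */
	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/errno.h>
	#include <asm/fpu/api.h>

	static void example_do_simd_work(void)
	{
		kernel_fpu_begin();	/* disables preemption around FPU/SIMD use */
		/* ... AVX/SSE code would run here ... */
		kernel_fpu_end();
	}

	static int __init example_simd_init(void)
	{
		const char *feature_name;

		/* Fails (and names the missing feature) if XCR0 lacks SSE/YMM state: */
		if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
			pr_info("CPU feature '%s' is not supported.\n", feature_name);
			return -ENODEV;
		}

		example_do_simd_work();
		return 0;
	}
	module_init(example_simd_init);
	MODULE_LICENSE("GPL");

Passing NULL instead of &feature_name is also allowed when only the boolean result is needed, as the sha1/sha256/sha512 avx_usable() checks below do.
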
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a645f05..2fd3ebbb4e33 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. +config X86_DEBUG_FPU + bool "Debug the x86 FPU code" + depends on DEBUG_KERNEL + default y + ---help--- + If this option is enabled then there will be extra sanity + checks and (boot time) debug printouts added to the kernel. + This debugging adds some small amount of runtime overhead + to the kernel. + + If unsure, say N. + endmenu diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefacf2af..b419f43ce0c5 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -32,7 +32,7 @@ #include <crypto/lrw.h> #include <crypto/xts.h> #include <asm/cpu_device_id.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/crypto/aes.h> #include <crypto/ablk_helper.h> #include <crypto/scatterwalk.h> diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index baf0ac21ace5..4c65c70e628b 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -19,8 +19,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -561,16 +560,15 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { pr_info("AVX2 or AES-NI instructions are not detected.\n"); return -ENODEV; } - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX2 detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 78818a1e73e3..80a0e4389c9a 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -19,8 +19,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/camellia.h> #include <asm/crypto/glue_helper.h> @@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { - pr_info("AVX or AES-NI instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 236c80974457..be00aa48b2b5 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -31,8 +31,7 @@ #include <crypto/cast5.h> #include <crypto/cryptd.h> #include <crypto/ctr.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include 
<asm/crypto/glue_helper.h> #define CAST5_PARALLEL_BLOCKS 16 @@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { { static int __init cast5_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index f448810ca4ac..5dbba7224221 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -36,8 +36,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/glue_helper.h> #define CAST6_PARALLEL_BLOCKS 8 @@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { { static int __init cast6_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 1937fc1d8763..07d2c6c86a54 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -35,7 +35,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 28640c3d6af7..81a595d75cf5 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -32,8 +32,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index b6c67bf30fdf..a3fcfc97a311 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -29,7 +29,7 @@ #include <linux/init.h> #include <linux/string.h> #include <linux/kernel.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/cpufeature.h> #include <asm/cpu_device_id.h> diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index f368ba261739..5a2f30f9f52d 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/crypto.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> struct crypto_fpu_ctx { struct crypto_blkcipher *child; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf06bdd..64d7cf1b50e1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include <crypto/cryptd.h> #include <crypto/gf128mul.h> 
#include <crypto/internal/hash.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/cpu_device_id.h> #define GHASH_BLOCK_SIZE 16 diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 2f63dc89e7a9..7d838dc4d888 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -20,8 +20,7 @@ #include <crypto/lrw.h> #include <crypto/xts.h> #include <crypto/serpent.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> @@ -537,16 +536,14 @@ static struct crypto_alg srp_algs[10] = { { static int __init init(void) { - u64 xcr0; + const char *feature_name; if (!cpu_has_avx2 || !cpu_has_osxsave) { pr_info("AVX2 instructions are not detected.\n"); return -ENODEV; } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index c8d478af8456..da7dafc9b16d 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -36,8 +36,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/serpent-avx.h> #include <asm/crypto/glue_helper.h> @@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { { static int __init serpent_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index e510b1c5d690..f53ed1dc88ea 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -65,11 +65,8 @@ #include <crypto/mcryptd.h> #include <crypto/crypto_wq.h> #include <asm/byteorder.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> #include <linux/hardirq.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/api.h> #include "sha_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 33d1b9dc14cc..7c48e8b20848 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -29,9 +29,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, @@ -123,15 +121,9 @@ static struct shash_alg alg = { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | 
XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index ccc338881ee8..f8097fc0d1d1 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -37,9 +37,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <linux/string.h> asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, @@ -132,15 +130,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index d9fa4c1e063f..2edad7b81870 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -35,9 +35,7 @@ #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha512_base.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <linux/string.h> @@ -131,15 +129,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b5e2d5651851..c2bd0ce718ee 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -36,9 +36,7 @@ #include <crypto/ctr.h> #include <crypto/lrw.h> #include <crypto/xts.h> -#include <asm/i387.h> -#include <asm/xcr.h> -#include <asm/xsave.h> +#include <asm/fpu/api.h> #include <asm/crypto/twofish.h> #include <asm/crypto/glue_helper.h> #include <crypto/scatterwalk.h> @@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { { static int __init twofish_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index c81d35e6c7f1..ae3a29ae875b 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,8 +21,8 @@ #include <linux/binfmts.h> #include <asm/ucontext.h> #include <asm/uaccess.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> #include <asm/ptrace.h> #include <asm/ia32_unistd.h> #include 
<asm/user32.h> @@ -198,7 +198,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); - err |= restore_xstate_sig(buf, 1); + err |= fpu__restore_sig(buf, 1); force_iret(); @@ -308,6 +308,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { + struct fpu *fpu = &current->thread.fpu; + unsigned long sp; /* Default to using normal stack */ @@ -322,12 +323,12 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (used_math()) { + if (fpu->fpstate_active) { unsigned long fx_aligned, math_size; - sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_ia32 __user *) sp; - if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, math_size) < 0) return (void __user *) -1L; } diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ba32af062f61..7bfc85bbb8ff 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -52,6 +52,12 @@ struct alt_instr { u8 padlen; /* length of build-time padding */ } __packed; +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; + extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 1eef55596e82..03bb1065c335 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -7,7 +7,7 @@ #include <linux/kernel.h> #include <linux/crypto.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <crypto/b128ops.h> typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b138b843..155162ea0e00 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_EFI_H #define _ASM_X86_EFI_H -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/pgtable.h> /* diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h deleted file mode 100644 index da5e96756570..000000000000 --- a/arch/x86/include/asm/fpu-internal.h +++ /dev/null @@ -1,626 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _FPU_INTERNAL_H -#define _FPU_INTERNAL_H - -#include <linux/kernel_stat.h> -#include <linux/regset.h> -#include <linux/compat.h> -#include <linux/slab.h> -#include <asm/asm.h> -#include <asm/cpufeature.h> -#include <asm/processor.h> -#include <asm/sigcontext.h> -#include <asm/user.h> -#include <asm/uaccess.h> -#include <asm/xsave.h> -#include <asm/smap.h> - -#ifdef CONFIG_X86_64 -# include <asm/sigcontext32.h> -# include <asm/user32.h> -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -#else -# define user_i387_ia32_struct user_i387_struct -# define 
user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame -#endif - -extern unsigned int mxcsr_feature_mask; -extern void fpu_init(void); -extern void eager_fpu_init(void); - -DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); - -extern void convert_from_fxsr(struct user_i387_ia32_struct *env, - struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env); - -extern user_regset_active_fn fpregs_active, xfpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; - -/* - * xstateregs_active == fpregs_active. Please refer to the comment - * at the definition of fpregs_active. - */ -#define xstateregs_active fpregs_active - -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - -/* - * Must be run with preemption disabled: this clears the fpu_owner_task, - * on this CPU. - * - * This will disable any lazy FPU state restore of the current FPU state, - * but if the current thread owns the FPU, it will still be saved by. - */ -static inline void __cpu_disable_lazy_restore(unsigned int cpu) -{ - per_cpu(fpu_owner_task, cpu) = NULL; -} - -/* - * Used to indicate that the FPU state in memory is newer than the FPU - * state in registers, and the FPU state should be reloaded next time the - * task is run. Only safe on the current task, or non-running tasks. - */ -static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk) -{ - tsk->thread.fpu.last_cpu = ~0; -} - -static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) -{ - return new == this_cpu_read_stable(fpu_owner_task) && - cpu == new->thread.fpu.last_cpu; -} - -static inline int is_ia32_compat_frame(void) -{ - return config_enabled(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); -} - -static inline int is_ia32_frame(void) -{ - return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); -} - -static inline int is_x32_frame(void) -{ - return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); -} - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - -static __always_inline __pure bool use_eager_fpu(void) -{ - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); -} - -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has_safe(X86_FEATURE_FXSR); -} - -static inline void fx_finit(struct i387_fxsave_struct *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} - -extern void __sanitize_i387_state(struct task_struct *); - -static inline void sanitize_i387_state(struct task_struct *tsk) -{ - if (!use_xsaveopt()) - return; - __sanitize_i387_state(tsk); -} - -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define check_insn(insn, output, input...) 
\ -({ \ - int err; \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -static inline int fsave_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); -} - -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int frstor_checking(struct i387_fsave_struct *fx) -{ - return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void fpu_fxsave(struct fpu *fpu) -{ - if (config_enabled(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); - } -} - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact. - */ -static inline int fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) { - fpu_xsave(fpu); - - /* - * xsave header may indicate the init state of the FP. 
- */ - if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) - return 1; - } else if (use_fxsr()) { - fpu_fxsave(fpu); - } else { - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - return 0; - } - - /* - * If exceptions are pending, we need to clear them so - * that we don't randomly get exceptions later. - * - * FIXME! Is this perhaps only true for the old-style - * irq13 case? Maybe we could leave the x87 state - * intact otherwise? - */ - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { - asm volatile("fnclex"); - return 0; - } - return 1; -} - -static inline int __save_init_fpu(struct task_struct *tsk) -{ - return fpu_save_init(&tsk->thread.fpu); -} - -static inline int fpu_restore_checking(struct fpu *fpu) -{ - if (use_xsave()) - return fpu_xrstor_checking(&fpu->state->xsave); - else if (use_fxsr()) - return fxrstor_checking(&fpu->state->fxsave); - else - return frstor_checking(&fpu->state->fsave); -} - -static inline int restore_fpu_checking(struct task_struct *tsk) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (tsk->thread.fpu.has_fpu)); - } - - return fpu_restore_checking(&tsk->thread.fpu); -} - -/* - * Software FPU state helpers. Careful: these need to - * be preemption protection *and* they need to be - * properly paired with the CR0.TS changes! - */ -static inline int __thread_has_fpu(struct task_struct *tsk) -{ - return tsk->thread.fpu.has_fpu; -} - -/* Must be paired with an 'stts' after! */ -static inline void __thread_clear_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 0; - this_cpu_write(fpu_owner_task, NULL); -} - -/* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct task_struct *tsk) -{ - tsk->thread.fpu.has_fpu = 1; - this_cpu_write(fpu_owner_task, tsk); -} - -/* - * Encapsulate the CR0.TS handling together with the - * software flag. - * - * These generally need preemption protection to work, - * do try to avoid using these on their own. - */ -static inline void __thread_fpu_end(struct task_struct *tsk) -{ - __thread_clear_has_fpu(tsk); - if (!use_eager_fpu()) - stts(); -} - -static inline void __thread_fpu_begin(struct task_struct *tsk) -{ - if (!use_eager_fpu()) - clts(); - __thread_set_has_fpu(tsk); -} - -static inline void drop_fpu(struct task_struct *tsk) -{ - /* - * Forget coprocessor state.. - */ - preempt_disable(); - tsk->thread.fpu_counter = 0; - - if (__thread_has_fpu(tsk)) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); - } - - clear_stopped_child_used_math(tsk); - preempt_enable(); -} - -static inline void restore_init_xstate(void) -{ - if (use_xsave()) - xrstor_state(init_xstate_buf, -1); - else - fxrstor_checking(&init_xstate_buf->i387); -} - -/* - * Reset the FPU state in the eager case and drop it in the lazy case (later use - * will reinit it). - */ -static inline void fpu_reset_state(struct task_struct *tsk) -{ - if (!use_eager_fpu()) - drop_fpu(tsk); - else - restore_init_xstate(); -} - -/* - * FPU state switching for scheduling. 
- * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state and - * sets the new state of the CR0.TS bit. This is - * done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -typedef struct { int preload; } fpu_switch_t; - -static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) -{ - fpu_switch_t fpu; - - /* - * If the task has used the math, pre-load the FPU on xsave processors - * or if the past 5 consecutive context-switches used math. - */ - fpu.preload = tsk_used_math(new) && - (use_eager_fpu() || new->thread.fpu_counter > 5); - - if (__thread_has_fpu(old)) { - if (!__save_init_fpu(old)) - task_disable_lazy_fpu_restore(old); - else - old->thread.fpu.last_cpu = cpu; - - /* But leave fpu_owner_task! */ - old->thread.fpu.has_fpu = 0; - - /* Don't change CR0.TS if we just switch! */ - if (fpu.preload) { - new->thread.fpu_counter++; - __thread_set_has_fpu(new); - prefetch(new->thread.fpu.state); - } else if (!use_eager_fpu()) - stts(); - } else { - old->thread.fpu_counter = 0; - task_disable_lazy_fpu_restore(old); - if (fpu.preload) { - new->thread.fpu_counter++; - if (fpu_lazy_restore(new, cpu)) - fpu.preload = 0; - else - prefetch(new->thread.fpu.state); - __thread_fpu_begin(new); - } - } - return fpu; -} - -/* - * By the time this gets called, we've already cleared CR0.TS and - * given the process the FPU if we are going to preload the FPU - * state - all we need to do is to conditionally restore the register - * state itself. - */ -static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) -{ - if (fpu.preload) { - if (unlikely(restore_fpu_checking(new))) - fpu_reset_state(new); - } -} - -/* - * Signal frame handlers... - */ -extern int save_xstate_sig(void __user *buf, void __user *fx, int size); -extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); - -static inline int xstate_sigframe_size(void) -{ - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; -} - -static inline int restore_xstate_sig(void __user *buf, int ia32_frame) -{ - void __user *buf_fx = buf; - int size = xstate_sigframe_size(); - - if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); - } - - return __restore_xstate_sig(buf, buf_fx, size); -} - -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). 
- */ -static inline void user_fpu_begin(void) -{ - preempt_disable(); - if (!user_has_fpu()) - __thread_fpu_begin(current); - preempt_enable(); -} - -static inline void __save_fpu(struct task_struct *tsk) -{ - if (use_xsave()) { - if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&tsk->thread.fpu.state->xsave, -1); - else - xsave_state(&tsk->thread.fpu.state->xsave, -1); - } else - fpu_fxsave(&tsk->thread.fpu); -} - -/* - * i387 state interaction - */ -static inline unsigned short get_fpu_cwd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.cwd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.swd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) -{ - if (cpu_has_xmm) { - return tsk->thread.fpu.state->fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - -static bool fpu_allocated(struct fpu *fpu) -{ - return fpu->state != NULL; -} - -static inline int fpu_alloc(struct fpu *fpu) -{ - if (fpu_allocated(fpu)) - return 0; - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - WARN_ON((unsigned long)fpu->state & 15); - return 0; -} - -static inline void fpu_free(struct fpu *fpu) -{ - if (fpu->state) { - kmem_cache_free(task_xstate_cachep, fpu->state); - fpu->state = NULL; - } -} - -static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) -{ - if (use_eager_fpu()) { - memset(&dst->thread.fpu.state->xsave, 0, xstate_size); - __save_fpu(dst); - } else { - struct fpu *dfpu = &dst->thread.fpu; - struct fpu *sfpu = &src->thread.fpu; - - unlazy_fpu(src); - memcpy(dfpu->state, sfpu->state, xstate_size); - } -} - -static inline unsigned long -alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, - unsigned long *size) -{ - unsigned long frame_size = xstate_sigframe_size(); - - *buf_fx = sp = round_down(sp - frame_size, 64); - if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); - } - - *size = frame_size; - return sp; -} - -#endif diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h new file mode 100644 index 000000000000..1429a7c736db --- /dev/null +++ b/arch/x86/include/asm/fpu/api.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_API_H +#define _ASM_X86_FPU_API_H + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. 
+ */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); +extern bool irq_fpu_usable(void); + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +extern int irq_ts_save(void); +extern void irq_ts_restore(int TS_state); + +/* + * Query the presence of one or more xfeatures. Works on any legacy CPU as well. + * + * If 'feature_name' is set then put a human-readable description of + * the feature there as well - this can be used to print error (or success) + * messages. + */ +extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); + +#endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h new file mode 100644 index 000000000000..3c3550c3a4a3 --- /dev/null +++ b/arch/x86/include/asm/fpu/internal.h @@ -0,0 +1,694 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_INTERNAL_H +#define _ASM_X86_FPU_INTERNAL_H + +#include <linux/compat.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/user.h> +#include <asm/fpu/api.h> +#include <asm/fpu/xstate.h> + +/* + * High level FPU state handling functions: + */ +extern void fpu__activate_curr(struct fpu *fpu); +extern void fpu__activate_fpstate_read(struct fpu *fpu); +extern void fpu__activate_fpstate_write(struct fpu *fpu); +extern void fpu__save(struct fpu *fpu); +extern void fpu__restore(struct fpu *fpu); +extern int fpu__restore_sig(void __user *buf, int ia32_frame); +extern void fpu__drop(struct fpu *fpu); +extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern void fpu__clear(struct fpu *fpu); +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); + +/* + * Boot time FPU initialization functions: + */ +extern void fpu__init_cpu(void); +extern void fpu__init_system_xstate(void); +extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + +/* + * Debugging facility: + */ +#ifdef CONFIG_X86_DEBUG_FPU +# define WARN_ON_FPU(x) WARN_ON_ONCE(x) +#else +# define WARN_ON_FPU(x) ({ (void)(x); 0; }) +#endif + +/* + * FPU related CPU feature flag helper routines: + */ +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); +} + +static __always_inline __pure bool use_xsaveopt(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); +} + +static __always_inline __pure bool use_xsave(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has_safe(X86_FEATURE_FXSR); +} + +/* + * fpstate handling functions: + */ + +extern union fpregs_state init_fpstate; + +extern void fpstate_init(union fpregs_state *state); +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static 
inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif +static inline void fpstate_init_fxstate(struct fxregs_state *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} +extern void fpstate_sanitize_xstate(struct fpu *fpu); + +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile(ASM_STAC "\n" \ + "1:" #insn "\n\t" \ + "2: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int copy_fregs_to_user(struct fregs_state __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); +} + +static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + + /* See comment in copy_fxregs_to_kernel() below. */ + return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); +} + +static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) +{ + int err; + + if (config_enabled(CONFIG_X86_32)) { + err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + if (config_enabled(CONFIG_AS_FXSAVEQ)) { + err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + /* See comment in copy_fxregs_to_kernel() below. */ + err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + } + } + /* Copying from a kernel buffer to FPU registers should never fail: */ + WARN_ON_FPU(err); +} + +static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + + /* See comment in copy_fxregs_to_kernel() below. */ + return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline void copy_kernel_to_fregs(struct fregs_state *fx) +{ + int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + + WARN_ON_FPU(err); +} + +static inline int copy_user_to_fregs(struct fregs_state __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void copy_fxregs_to_kernel(struct fpu *fpu) +{ + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. 
+ * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state.fxsave) + : [fx] "R" (&fpu->state.fxsave)); + } +} + +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +/* xstate instruction fault handler: */ +#define xstate_fault(__err) \ + \ + ".section .fixup,\"ax\"\n" \ + \ + "3: movl $-2,%[_err]\n" \ + " jmp 2b\n" \ + \ + ".previous\n" \ + \ + _ASM_EXTABLE(1b, 3b) \ + : [_err] "=r" (__err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); + + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Save processor xstate to xsave area. + */ +static inline void copy_xregs_to_kernel(struct xregs_state *xstate) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(!alternatives_patched); + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. 
+ */ + alternative_input_2( + "1:"XSAVE, + XSAVEOPT, + X86_FEATURE_XSAVEOPT, + XSAVES, + X86_FEATURE_XSAVES, + [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault(err) + : "0" (err) + : "memory"); + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Restore processor xstate from xsave area. + */ +static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + XRSTORS, + X86_FEATURE_XSAVES, + "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault(err) + : "0" (err) + : "memory"); + + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ +static inline int copy_xregs_to_user(struct xregs_state __user *buf) +{ + int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->header, sizeof(buf->header)); + if (unlikely(err)) + return -EFAULT; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XSAVE"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (buf), "a" (-1), "d" (-1), "0" (err) + : "memory"); + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +{ + struct xregs_state *xstate = ((__force struct xregs_state *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XRSTOR"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); /* memory required? */ + return err; +} + +/* + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact and we can + * keep registers active. + * + * The legacy FNSAVE instruction cleared all FPU state + * unconditionally, so registers are essentially destroyed. + * Modern FPU state can be kept in registers, if there are + * no pending FP exceptions. + */ +static inline int copy_fpregs_to_fpstate(struct fpu *fpu) +{ + if (likely(use_xsave())) { + copy_xregs_to_kernel(&fpu->state.xsave); + return 1; + } + + if (likely(use_fxsr())) { + copy_fxregs_to_kernel(fpu); + return 1; + } + + /* + * Legacy FPU register saving, FNSAVE always clears FPU registers, + * so we have to mark them inactive: + */ + asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); + + return 0; +} + +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) +{ + if (use_xsave()) { + copy_kernel_to_xregs(&fpstate->xsave, -1); + } else { + if (use_fxsr()) + copy_kernel_to_fxregs(&fpstate->fxsave); + else + copy_kernel_to_fregs(&fpstate->fsave); + } +} + +static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) +{ + /* + * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is + * pending. Clear the x87 state here by setting it to fixed values. 
+ * "m" is a random variable that should be in L1. + */ + if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { + asm volatile( + "fnclex\n\t" + "emms\n\t" + "fildl %P[addr]" /* set F?P to defined value */ + : : [addr] "m" (fpstate)); + } + + __copy_kernel_to_fpregs(fpstate); +} + +extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); + +/* + * FPU context switch related helper methods: + */ + +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* + * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, + * on this CPU. + * + * This will disable any lazy FPU state restore of the current FPU state, + * but if the current thread owns the FPU, it will still be saved by. + */ +static inline void __cpu_disable_lazy_restore(unsigned int cpu) +{ + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; +} + +static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + + +/* + * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' + * idiom, which is then paired with the sw-flag (fpregs_active) later on: + */ + +static inline void __fpregs_activate_hw(void) +{ + if (!use_eager_fpu()) + clts(); +} + +static inline void __fpregs_deactivate_hw(void) +{ + if (!use_eager_fpu()) + stts(); +} + +/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ +static inline void __fpregs_deactivate(struct fpu *fpu) +{ + WARN_ON_FPU(!fpu->fpregs_active); + + fpu->fpregs_active = 0; + this_cpu_write(fpu_fpregs_owner_ctx, NULL); +} + +/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ +static inline void __fpregs_activate(struct fpu *fpu) +{ + WARN_ON_FPU(fpu->fpregs_active); + + fpu->fpregs_active = 1; + this_cpu_write(fpu_fpregs_owner_ctx, fpu); +} + +/* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. + */ +static inline int fpregs_active(void) +{ + return current->thread.fpu.fpregs_active; +} + +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ +static inline void fpregs_activate(struct fpu *fpu) +{ + __fpregs_activate_hw(); + __fpregs_activate(fpu); +} + +static inline void fpregs_deactivate(struct fpu *fpu) +{ + __fpregs_deactivate(fpu); + __fpregs_deactivate_hw(); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +static inline fpu_switch_t +switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) +{ + fpu_switch_t fpu; + + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. 
+ */ + fpu.preload = new_fpu->fpstate_active && + (use_eager_fpu() || new_fpu->counter > 5); + + if (old_fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(old_fpu)) + old_fpu->last_cpu = -1; + else + old_fpu->last_cpu = cpu; + + /* But leave fpu_fpregs_owner_ctx! */ + old_fpu->fpregs_active = 0; + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + new_fpu->counter++; + __fpregs_activate(new_fpu); + prefetch(&new_fpu->state); + } else { + __fpregs_deactivate_hw(); + } + } else { + old_fpu->counter = 0; + old_fpu->last_cpu = -1; + if (fpu.preload) { + new_fpu->counter++; + if (fpu_want_lazy_restore(new_fpu, cpu)) + fpu.preload = 0; + else + prefetch(&new_fpu->state); + fpregs_activate(new_fpu); + } + } + return fpu; +} + +/* + * Misc helper functions: + */ + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) +{ + if (fpu_switch.preload) + copy_kernel_to_fpregs(&new_fpu->state); +} + +/* + * Needs to be preemption-safe. + * + * NOTE! user_fpu_begin() must be used only immediately before restoring + * the save state. It does not do any saving/restoring on its own. In + * lazy FPU mode, it is just an optimization to avoid a #NM exception, + * the task can lose the FPU right after preempt_enable(). + */ +static inline void user_fpu_begin(void) +{ + struct fpu *fpu = &current->thread.fpu; + + preempt_disable(); + if (!fpregs_active()) + fpregs_activate(fpu); + preempt_enable(); +} + +/* + * MXCSR and XCR definitions: + */ + +extern unsigned int mxcsr_feature_mask; + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +#endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h new file mode 100644 index 000000000000..39d3107ac6c7 --- /dev/null +++ b/arch/x86/include/asm/fpu/regset.h @@ -0,0 +1,21 @@ +/* + * FPU regset handling methods: + */ +#ifndef _ASM_X86_FPU_REGSET_H +#define _ASM_X86_FPU_REGSET_H + +#include <linux/regset.h> + +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == regset_fpregs_active. Please refer to the comment + * at the definition of regset_fpregs_active. 
+ */ +#define xstateregs_active regset_fpregs_active + +#endif /* _ASM_X86_FPU_REGSET_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h new file mode 100644 index 000000000000..7358e9d61f1e --- /dev/null +++ b/arch/x86/include/asm/fpu/signal.h @@ -0,0 +1,33 @@ +/* + * x86 FPU signal frame handling methods: + */ +#ifndef _ASM_X86_FPU_SIGNAL_H +#define _ASM_X86_FPU_SIGNAL_H + +#ifdef CONFIG_X86_64 +# include <asm/sigcontext32.h> +# include <asm/user32.h> +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size); + +extern void fpu__init_prepare_fx_sw_frame(void); + +#endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h new file mode 100644 index 000000000000..0637826292de --- /dev/null +++ b/arch/x86/include/asm/fpu/types.h @@ -0,0 +1,293 @@ +/* + * FPU data structures: + */ +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H + +/* + * The legacy x87 FPU state format, as saved by FSAVE and + * restored by the FRSTOR instructions: + */ +struct fregs_state { + u32 cwd; /* FPU Control Word */ + u32 swd; /* FPU Status Word */ + u32 twd; /* FPU Tag Word */ + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Pointer Offset */ + u32 fos; /* FPU Operand Pointer Selector */ + + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + + /* Software status information [not touched by FSAVE]: */ + u32 status; +}; + +/* + * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and + * restored by the FXRSTOR instructions. It's similar to the FSAVE + * format, but differs in some areas, plus has extensions at + * the end for the XMM registers. + */ +struct fxregs_state { + u16 cwd; /* Control Word */ + u16 swd; /* Status Word */ + u16 twd; /* Tag Word */ + u16 fop; /* Last Instruction Opcode */ + union { + struct { + u64 rip; /* Instruction Pointer */ + u64 rdp; /* Data Pointer */ + }; + struct { + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Offset */ + u32 fos; /* FPU Operand Selector */ + }; + }; + u32 mxcsr; /* MXCSR Register State */ + u32 mxcsr_mask; /* MXCSR Mask */ + + /* 8*16 bytes for each FP-reg = 128 bytes: */ + u32 st_space[32]; + + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; + +} __attribute__((aligned(16))); + +/* Default value for fxregs_state.mxcsr: */ +#define MXCSR_DEFAULT 0x1f80 + +/* + * Software based FPU emulation state. 
This is arbitrary really, + * it matches the x87 format to make it easier to understand: + */ +struct swregs_state { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + u8 ftop; + u8 changed; + u8 lookahead; + u8 no_update; + u8 rm; + u8 alimit; + struct math_emu_info *info; + u32 entry_eip; +}; + +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* + * There are 16x 256-bit AVX registers named YMM0-YMM15. + * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) + * and are stored in 'struct fxregs_state::xmm_space[]'. + * + * The high 128 bits are stored here: + * 16x 128 bits == 256 bytes. + */ +struct ymmh_struct { + u8 ymmh_space[256]; +}; + +/* We don't support LWP yet: */ +struct lwp_struct { + u8 reserved[128]; +}; + +/* Intel MPX support: */ +struct bndreg { + u64 lower_bound; + u64 upper_bound; +} __packed; + +struct bndcsr { + u64 bndcfgu; + u64 bndstatus; +} __packed; + +struct mpx_struct { + struct bndreg bndreg[4]; + struct bndcsr bndcsr; +}; + +struct xstate_header { + u64 xfeatures; + u64 xcomp_bv; + u64 reserved[6]; +} __attribute__((packed)); + +/* New processor state extensions should be added here: */ +#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ + sizeof(struct lwp_struct) + \ + sizeof(struct mpx_struct) ) +/* + * This is our most modern FPU state format, as saved by the XSAVE + * and restored by the XRSTOR instructions. + * + * It consists of a legacy fxregs portion, an xstate header and + * subsequent fixed size areas as defined by the xstate header. + * Not all CPUs support all the extensions. + */ +struct xregs_state { + struct fxregs_state i387; + struct xstate_header header; + u8 __reserved[XSTATE_RESERVE]; +} __attribute__ ((packed, aligned (64))); + +/* + * This is a union of all the possible FPU state formats + * put together, so that we can pick the right one runtime. + * + * The size of the structure is determined by the largest + * member - which is the xsave area: + */ +union fpregs_state { + struct fregs_state fsave; + struct fxregs_state fxsave; + struct swregs_state soft; + struct xregs_state xsave; +}; + +/* + * Highest level per task FPU state data structure that + * contains the FPU register state plus various FPU + * state fields: + */ +struct fpu { + /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. 
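Spelled out numerically, the xfeature_bit enum above encodes the standard XCR0/xstate bit assignments, so the derived masks come out as the familiar values. A compile-time sketch of what the definitions expand to (illustrative only, assuming C11 _Static_assert):

    _Static_assert(XSTATE_FP == 0x01 && XSTATE_SSE == 0x02 && XSTATE_YMM == 0x04,
                   "x87 is bit 0, SSE is bit 1, AVX/YMM is bit 2");
    _Static_assert(XSTATE_FPSSE == 0x03,
                   "the legacy FP+SSE pair covers bits 0-1");
    _Static_assert(XSTATE_AVX512 == 0xe0,
                   "the three AVX-512 components cover bits 5-7");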
+ * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. + * + * This is the 'lazy restore' window of optimization, which + * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. + * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches. (if FPU state exists.) + */ + union fpregs_state state; + + /* + * @last_cpu: + * + * Records the last CPU on which this context was loaded into + * FPU registers. (In the lazy-restore case we might be + * able to reuse FPU registers across multiple context switches + * this way, if no intermediate task used the FPU.) + * + * A value of -1 is used to indicate that the FPU state in context + * memory is newer than the FPU state in registers, and that the + * FPU state should be reloaded next time the task is run. + */ + unsigned int last_cpu; + + /* + * @fpstate_active: + * + * This flag indicates whether this context is active: if the task + * is not running then we can restore from this context, if the task + * is running then we should save into this context. + */ + unsigned char fpstate_active; + + /* + * @fpregs_active: + * + * This flag determines whether a given context is actively + * loaded into the FPU's registers and that those registers + * represent the task's current FPU state. + * + * Note the interaction with fpstate_active: + * + * # task does not use the FPU: + * fpstate_active == 0 + * + * # task uses the FPU and regs are active: + * fpstate_active == 1 && fpregs_active == 1 + * + * # the regs are inactive but still match fpstate: + * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu + * + * The third state is what we use for the lazy restore optimization + * on lazy-switching CPUs. + */ + unsigned char fpregs_active; + + /* + * @counter: + * + * This counter contains the number of consecutive context switches + * during which the FPU stays used. If this is over a threshold, the + * lazy FPU restore logic becomes eager, to save the trap overhead. 
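In code, the "is the lazy-restore window still open?" question documented above reduces to a two-part ownership test on fpu_fpregs_owner_ctx and fpu->last_cpu. A rough sketch of the helper that switch_fpu_prepare() consults (the in-tree fpu_want_lazy_restore() may differ in detail):

    static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
    {
            /*
             * The hardware registers still belong to this context only if
             * nobody else has claimed them on this CPU and this context
             * was last loaded on this very CPU:
             */
            return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) &&
                   cpu == fpu->last_cpu;
    }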
+ * This is an unsigned char so that after 256 iterations the counter + * wraps and the context switch behavior turns lazy again; this is to + * deal with bursty apps that only use the FPU for a short time: + */ + unsigned char counter; +}; + +#endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h new file mode 100644 index 000000000000..4656b25bb9a7 --- /dev/null +++ b/arch/x86/include/asm/fpu/xstate.h @@ -0,0 +1,46 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include <linux/types.h> +#include <asm/processor.h> +#include <linux/uaccess.h> + +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) + +#define XSTATE_CPUID 0x0000000d + +#define FXSAVE_SIZE 512 + +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) + +/* Supported features which support lazy state saving */ +#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ + | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* Supported features which require eager state saving */ +#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) + +/* All currently supported features */ +#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +extern unsigned int xstate_size; +extern u64 xfeatures_mask; +extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; + +extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); + +void *get_xsave_addr(struct xregs_state *xsave, int xstate); +const void *get_xsave_field_ptr(int xstate_field); + +#endif diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h deleted file mode 100644 index 6eb6fcb83f63..000000000000 --- a/arch/x86/include/asm/i387.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H - -#ifndef __ASSEMBLY__ - -#include <linux/sched.h> -#include <linux/hardirq.h> - -struct pt_regs; -struct user_i387_struct; - -extern int init_fpu(struct task_struct *child); -extern void fpu_finit(struct fpu *fpu); -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void math_state_restore(void); - -extern bool irq_fpu_usable(void); - -/* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. 
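The (removed) comment above describes the "caller manages preemption" model around __kernel_fpu_begin()/__kernel_fpu_end(). A hedged sketch of what such a caller looks like (the function name is hypothetical; plain kernel_fpu_begin()/end() is the right choice when no preempt notifier is involved):

    static void do_simd_work_preempt_managed(void)
    {
            preempt_disable();
            __kernel_fpu_begin();

            /* ... use SSE/AVX registers here ... */

            __kernel_fpu_end();
            preempt_enable();
    }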
- */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); - __kernel_fpu_begin(); -} - -static inline void kernel_fpu_end(void) -{ - __kernel_fpu_end(); - preempt_enable(); -} - -/* Must be called with preempt disabled */ -extern void kernel_fpu_disable(void); -extern void kernel_fpu_enable(void); - -/* - * Some instructions like VIA's padlock instructions generate a spurious - * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context as well. To prevent these kernel instructions - * in interrupt context interacting wrongly with other user/kernel fpu usage, we - * should use them only in the context of irq_ts_save/restore() - */ -static inline int irq_ts_save(void) -{ - /* - * If in process context and not atomic, we can take a spurious DNA fault. - * Otherwise, doing clts() in process context requires disabling preemption - * or some heavy lifting like kernel_fpu_begin() - */ - if (!in_atomic()) - return 0; - - if (read_cr0() & X86_CR0_TS) { - clts(); - return 1; - } - - return 0; -} - -static inline void irq_ts_restore(int TS_state) -{ - if (TS_state) - stts(); -} - -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. - */ -static inline int user_has_fpu(void) -{ - return current->thread.fpu.has_fpu; -} - -extern void unlazy_fpu(struct task_struct *tsk); - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f4a555beef19..f8c0ec3a4a97 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1002,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -int fx_init(struct kvm_vcpu *vcpu); - void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b933fa4..5e8daee7c5c9 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm) paravirt_arch_exit_mmap(mm); } +#ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} +#else +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return false; +} +#endif + static inline void arch_bprm_mm_init(struct mm_struct *mm, struct vm_area_struct *vma) { diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index a952a13d59a7..7a35495275a9 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -13,55 +13,50 @@ #define MPX_BNDCFG_ENABLE_FLAG 0x1 #define MPX_BD_ENTRY_VALID_FLAG 0x1 -#ifdef CONFIG_X86_64 - -/* upper 28 bits [47:20] of the virtual address in 64-bit used to - * index into bounds directory (BD). - */ -#define MPX_BD_ENTRY_OFFSET 28 -#define MPX_BD_ENTRY_SHIFT 3 -/* bits [19:3] of the virtual address in 64-bit used to index into - * bounds table (BT). 
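The shift-based MPX macros removed here are replaced, in the hunk that follows, by explicit size and entry-size constants. Their arithmetic, written out as a compile-time sketch (illustrative only, assuming the new MPX_* names are in scope):

    _Static_assert(MPX_BD_NR_ENTRIES_64 == (1UL << 28),
                   "64-bit bounds directory: 2GB / 8-byte entries");
    _Static_assert(MPX_BD_NR_ENTRIES_32 == (1UL << 20),
                   "32-bit bounds directory: 4MB / 4-byte entries");
    _Static_assert(MPX_BT_NR_ENTRIES_64 == (1UL << 17),
                   "64-bit bounds table: 4MB / 32-byte entries");
    _Static_assert(MPX_BT_NR_ENTRIES_32 == (1UL << 10),
                   "32-bit bounds table: 16kB / 16-byte entries");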
+/* + * The upper 28 bits [47:20] of the virtual address in 64-bit + * are used to index into bounds directory (BD). + * + * The directory is 2G (2^31) in size, and with 8-byte entries + * it has 2^28 entries. */ -#define MPX_BT_ENTRY_OFFSET 17 -#define MPX_BT_ENTRY_SHIFT 5 -#define MPX_IGN_BITS 3 -#define MPX_BD_ENTRY_TAIL 3 +#define MPX_BD_SIZE_BYTES_64 (1UL<<31) +#define MPX_BD_ENTRY_BYTES_64 8 +#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) -#else - -#define MPX_BD_ENTRY_OFFSET 20 -#define MPX_BD_ENTRY_SHIFT 2 -#define MPX_BT_ENTRY_OFFSET 10 -#define MPX_BT_ENTRY_SHIFT 4 -#define MPX_IGN_BITS 2 -#define MPX_BD_ENTRY_TAIL 2 +/* + * The 32-bit directory is 4MB (2^22) in size, and with 4-byte + * entries it has 2^20 entries. + */ +#define MPX_BD_SIZE_BYTES_32 (1UL<<22) +#define MPX_BD_ENTRY_BYTES_32 4 +#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) -#endif +/* + * A 64-bit table is 4MB total in size, and an entry is + * 4 64-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_64 (1UL<<22) +#define MPX_BT_ENTRY_BYTES_64 32 +#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) -#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) -#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) +/* + * A 32-bit table is 16kB total in size, and an entry is + * 4 32-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_32 (1UL<<14) +#define MPX_BT_ENTRY_BYTES_32 16 +#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1)) #define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) -#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1)) - -#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1)) #define MPX_BNDSTA_ERROR_CODE 0x3 -#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1) -#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1) -#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \ - MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) -#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ - MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) - #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf); -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); @@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm) void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) +static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { return NULL; } -static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +static inline int mpx_handle_bd_fault(void) { return -EINVAL; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 9aa52fd13a78..43e6519df0d5 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,6 +21,7 @@ struct mm_struct; #include <asm/desc_defs.h> #include <asm/nops.h> #include <asm/special_insns.h> +#include <asm/fpu/types.h> #include <linux/personality.h> #include <linux/cpumask.h> @@ 
-52,11 +53,16 @@ static inline void *current_text_addr(void) return pc; } +/* + * These alignment constraints are for performance in the vSMP case, + * but in the task_struct case we must also meet hardware imposed + * alignment requirements of the FPU state: + */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else -# define ARCH_MIN_TASKALIGN 16 +# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif @@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu_detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); @@ -313,128 +318,6 @@ struct orig_ist { unsigned long ist[7]; }; -#define MXCSR_DEFAULT 0x1f80 - -struct i387_fsave_struct { - u32 cwd; /* FPU Control Word */ - u32 swd; /* FPU Status Word */ - u32 twd; /* FPU Tag Word */ - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Pointer Offset */ - u32 fos; /* FPU Operand Pointer Selector */ - - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - - /* Software status information [not touched by FSAVE ]: */ - u32 status; -}; - -struct i387_fxsave_struct { - u16 cwd; /* Control Word */ - u16 swd; /* Status Word */ - u16 twd; /* Tag Word */ - u16 fop; /* Last Instruction Opcode */ - union { - struct { - u64 rip; /* Instruction Pointer */ - u64 rdp; /* Data Pointer */ - }; - struct { - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Offset */ - u32 fos; /* FPU Operand Selector */ - }; - }; - u32 mxcsr; /* MXCSR Register State */ - u32 mxcsr_mask; /* MXCSR Mask */ - - /* 8*16 bytes for each FP-reg = 128 bytes: */ - u32 st_space[32]; - - /* 16*16 bytes for each XMM-reg = 256 bytes: */ - u32 xmm_space[64]; - - u32 padding[12]; - - union { - u32 padding1[12]; - u32 sw_reserved[12]; - }; - -} __attribute__((aligned(16))); - -struct i387_soft_struct { - u32 cwd; - u32 swd; - u32 twd; - u32 fip; - u32 fcs; - u32 foo; - u32 fos; - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - u8 ftop; - u8 changed; - u8 lookahead; - u8 no_update; - u8 rm; - u8 alimit; - struct math_emu_info *info; - u32 entry_eip; -}; - -struct ymmh_struct { - /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; - -struct bndreg { - u64 lower_bound; - u64 upper_bound; -} __packed; - -struct bndcsr { - u64 bndcfgu; - u64 bndstatus; -} __packed; - -struct xsave_hdr_struct { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; -} __attribute__((packed)); - -struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* new processor state extensions will go here */ -} __attribute__ ((packed, aligned (64))); - -union thread_xstate { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; - struct xsave_struct xsave; -}; - -struct fpu { - unsigned int last_cpu; - unsigned int has_fpu; - union thread_xstate *state; -}; - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); @@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ 
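With the FPU state now embedded directly in thread_struct, task_struct allocations must honor the hardware alignment of the XSAVE area; that is what the new ARCH_MIN_TASKALIGN definition above guarantees. A compile-time sketch of the invariant (illustrative only):

    _Static_assert(__alignof__(union fpregs_state) == 64,
                   "xregs_state is declared aligned(64) and dominates the union");
    _Static_assert(ARCH_MIN_TASKALIGN >= 64,
                   "task_struct allocations therefore satisfy XSAVE's 64-byte alignment");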
extern unsigned int xstate_size; -extern void free_thread_xstate(struct task_struct *); -extern struct kmem_cache *task_xstate_cachep; struct perf_event; @@ -508,6 +389,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... */ @@ -518,8 +403,6 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; - /* floating point and extended processor state */ - struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; @@ -535,15 +418,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; }; /* @@ -928,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); /* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) -#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) +#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() +#define MPX_DISABLE_MANAGEMENT() mpx_disable_management() #ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(struct task_struct *tsk); -extern int mpx_disable_management(struct task_struct *tsk); +extern int mpx_enable_management(void); +extern int mpx_disable_management(void); #else -static inline int mpx_enable_management(struct task_struct *tsk) +static inline int mpx_enable_management(void) { return -EINVAL; } -static inline int mpx_disable_management(struct task_struct *tsk) +static inline int mpx_disable_management(void) { return -EINVAL; } diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index ee80b92f0096..6c8a7ed13365 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,5 +1,5 @@ -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * may_use_simd - whether it is allowable at this time to issue SIMD diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a998598f172..c2e00bb2a136 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -39,7 +39,9 @@ #include <asm/processor.h> #include <asm/percpu.h> #include <asm/desc.h> + #include <linux/random.h> +#include <linux/sched.h> /* * 24 byte read-only segment initializer for stack canary. 
Linker diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 552d6c90a6d4..d1793f06854d 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_32_H #include <asm/desc.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> /* image of the saved processor state */ struct saved_context { diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index bc6232834bab..7ebf0ebe4e68 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_64_H #include <asm/desc.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> /* * Image of the saved processor state, used by the low level ACPI suspend to diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h new file mode 100644 index 000000000000..173dd3ba108c --- /dev/null +++ b/arch/x86/include/asm/trace/mpx.h @@ -0,0 +1,132 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mpx + +#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MPX_H + +#include <linux/tracepoint.h> + +#ifdef CONFIG_X86_INTEL_MPX + +TRACE_EVENT(mpx_bounds_register_exception, + + TP_PROTO(void *addr_referenced, + const struct bndreg *bndreg), + TP_ARGS(addr_referenced, bndreg), + + TP_STRUCT__entry( + __field(void *, addr_referenced) + __field(u64, lower_bound) + __field(u64, upper_bound) + ), + + TP_fast_assign( + __entry->addr_referenced = addr_referenced; + __entry->lower_bound = bndreg->lower_bound; + __entry->upper_bound = bndreg->upper_bound; + ), + /* + * Note that we are printing out the '~' of the upper + * bounds register here. It is actually stored in its + * one's complement form so that its 'init' state + * corresponds to all 0's. But, that looks like + * gibberish when printed out, so print out the 1's + * complement instead of the actual value here. Note + * though that you still need to specify filters for the + * actual value, not the displayed one. 
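To make the one's-complement convention described above concrete (values are hypothetical, not from the patch): a bounds register that permits accesses to [0x1000, 0x1fff] is stored and displayed as follows.

    struct bndreg reg = {
            .lower_bound = 0x1000,
            .upper_bound = ~0x1fffULL,      /* upper bound kept in one's complement */
    };

    /* What the tracepoint prints as '~upper': */
    unsigned long long effective_upper = ~reg.upper_bound;   /* == 0x1fff */

    /*
     * The all-zeroes init state therefore decodes to lower == 0 and
     * effective upper == ~0, i.e. the register permits any access.
     */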
+ */ + TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", + __entry->addr_referenced, + __entry->lower_bound, + ~__entry->upper_bound + ) +); + +TRACE_EVENT(bounds_exception_mpx, + + TP_PROTO(const struct bndcsr *bndcsr), + TP_ARGS(bndcsr), + + TP_STRUCT__entry( + __field(u64, bndcfgu) + __field(u64, bndstatus) + ), + + TP_fast_assign( + /* need to get rid of the 'const' on bndcsr */ + __entry->bndcfgu = (u64)bndcsr->bndcfgu; + __entry->bndstatus = (u64)bndcsr->bndstatus; + ), + + TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", + __entry->bndcfgu, + __entry->bndstatus) +); + +DECLARE_EVENT_CLASS(mpx_range_trace, + + TP_PROTO(unsigned long start, + unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("[0x%p:0x%p]", + (void *)__entry->start, + (void *)__entry->end + ) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + +TRACE_EVENT(mpx_new_bounds_table, + + TP_PROTO(unsigned long table_vaddr), + TP_ARGS(table_vaddr), + + TP_STRUCT__entry( + __field(unsigned long, table_vaddr) + ), + + TP_fast_assign( + __entry->table_vaddr = table_vaddr; + ), + + TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) +); + +#else + +/* + * This gets used outside of MPX-specific code, so we need a stub. + */ +static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) +{ +} + +#endif /* CONFIG_X86_INTEL_MPX */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mpx +#endif /* _TRACE_MPX_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index ccab4af1646d..59a54e869f15 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -14,8 +14,8 @@ struct user_ymmh_regs { __u32 ymmh_space[64]; }; -struct user_xsave_hdr { - __u64 xstate_bv; +struct user_xstate_header { + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; @@ -41,11 +41,11 @@ struct user_xsave_hdr { * particular process/thread. * * Also when the user modifies certain state FP/SSE/etc through the - * ptrace interface, they must ensure that the xsave_hdr.xstate_bv + * ptrace interface, they must ensure that the header.xfeatures * bytes[512..519] of the memory layout are updated correspondingly. * i.e., for example when FP state is modified to a non-init state, - * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to - * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc. + * header.xfeatures's bit 0 must be set to '1', when SSE is modified to + * non-init state, header.xfeatures's bit 1 must to be set to '1', etc. 
*/ #define USER_XSTATE_FX_SW_WORDS 6 #define USER_XSTATE_XCR0_WORD 0 @@ -55,7 +55,7 @@ struct user_xstateregs { __u64 fpx_space[58]; __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; } i387; - struct user_xsave_hdr xsave_hdr; + struct user_xstate_header header; struct user_ymmh_regs ymmh; /* further processor state extensions go here */ }; diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h deleted file mode 100644 index f2cba4e79a23..000000000000 --- a/arch/x86/include/asm/xcr.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright 2008 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2 or (at your - * option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * asm-x86/xcr.h - * - * Definitions for the eXtended Control Register instructions - */ - -#ifndef _ASM_X86_XCR_H -#define _ASM_X86_XCR_H - -#define XCR_XFEATURE_ENABLED_MASK 0x00000000 - -#ifdef __KERNEL__ -# ifndef __ASSEMBLY__ - -#include <linux/types.h> - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ - : : "a" (eax), "d" (edx), "c" (index)); -} - -# endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_XCR_H */ diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index d8829751b3f8..1f5c5161ead6 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -36,7 +36,7 @@ * no advantages to be gotten from x86-64 here anyways. 
*/ -#include <asm/i387.h> +#include <asm/fpu/api.h> #ifdef CONFIG_X86_32 /* reduce register pressure */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index ce05722e3c68..5a08bc8bff33 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -26,7 +26,7 @@ #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" -#include <asm/i387.h> +#include <asm/fpu/api.h> static void xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 492b29802f57..7c0a517ec751 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -18,7 +18,7 @@ #ifdef CONFIG_AS_AVX #include <linux/compiler.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #define BLOCK4(i) \ BLOCK(32 * i, 0) \ diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h deleted file mode 100644 index c9a6d68b8d62..000000000000 --- a/arch/x86/include/asm/xsave.h +++ /dev/null @@ -1,257 +0,0 @@ -#ifndef __ASM_X86_XSAVE_H -#define __ASM_X86_XSAVE_H - -#include <linux/types.h> -#include <asm/processor.h> - -#define XSTATE_CPUID 0x0000000d - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 -#define XSTATE_OPMASK 0x20 -#define XSTATE_ZMM_Hi256 0x40 -#define XSTATE_Hi16_ZMM 0x80 - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) -/* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) - -#define FXSAVE_SIZE 512 - -#define XSAVE_HDR_SIZE 64 -#define XSAVE_HDR_OFFSET FXSAVE_SIZE - -#define XSAVE_YMM_SIZE 256 -#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) - -/* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) - -/* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) - -/* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern unsigned int xstate_size; -extern u64 pcntxt_mask; -extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct *init_xstate_buf; - -extern void xsave_init(void); -extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); -extern int init_fpu(struct task_struct *child); - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define xstate_fault ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
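All of the XSAVE-family wrappers in this (now deleted) header pass the requested-feature bitmap to the instruction in EDX:EAX; the recurring lmask/hmask split is simply the 64-bit mask cut in half. A trivial sketch (the helper name is hypothetical):

    static inline void xstate_split_mask(u64 mask, u32 *lmask, u32 *hmask)
    {
            *lmask = (u32)mask;          /* low 32 bits, passed in EAX */
            *hmask = (u32)(mask >> 32);  /* high 32 bits, passed in EDX */
    }

    /* e.g. mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM gives lmask = 0x7, hmask = 0 */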
- */ -static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * Save processor xstate to xsave area. - */ -static inline int xsave_state(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore processor xstate from xsave area. - */ -static inline int xrstor_state(struct xsave_struct *fx, u64 mask) -{ - int err = 0; - u32 lmask = mask; - u32 hmask = mask >> 32; - - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Save xstate context for old process during context switch. - */ -static inline void fpu_xsave(struct fpu *fpu) -{ - xsave_state(&fpu->state->xsave, -1); -} - -/* - * Restore xstate context for new process during context switch. - */ -static inline int fpu_xrstor_checking(struct xsave_struct *fx) -{ - return xrstor_state(fx, -1); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int xsave_user(struct xsave_struct __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. 
- */ - err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); - if (unlikely(err)) - return -EFAULT; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) -{ - int err = 0; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? */ - return err; -} - -void *get_xsave_addr(struct xsave_struct *xsave, int xstate); -void setup_xstate_comp(void); - -#endif diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8a2cd3..0e8a973de9ee 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -25,7 +25,7 @@ struct _fpx_sw_bytes { __u32 extended_size; /* total size of the layout referred by * fpstate pointer in the sigcontext. */ - __u64 xstate_bv; + __u64 xfeatures; /* feature bit mask (including fp/sse/extended * state) that is present in the memory * layout. @@ -209,8 +209,8 @@ struct sigcontext { #endif /* !__i386__ */ -struct _xsave_hdr { - __u64 xstate_bv; +struct _header { + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; @@ -228,7 +228,7 @@ struct _ymmh_state { */ struct _xstate { struct _fpstate fpstate; - struct _xsave_hdr xstate_hdr; + struct _header xstate_hdr; struct _ymmh_state ymmh; /* new processor state extensions go here */ }; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b56ca17..febaf180621b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o obj-y += resource.o obj-y += process.o -obj-y += i387.o xsave.o +obj-y += fpu/ obj-y += ptrace.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef653193160..7fe097235376 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -21,6 +21,10 @@ #include <asm/io.h> #include <asm/fixmap.h> +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + #define MAX_PATCH_LEN (255-1) static int __initdata_or_module debug_alternative; @@ -627,6 +631,7 @@ void __init alternative_instructions(void) apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); + alternatives_patched = 1; } /** diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 03445346ee0a..bd17db15a2c1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,57 +12,11 @@ #include <asm/bugs.h> #include <asm/processor.h> #include <asm/processor-flags.h> -#include <asm/i387.h> +#include <asm/fpu/internal.h> #include <asm/msr.h> #include <asm/paravirt.h> #include <asm/alternative.h> -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. 
- */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. - * - * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - if (fdiv_bug) { - set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); - pr_warn("Hmm, FPU with FDIV bug\n"); - } -} - void __init check_bugs(void) { identify_boot_cpu(); @@ -85,10 +39,5 @@ void __init check_bugs(void) '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (cpu_has_fpu) - check_fpu(); + fpu__init_check_bugs(); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 351197cbbc8e..b28e5262a0a5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -32,8 +32,7 @@ #include <asm/setup.h> #include <asm/apic.h> #include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mtrr.h> #include <linux/numa.h> #include <asm/asm.h> @@ -146,32 +145,21 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -static int __init x86_xsave_setup(char *s) +static int __init x86_mpx_setup(char *s) { + /* require an exact match without trailing characters */ if (strlen(s)) return 0; - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - return 1; -} -__setup("noxsave", x86_xsave_setup); -static int __init x86_xsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - return 1; -} -__setup("noxsaveopt", x86_xsaveopt_setup); + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_MPX)) + return 1; -static int __init x86_xsaves_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_MPX); + pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); return 1; } -__setup("noxsaves", x86_xsaves_setup); +__setup("nompx", x86_mpx_setup); #ifdef CONFIG_X86_32 static int cachesize_override = -1; @@ -184,14 +172,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - static int __init x86_sep_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_SEP); @@ -762,7 +742,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu_detect(c); + fpu__init_system(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -1186,8 +1166,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); - /* * Special IST stacks which the CPU switches to when it calls * an IST-marked descriptor entry. 
Up to 7 stacks (hardware @@ -1278,7 +1256,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find @@ -1442,7 +1419,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__init_cpu(); if (is_uv_system()) uv_cpu_init(); @@ -1498,7 +1475,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__init_cpu(); } #endif diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile new file mode 100644 index 000000000000..68279efb811a --- /dev/null +++ b/arch/x86/kernel/fpu/Makefile @@ -0,0 +1,5 @@ +# +# Build rules for the FPU support code: +# + +obj-y += init.o bugs.o core.o regset.o signal.o xstate.o diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c new file mode 100644 index 000000000000..dd9ca9b60ff3 --- /dev/null +++ b/arch/x86/kernel/fpu/bugs.c @@ -0,0 +1,71 @@ +/* + * x86 FPU bug checks: + */ +#include <asm/fpu/internal.h> + +/* + * Boot time CPU/FPU FDIV bug detection code: + */ + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + u32 cr0_saved; + s32 fdiv_bug; + + /* We might have CR0::TS set already, clear it: */ + cr0_saved = read_cr0(); + write_cr0(cr0_saved & ~X86_CR0_TS); + + kernel_fpu_begin(); + + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug + */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&fdiv_bug) + : "m" (*&x), "m" (*&y)); + + kernel_fpu_end(); + + write_cr0(cr0_saved); + + if (fdiv_bug) { + set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); + pr_warn("Hmm, FPU with FDIV bug\n"); + } +} + +void __init fpu__init_check_bugs(void) +{ + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + if (cpu_has_fpu) + check_fpu(); +} diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c new file mode 100644 index 000000000000..79de954626fd --- /dev/null +++ b/arch/x86/kernel/fpu/core.c @@ -0,0 +1,523 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes <gareth@valinux.com>, May 2000 + */ +#include <asm/fpu/internal.h> +#include <asm/fpu/regset.h> +#include <asm/fpu/signal.h> +#include <asm/traps.h> + +#include <linux/hardirq.h> + +/* + * Represents the initial FPU state. It's mostly (but not completely) zeroes, + * depending on the FPU hardware format: + */ +union fpregs_state init_fpstate __read_mostly; + +/* + * Track whether the kernel is using the FPU state + * currently. + * + * This flag is used: + * + * - by IRQ context code to potentially use the FPU + * if it's unused. 
+ * + * - to debug kernel_fpu_begin()/end() correctness + */ +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +/* + * Track which context is using the FPU on the CPU: + */ +DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +static void kernel_fpu_disable(void) +{ + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, true); +} + +static void kernel_fpu_enable(void) +{ + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, false); +} + +static bool kernel_fpu_disabled(void) +{ + return this_cpu_read(in_kernel_fpu); +} + +/* + * Were we in an interrupt that interrupted kernel mode? + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + * + * Except for the eagerfpu case when we return true; in the likely case + * the thread has FPU but we are not going to set/clear TS. + */ +static bool interrupted_kernel_fpu_idle(void) +{ + if (kernel_fpu_disabled()) + return false; + + if (use_eager_fpu()) + return true; + + return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? + * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. + */ +bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} +EXPORT_SYMBOL(irq_fpu_usable); + +void __kernel_fpu_begin(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + WARN_ON_FPU(!irq_fpu_usable()); + + kernel_fpu_disable(); + + if (fpu->fpregs_active) { + copy_fpregs_to_fpstate(fpu); + } else { + this_cpu_write(fpu_fpregs_owner_ctx, NULL); + __fpregs_activate_hw(); + } +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(void) +{ + struct fpu *fpu = ¤t->thread.fpu; + + if (fpu->fpregs_active) + copy_kernel_to_fpregs(&fpu->state); + else + __fpregs_deactivate_hw(); + + kernel_fpu_enable(); +} +EXPORT_SYMBOL(__kernel_fpu_end); + +void kernel_fpu_begin(void) +{ + preempt_disable(); + __kernel_fpu_begin(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); + +/* + * CR0::TS save/restore functions: + */ +int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(irq_ts_save); + +void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} +EXPORT_SYMBOL_GPL(irq_ts_restore); + +/* + * Save the FPU state (mark it for reload if necessary): + * + * This only ever gets called for the current task. 
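The usual consumer-side pattern for the helpers above, in code that may run in interrupt context, is to gate SIMD use on irq_fpu_usable() and keep a scalar fallback. A sketch (the function and its fallback are hypothetical):

    static void checksum_block(const void *buf, size_t len)
    {
            if (irq_fpu_usable()) {
                    kernel_fpu_begin();
                    /* ... vectorized implementation using SSE/AVX ... */
                    kernel_fpu_end();
            } else {
                    /* ... plain integer fallback ... */
            }
    }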
+ */ +void fpu__save(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != ¤t->thread.fpu); + + preempt_disable(); + if (fpu->fpregs_active) { + if (!copy_fpregs_to_fpstate(fpu)) + fpregs_deactivate(fpu); + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(fpu__save); + +/* + * Legacy x87 fpstate state init: + */ +static inline void fpstate_init_fstate(struct fregs_state *fp) +{ + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; +} + +void fpstate_init(union fpregs_state *state) +{ + if (!cpu_has_fpu) { + fpstate_init_soft(&state->soft); + return; + } + + memset(state, 0, xstate_size); + + if (cpu_has_fxsr) + fpstate_init_fxstate(&state->fxsave); + else + fpstate_init_fstate(&state->fsave); +} +EXPORT_SYMBOL_GPL(fpstate_init); + +/* + * Copy the current task's FPU state to a new task's FPU context. + * + * In both the 'eager' and the 'lazy' case we save hardware registers + * directly to the destination buffer. + */ +static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) +{ + WARN_ON_FPU(src_fpu != ¤t->thread.fpu); + + /* + * Don't let 'init optimized' areas of the XSAVE area + * leak into the child task: + */ + if (use_eager_fpu()) + memset(&dst_fpu->state.xsave, 0, xstate_size); + + /* + * Save current FPU registers directly into the child + * FPU context, without any memory-to-memory copying. + * + * If the FPU context got destroyed in the process (FNSAVE + * done on old CPUs) then copy it back into the source + * context and mark the current task for lazy restore. + * + * We have to do all this with preemption disabled, + * mostly because of the FNSAVE case, because in that + * case we must not allow preemption in the window + * between the FNSAVE and us marking the context lazy. + * + * It shouldn't be an issue as even FNSAVE is plenty + * fast in terms of critical section length. + */ + preempt_disable(); + if (!copy_fpregs_to_fpstate(dst_fpu)) { + memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + fpregs_deactivate(src_fpu); + } + preempt_enable(); +} + +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) +{ + dst_fpu->counter = 0; + dst_fpu->fpregs_active = 0; + dst_fpu->last_cpu = -1; + + if (src_fpu->fpstate_active) + fpu_copy(dst_fpu, src_fpu); + + return 0; +} + +/* + * Activate the current task's in-memory FPU context, + * if it has not been used before: + */ +void fpu__activate_curr(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != ¤t->thread.fpu); + + if (!fpu->fpstate_active) { + fpstate_init(&fpu->state); + + /* Safe to do for the current task: */ + fpu->fpstate_active = 1; + } +} +EXPORT_SYMBOL_GPL(fpu__activate_curr); + +/* + * This function must be called before we read a task's fpstate. + * + * If the task has not used the FPU before then initialize its + * fpstate. + * + * If the task has used the FPU before then save it. + */ +void fpu__activate_fpstate_read(struct fpu *fpu) +{ + /* + * If fpregs are active (in the current CPU), then + * copy them to the fpstate: + */ + if (fpu->fpregs_active) { + fpu__save(fpu); + } else { + if (!fpu->fpstate_active) { + fpstate_init(&fpu->state); + + /* Safe to do for current and for stopped child tasks: */ + fpu->fpstate_active = 1; + } + } +} + +/* + * This function must be called before we write a task's fpstate. + * + * If the task has used the FPU before then unlazy it. + * If the task has not used the FPU before then initialize its fpstate. 
+ * + * After this function call, after registers in the fpstate are + * modified and the child task has woken up, the child task will + * restore the modified FPU state from the modified context. If we + * didn't clear its lazy status here then the lazy in-registers + * state pending on its former CPU could be restored, corrupting + * the modifications. + */ +void fpu__activate_fpstate_write(struct fpu *fpu) +{ + /* + * Only stopped child tasks can be used to modify the FPU + * state in the fpstate buffer: + */ + WARN_ON_FPU(fpu == ¤t->thread.fpu); + + if (fpu->fpstate_active) { + /* Invalidate any lazy state: */ + fpu->last_cpu = -1; + } else { + fpstate_init(&fpu->state); + + /* Safe to do for stopped child tasks: */ + fpu->fpstate_active = 1; + } +} + +/* + * 'fpu__restore()' is called to copy FPU registers from + * the FPU fpstate to the live hw registers and to activate + * access to the hardware registers, so that FPU instructions + * can be used afterwards. + * + * Must be called with kernel preemption disabled (for example + * with local interrupts disabled, as it is in the case of + * do_device_not_available()). + */ +void fpu__restore(struct fpu *fpu) +{ + fpu__activate_curr(fpu); + + /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ + kernel_fpu_disable(); + fpregs_activate(fpu); + copy_kernel_to_fpregs(&fpu->state); + fpu->counter++; + kernel_fpu_enable(); +} +EXPORT_SYMBOL_GPL(fpu__restore); + +/* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents + * in the fpregs in the eager-FPU case. + * + * This function can be used in cases where we know that + * a state-restore is coming: either an explicit one, + * or a reschedule. + */ +void fpu__drop(struct fpu *fpu) +{ + preempt_disable(); + fpu->counter = 0; + + if (fpu->fpregs_active) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } + + fpu->fpstate_active = 0; + + preempt_enable(); +} + +/* + * Clear FPU registers by setting them up from + * the init fpstate: + */ +static inline void copy_init_fpstate_to_fpregs(void) +{ + if (use_xsave()) + copy_kernel_to_xregs(&init_fpstate.xsave, -1); + else + copy_kernel_to_fxregs(&init_fpstate.fxsave); +} + +/* + * Clear the FPU state back to init state. + * + * Called by sys_execve(), by the signal handler code and by various + * error paths. + */ +void fpu__clear(struct fpu *fpu) +{ + WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ + + if (!use_eager_fpu()) { + /* FPU state will be reallocated lazily at the first use. 
*/ + fpu__drop(fpu); + } else { + if (!fpu->fpstate_active) { + fpu__activate_curr(fpu); + user_fpu_begin(); + } + copy_init_fpstate_to_fpregs(); + } +} + +/* + * x87 math exception handling: + */ + +static inline unsigned short get_fpu_cwd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.cwd; + } else { + return (unsigned short)fpu->state.fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.swd; + } else { + return (unsigned short)fpu->state.fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) +{ + if (cpu_has_xmm) { + return fpu->state.fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +int fpu__exception_code(struct fpu *fpu, int trap_nr) +{ + int err; + + if (trap_nr == X86_TRAP_MF) { + unsigned short cwd, swd; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(fpu); + swd = get_fpu_swd(fpu); + + err = swd & ~cwd; + } else { + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. + */ + unsigned short mxcsr = get_fpu_mxcsr(fpu); + err = ~(mxcsr >> 7) & mxcsr; + } + + if (err & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + return FPE_FLTINV; + } else if (err & 0x004) { /* Divide by Zero */ + return FPE_FLTDIV; + } else if (err & 0x008) { /* Overflow */ + return FPE_FLTOVF; + } else if (err & 0x012) { /* Denormal, Underflow */ + return FPE_FLTUND; + } else if (err & 0x020) { /* Precision */ + return FPE_FLTRES; + } + + /* + * If we're using IRQ 13, or supposedly even some trap + * X86_TRAP_MF implementations, it's possible + * we get a spurious trap, which is not an error. 
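A worked example of the #MF decoding in fpu__exception_code() above, with illustrative values: suppose a task unmasks only the divide-by-zero exception and then divides by zero.

    /*
     * FNINIT's default control word is 0x037f (all exceptions masked);
     * clearing bit 2 unmasks ZE:
     */
    unsigned short cwd = 0x037b;
    /* After the faulting instruction, the status word has ZE (bit 2) set: */
    unsigned short swd = 0x0004;

    unsigned short err = swd & ~cwd;     /* == 0x0004 */
    /* err & 0x004 is set, so the function returns FPE_FLTDIV. */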
+ */ + return 0; +} diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c new file mode 100644 index 000000000000..fc878fee6a51 --- /dev/null +++ b/arch/x86/kernel/fpu/init.c @@ -0,0 +1,354 @@ +/* + * x86 FPU boot time init code: + */ +#include <asm/fpu/internal.h> +#include <asm/tlbflush.h> + +/* + * Initialize the TS bit in CR0 according to the style of context-switches + * we are using: + */ +static void fpu__init_cpu_ctx_switch(void) +{ + if (!cpu_has_eager_fpu) + stts(); + else + clts(); +} + +/* + * Initialize the registers found in all CPUs, CR0 and CR4: + */ +static void fpu__init_cpu_generic(void) +{ + unsigned long cr0; + unsigned long cr4_mask = 0; + + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + cr4_set_bits(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!cpu_has_fpu) + cr0 |= X86_CR0_EM; + write_cr0(cr0); + + /* Flush out any pending x87 state: */ + asm volatile ("fninit"); +} + +/* + * Enable all supported FPU features. Called when a CPU is brought online: + */ +void fpu__init_cpu(void) +{ + fpu__init_cpu_generic(); + fpu__init_cpu_xstate(); + fpu__init_cpu_ctx_switch(); +} + +/* + * The earliest FPU detection code. + * + * Set the X86_FEATURE_FPU CPU-capability bit based on + * trying to execute an actual sequence of FPU instructions: + */ +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + unsigned long cr0; + u16 fsw, fcw; + + fsw = fcw = 0xffff; + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS | X86_CR0_EM); + write_cr0(cr0); + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); + for (;;) + asm volatile("hlt"); + } +#endif +} + +/* + * Boot time FPU feature detection code: + */ +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; + +static void __init fpu__init_system_mxcsr(void) +{ + unsigned int mask = 0; + + if (cpu_has_fxsr) { + struct fxregs_state fx_tmp __aligned(32) = { }; + + asm volatile("fxsave %0" : "+m" (fx_tmp)); + + mask = fx_tmp.mxcsr_mask; + + /* + * If zero then use the default features mask, + * which has all features set, except the + * denormals-are-zero feature bit: + */ + if (mask == 0) + mask = 0x0000ffbf; + } + mxcsr_feature_mask &= mask; +} + +/* + * Once per bootup FPU initialization sequences that will run on most x86 CPUs: + */ +static void __init fpu__init_system_generic(void) +{ + /* + * Set up the legacy init FPU context. (xstate init might overwrite this + * with a more modern format, if the CPU supports it.) + */ + fpstate_init_fxstate(&init_fpstate.fxsave); + + fpu__init_system_mxcsr(); +} + +/* + * Size of the FPU context state. All tasks in the system use the + * same context size, regardless of what portion they use. + * This is inherent to the XSAVE architecture which puts all state + * components into a single, continuous memory block: + */ +unsigned int xstate_size; +EXPORT_SYMBOL_GPL(xstate_size); + +/* + * Set up the xstate_size based on the legacy FPU context size. + * + * We set this up first, and later it will be overwritten by + * fpu__init_system_xstate() if the CPU knows about xstates. 
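+ *
+ * For example (the non-legacy sizes below are typical values, quoted
+ * only for illustration): an FXSR-only CPU keeps xstate_size at
+ * sizeof(struct fxregs_state), i.e. 512 bytes, while an XSAVE-capable
+ * CPU with AVX usually ends up with 832 bytes after the later
+ * recalculation: 512 bytes of legacy state, the 64-byte XSAVE header
+ * and 256 bytes of YMM state.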
+ */ +static void __init fpu__init_system_xstate_size_legacy(void) +{ + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + /* + * Note that xstate_size might be overwriten later during + * fpu__init_system_xstate(). + */ + + if (!cpu_has_fpu) { + /* + * Disable xsave as we do not support it if i387 + * emulation is enabled. + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + xstate_size = sizeof(struct swregs_state); + } else { + if (cpu_has_fxsr) + xstate_size = sizeof(struct fxregs_state); + else + xstate_size = sizeof(struct fregs_state); + } + /* + * Quirk: we don't yet handle the XSAVES* instructions + * correctly, as we don't correctly convert between + * standard and compacted format when interfacing + * with user-space - so disable it for now. + * + * The difference is small: with recent CPUs the + * compacted format is only marginally smaller than + * the standard FPU state format. + * + * ( This is easy to backport while we are fixing + * XSAVES* support. ) + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); +} + +/* + * FPU context switching strategies: + * + * Against popular belief, we don't do lazy FPU saves, due to the + * task migration complications it brings on SMP - we only do + * lazy FPU restores. + * + * 'lazy' is the traditional strategy, which is based on setting + * CR0::TS to 1 during context-switch (instead of doing a full + * restore of the FPU state), which causes the first FPU instruction + * after the context switch (whenever it is executed) to fault - at + * which point we lazily restore the FPU state into FPU registers. + * + * Tasks are of course under no obligation to execute FPU instructions, + * so it can easily happen that another context-switch occurs without + * a single FPU instruction being executed. If we eventually switch + * back to the original task (that still owns the FPU) then we have + * not only saved the restores along the way, but we also have the + * FPU ready to be used for the original task. + * + * 'eager' switching is used on modern CPUs, there we switch the FPU + * state during every context switch, regardless of whether the task + * has used FPU instructions in that time slice or not. This is done + * because modern FPU context saving instructions are able to optimize + * state saving and restoration in hardware: they can detect both + * unused and untouched FPU state and optimize accordingly. + * + * [ Note that even in 'lazy' mode we might optimize context switches + * to use 'eager' restores, if we detect that a task is using the FPU + * frequently. See the fpu->counter logic in fpu/internal.h for that. 
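+ *
+ * Concretely, a lazy restore is driven by the #NM trap: the first FPU
+ * instruction executed after the context switch faults,
+ * do_device_not_available() runs and calls fpu__restore() (defined
+ * earlier in this patch), which copies fpu->state back into the
+ * registers and re-activates them (clearing CR0::TS in the lazy case).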
] + */ +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; + +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + +/* + * Pick the FPU context switching strategy: + */ +static void __init fpu__init_system_ctx_switch(void) +{ + static bool on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + WARN_ON_FPU(current->thread.fpu.fpstate_active); + current_thread_info()->status = 0; + + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; + + if (xfeatures_mask & XSTATE_EAGER) { + if (eagerfpu == DISABLE) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XSTATE_EAGER); + xfeatures_mask &= ~XSTATE_EAGER; + } else { + eagerfpu = ENABLE; + } + } + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); +} + +/* + * Called on the boot CPU once per system bootup, to set up the initial + * FPU state that is later cloned into all processes: + */ +void __init fpu__init_system(struct cpuinfo_x86 *c) +{ + fpu__init_system_early_generic(c); + + /* + * The FPU has to be operational for some of the + * later FPU init activities: + */ + fpu__init_cpu(); + + /* + * But don't leave CR0::TS set yet, as some of the FPU setup + * methods depend on being able to execute FPU instructions + * that will fault on a set TS, such as the FXSAVE in + * fpu__init_system_mxcsr(). + */ + clts(); + + fpu__init_system_generic(); + fpu__init_system_xstate_size_legacy(); + fpu__init_system_xstate(); + + fpu__init_system_ctx_switch(); +} + +/* + * Boot parameter to turn off FPU support and fall back to math-emu: + */ +static int __init no_387(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FPU); + return 1; +} +__setup("no387", no_387); + +/* + * Disable all xstate CPU features: + */ +static int __init x86_noxsave_setup(char *s) +{ + if (strlen(s)) + return 0; + + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + + return 1; +} +__setup("noxsave", x86_noxsave_setup); + +/* + * Disable the XSAVEOPT instruction specifically: + */ +static int __init x86_noxsaveopt_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + + return 1; +} +__setup("noxsaveopt", x86_noxsaveopt_setup); + +/* + * Disable the XSAVES instruction: + */ +static int __init x86_noxsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + return 1; +} +__setup("noxsaves", x86_noxsaves_setup); + +/* + * Disable FX save/restore and SSE support: + */ +static int __init x86_nofxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + + return 1; +} +__setup("nofxsr", x86_nofxsr_setup); diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c new file mode 100644 index 000000000000..dc60810c1c74 --- /dev/null +++ b/arch/x86/kernel/fpu/regset.c @@ -0,0 +1,356 @@ +/* + * FPU register's regset abstraction, for ptrace, core dumps, etc. 
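+ *
+ * These routines back the NT_PRFPREG, NT_PRXFPREG and NT_X86_XSTATE
+ * regsets. As an illustrative user-space usage (not part of this
+ * file), a debugger fetching the full XSAVE image of a stopped tracee
+ * ends up in xstateregs_get() below:
+ *
+ *	struct iovec iov = { .iov_base = xsave_buf, .iov_len = sizeof(xsave_buf) };
+ *	ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov);
+ *
+ * ('xsave_buf' is a hypothetical, suitably sized user buffer.)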
+ */ +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +/* + * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, + * as the "regset->n" for the xstate regset will be updated based on the feature + * capabilites supported by the xsave. + */ +int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return target_fpu->fpstate_active ? regset->n : 0; +} + +int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; +} + +int xfpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_fpstate_read(fpu); + fpstate_sanitize_xstate(fpu); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); +} + +int xfpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + int ret; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_fpstate_write(fpu); + fpstate_sanitize_xstate(fpu); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); + + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; + + return ret; +} + +int xstateregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xregs_state *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_fpstate_read(fpu); + + xsave = &fpu->state.xsave; + + /* + * Copy the 48bytes defined by the software first into the xstate + * memory layout in the thread struct, so that we can copy the entire + * xstateregs to the user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, + xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + /* + * Copy the xstate memory layout. + */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + return ret; +} + +int xstateregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xregs_state *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_fpstate_write(fpu); + + xsave = &fpu->state.xsave; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + xsave->header.xfeatures &= xfeatures_mask; + /* + * These bits must be zero. + */ + memset(&xsave->header.reserved, 0, 48); + + return ret; +} + +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + +/* + * FPU tag word conversions. 
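+ *
+ * The i387 tag word uses two bits per register (00 valid, 01 zero,
+ * 10 special, 11 empty), while the FXSR format compresses this to one
+ * bit per register (1 = non-empty). Two illustrative values:
+ *
+ *	twd_i387_to_fxsr(0xffff) == 0x00	(all registers empty)
+ *	twd_i387_to_fxsr(0x0000) == 0xff	(all registers valid)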
+ */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + + return tmp; +} + +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + +static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) +{ + struct _fpxreg *st; + u32 tos = (fxsave->swd >> 11) & 7; + u32 twd = (unsigned long) fxsave->twd; + u32 tag; + u32 ret = 0xffff0000u; + int i; + + for (i = 0; i < 8; i++, twd >>= 1) { + if (twd & 0x1) { + st = FPREG_ADDR(fxsave, (i - tos) & 7); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = FP_EXP_TAG_SPECIAL; + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; + break; + default: + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; + break; + } + } else { + tag = FP_EXP_TAG_EMPTY; + } + ret |= tag << (2 * i); + } + return ret; +} + +/* + * FXSR floating point environment conversions. + */ + +void +convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) +{ + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + env->cwd = fxsave->cwd | 0xffff0000u; + env->swd = fxsave->swd | 0xffff0000u; + env->twd = twd_fxsr_to_i387(fxsave); + +#ifdef CONFIG_X86_64 + env->fip = fxsave->rip; + env->foo = fxsave->rdp; + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. 
+ */ + env->fcs = task_pt_regs(tsk)->cs; + if (tsk == current) { + savesegment(ds, env->fos); + } else { + env->fos = tsk->thread.ds; + } + env->fos |= 0xffff0000; +#else + env->fip = fxsave->fip; + env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); + env->foo = fxsave->foo; + env->fos = fxsave->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(to[0])); +} + +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) + +{ + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + fxsave->cwd = env->cwd; + fxsave->swd = env->swd; + fxsave->twd = twd_i387_to_fxsr(env->twd); + fxsave->fop = (u16) ((u32) env->fcs >> 16); +#ifdef CONFIG_X86_64 + fxsave->rip = env->fip; + fxsave->rdp = env->foo; + /* cs and ds ignored */ +#else + fxsave->fip = env->fip; + fxsave->fcs = (env->fcs & 0xffff); + fxsave->foo = env->foo; + fxsave->fos = env->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(from[0])); +} + +int fpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + + fpu__activate_fpstate_read(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + fpstate_sanitize_xstate(fpu); + + if (kbuf && pos == 0 && count == sizeof(env)) { + convert_from_fxsr(kbuf, target); + return 0; + } + + convert_from_fxsr(&env, target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); +} + +int fpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + int ret; + + fpu__activate_fpstate_write(fpu); + fpstate_sanitize_xstate(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + if (pos > 0 || count < sizeof(env)) + convert_from_fxsr(&env, target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + if (!ret) + convert_to_fxsr(target, &env); + + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FP; + return ret; +} + +/* + * FPU state for core dumps. + * This is only used for a.out dumps now. + * It is declared generically using elf_fpregset_t (which is + * struct user_i387_struct) but is in fact only used for 32-bit + * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
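+ *
+ * (dump_fpu() below is a thin wrapper: it reports whether the task has
+ * active FPU state and, if so, fetches it through fpregs_get() with a
+ * NULL regset argument.)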
+ */ +int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) +{ + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int fpvalid; + + fpvalid = fpu->fpstate_active; + if (fpvalid) + fpvalid = !fpregs_get(tsk, NULL, + 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); + + return fpvalid; +} +EXPORT_SYMBOL(dump_fpu); + +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c new file mode 100644 index 000000000000..50ec9af1bd51 --- /dev/null +++ b/arch/x86/kernel/fpu/signal.c @@ -0,0 +1,404 @@ +/* + * FPU signal frame handling routines. + */ + +#include <linux/compat.h> +#include <linux/cpu.h> + +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +#include <asm/sigframe.h> + +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +static inline int check_for_xstate(struct fxregs_state __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) +{ + int min_xstate_size = sizeof(struct fxregs_state) + + sizeof(struct xstate_header); + unsigned int magic2; + + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; + + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; + + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + +/* + * Signal frame handlers. + */ +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +{ + if (use_fxsr()) { + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; + + convert_from_fxsr(&env, tsk); + + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; + } else { + struct fregs_state __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) + return -1; + } + + return 0; +} + +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xregs_state __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xfeatures; + int err; + + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + + if (!use_xsave()) + return err; + + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + + /* + * Read the xfeatures which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. 
This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xfeatures in the xsave header. + * + * xsave aware apps can change the xfeatures in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xfeatures |= XSTATE_FPSSE; + + err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + + return err; +} + +static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) +{ + int err; + + if (use_xsave()) + err = copy_xregs_to_user(buf); + else if (use_fxsr()) + err = copy_fxregs_to_user((struct fxregs_state __user *) buf); + else + err = copy_fregs_to_user((struct fregs_state __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; +} + +/* + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. + */ +int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) +{ + struct xregs_state *xsave = ¤t->thread.fpu.state.xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; + + if (fpregs_active()) { + /* Save the live register state to the user directly. */ + if (copy_fpregs_to_sigframe(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + copy_fxregs_to_kernel(&tsk->thread.fpu); + } else { + fpstate_sanitize_xstate(&tsk->thread.fpu); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + return 0; +} + +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xfeatures, int fx_only) +{ + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; + struct xstate_header *header = &xsave->header; + + if (use_xsave()) { + /* These bits must be zero. */ + memset(header->reserved, 0, 48); + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + header->xfeatures = XSTATE_FPSSE; + else + header->xfeatures &= (xfeatures_mask & xfeatures); + } + + if (use_fxsr()) { + /* + * mscsr reserved bits must be masked to zero for security + * reasons. 
+ */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } +} + +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE state. + */ +static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +{ + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_fxregs(buf); + } else { + u64 init_bv = xfeatures_mask & ~xbv; + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_xregs(buf, xbv); + } + } else if (use_fxsr()) { + return copy_user_to_fxregs(buf); + } else + return copy_user_to_fregs(buf); +} + +static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int state_size = xstate_size; + u64 xfeatures = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!buf) { + fpu__clear(fpu); + return 0; + } + + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + fpu__activate_curr(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; + + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct fxregs_state); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xfeatures = fx_sw_user.xfeatures; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct fpu *fpu = &tsk->thread.fpu; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears fpu->fpstate_active. This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * fpu->fpstate_active is again set. + */ + fpu__drop(fpu); + + if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + fpstate_init(&fpu->state); + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + + fpu->fpstate_active = 1; + if (use_eager_fpu()) { + preempt_disable(); + fpu__restore(fpu); + preempt_enable(); + } + + return err; + } else { + /* + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). + */ + user_fpu_begin(); + if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { + fpu__clear(fpu); + return -1; + } + } + + return 0; +} + +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +/* + * Restore FPU state from a sigframe: + */ +int fpu__restore_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct fregs_state); + size += sizeof(struct fregs_state); + } + + return __fpu__restore_sig(buf, buf_fx, size); +} + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct fregs_state); + sp -= sizeof(struct fregs_state); + } + + *size = frame_size; + + return sp; +} +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void fpu__init_prepare_fx_sw_frame(void) +{ + int fsave_header_size = sizeof(struct fregs_state); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = size; + fx_sw_reserved.xfeatures = xfeatures_mask; + fx_sw_reserved.xstate_size = xstate_size; + + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} + diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c new file mode 100644 index 000000000000..62fc001c7846 --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.c @@ -0,0 +1,461 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha <suresh.b.siddha@intel.com> + */ +#include <linux/compat.h> +#include <linux/cpu.h> + +#include <asm/fpu/api.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> + +#include <asm/tlbflush.h> + +static const char *xfeature_names[] = +{ + "x87 floating point registers" , + "SSE registers" , + "AVX registers" , + "MPX bounds registers" , + "MPX CSR" , + "AVX-512 opmask" , + "AVX-512 Hi256" , + "AVX-512 ZMM_Hi256" , + "unknown xstate feature" , +}; + +/* + * Mask of xstate features supported by the CPU and the kernel: + */ +u64 xfeatures_mask __read_mostly; + +static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; +static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1}; +static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; + +/* The number of supported xfeatures in xfeatures_mask: */ +static unsigned int xfeatures_nr; + +/* + * Return whether the system supports a given xfeature. + * + * Also return the name of the (most advanced) feature that the caller requested: + */ +int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) +{ + u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; + + if (unlikely(feature_name)) { + long xfeature_idx, max_idx; + u64 xfeatures_print; + /* + * So we use FLS here to be able to print the most advanced + * feature that was requested but is missing. 
So if a driver + * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the + * missing AVX feature - this is the most informative message + * to users: + */ + if (xfeatures_missing) + xfeatures_print = xfeatures_missing; + else + xfeatures_print = xfeatures_needed; + + xfeature_idx = fls64(xfeatures_print)-1; + max_idx = ARRAY_SIZE(xfeature_names)-1; + xfeature_idx = min(xfeature_idx, max_idx); + + *feature_name = xfeature_names[xfeature_idx]; + } + + if (xfeatures_missing) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(cpu_has_xfeatures); + +/* + * When executing XSAVEOPT (or other optimized XSAVE instructions), if + * a processor implementation detects that an FPU state component is still + * (or is again) in its initialized state, it may clear the corresponding + * bit in the header.xfeatures field, and can skip the writeout of registers + * to the corresponding memory layout. + * + * This means that when the bit is zero, the state component might still contain + * some previous - non-initialized register state. + * + * Before writing xstate information to user-space we sanitize those components, + * to always ensure that the memory layout of a feature will be in the init state + * if the corresponding header bit is zero. This is to ensure that user-space doesn't + * see some stale state in the memory layout during signal handling, debugging etc. + */ +void fpstate_sanitize_xstate(struct fpu *fpu) +{ + struct fxregs_state *fx = &fpu->state.fxsave; + int feature_bit; + u64 xfeatures; + + if (!use_xsaveopt()) + return; + + xfeatures = fpu->state.xsave.header.xfeatures; + + /* + * None of the feature bits are in init state. So nothing else + * to do for us, as the memory layout is up to date. + */ + if ((xfeatures & xfeatures_mask) == xfeatures_mask) + return; + + /* + * FP is in init state + */ + if (!(xfeatures & XSTATE_FP)) { + fx->cwd = 0x37f; + fx->swd = 0; + fx->twd = 0; + fx->fop = 0; + fx->rip = 0; + fx->rdp = 0; + memset(&fx->st_space[0], 0, 128); + } + + /* + * SSE is in init state + */ + if (!(xfeatures & XSTATE_SSE)) + memset(&fx->xmm_space[0], 0, 256); + + /* + * First two features are FPU and SSE, which above we handled + * in a special way already: + */ + feature_bit = 0x2; + xfeatures = (xfeatures_mask & ~xfeatures) >> 2; + + /* + * Update all the remaining memory layouts according to their + * standard xstate layout, if their header bit is in the init + * state: + */ + while (xfeatures) { + if (xfeatures & 0x1) { + int offset = xstate_offsets[feature_bit]; + int size = xstate_sizes[feature_bit]; + + memcpy((void *)fx + offset, + (void *)&init_fpstate.xsave + offset, + size); + } + + xfeatures >>= 1; + feature_bit++; + } +} + +/* + * Enable the extended processor state save/restore feature. + * Called once per CPU onlining. + */ +void fpu__init_cpu_xstate(void) +{ + if (!cpu_has_xsave || !xfeatures_mask) + return; + + cr4_set_bits(X86_CR4_OSXSAVE); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* + * Record the offsets and sizes of various xstates contained + * in the XSAVE state memory layout. + * + * ( Note that certain features might be non-present, for them + * we'll have 0 offset and 0 size. 
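+ *
+ *   For example, on a CPU whose only extended state beyond FP/SSE is
+ *   AVX, the loop below records a single leaf: the YMM state, which
+ *   CPUID typically reports at offset 576 with a size of 256 bytes.
+ *   The exact numbers are CPU-reported; they are quoted here only for
+ *   illustration.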
) + */ +static void __init setup_xstate_features(void) +{ + u32 eax, ebx, ecx, edx, leaf; + + xfeatures_nr = fls64(xfeatures_mask); + + for (leaf = 2; leaf < xfeatures_nr; leaf++) { + cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); + + xstate_offsets[leaf] = ebx; + xstate_sizes[leaf] = eax; + + printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); + } +} + +static void __init print_xstate_feature(u64 xstate_mask) +{ + const char *feature_name; + + if (cpu_has_xfeatures(xstate_mask, &feature_name)) + pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); +} + +/* + * Print out all the supported xstate features: + */ +static void __init print_xstate_features(void) +{ + print_xstate_feature(XSTATE_FP); + print_xstate_feature(XSTATE_SSE); + print_xstate_feature(XSTATE_YMM); + print_xstate_feature(XSTATE_BNDREGS); + print_xstate_feature(XSTATE_BNDCSR); + print_xstate_feature(XSTATE_OPMASK); + print_xstate_feature(XSTATE_ZMM_Hi256); + print_xstate_feature(XSTATE_Hi16_ZMM); +} + +/* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave aread. + */ +static void __init setup_xstate_comp(void) +{ + unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; + int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. + */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); + + if (!cpu_has_xsaves) { + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + +/* + * setup the xstate image representing the init state + */ +static void __init setup_init_fpu_buf(void) +{ + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); + print_xstate_features(); + + if (cpu_has_xsaves) { + init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_fpstate.xsave.header.xfeatures = xfeatures_mask; + } + + /* + * Init all the features state with header_bv being 0x0 + */ + copy_kernel_to_xregs_booting(&init_fpstate.xsave); + + /* + * Dump the init state again. This is to identify the init state + * of any feature which is not represented by all zero's. + */ + copy_xregs_to_kernel_booting(&init_fpstate.xsave); +} + +/* + * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + +/* + * Enable and initialize the xsave feature. + * Called once per system bootup. 
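+ * It is called from fpu__init_system(), after
+ * fpu__init_system_xstate_size_legacy() has established the legacy
+ * fallback size (see fpu/init.c earlier in this patch).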
+ */ +void __init fpu__init_system_xstate(void) +{ + unsigned int eax, ebx, ecx, edx; + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + if (!cpu_has_xsave) { + pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + return; + } + + if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { + WARN_ON_FPU(1); + return; + } + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xfeatures_mask = eax + ((u64)edx << 32); + + if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); + BUG(); + } + + /* Support only the state known to the OS: */ + xfeatures_mask = xfeatures_mask & XCNTXT_MASK; + + /* Enable xstate instructions to be able to continue with initialization: */ + fpu__init_cpu_xstate(); + + /* Recompute the context size for enabled features: */ + init_xstate_size(); + + update_regset_xstate_info(xstate_size, xfeatures_mask); + fpu__init_prepare_fx_sw_frame(); + setup_init_fpu_buf(); + setup_xstate_comp(); + + pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", + xfeatures_mask, + xstate_size, + cpu_has_xsaves ? "compacted" : "standard"); +} + +/* + * Restore minimal FPU state after suspend: + */ +void fpu__resume_cpu(void) +{ + /* + * Restore XCR0 on xsave capable CPUs: + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Note that if there is no data for the field in the xsave buffer + * this will return NULL. + * + * Inputs: + * xstate: the thread's storage area for all FPU data + * xstate_feature: state which is defined in xsave.h (e.g. + * XSTATE_FP, XSTATE_SSE, etc...) + * Output: + * address of the state in the xsave area, or NULL if the + * field is not present in the xsave buffer. + */ +void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) +{ + int feature_nr = fls64(xstate_feature) - 1; + /* + * Do we even *have* xsave state? + */ + if (!boot_cpu_has(X86_FEATURE_XSAVE)) + return NULL; + + xsave = ¤t->thread.fpu.state.xsave; + /* + * We should not ever be requesting features that we + * have not enabled. Remember that pcntxt_mask is + * what we write to the XCR0 register. + */ + WARN_ONCE(!(xfeatures_mask & xstate_feature), + "get of unsupported state"); + /* + * This assumes the last 'xsave*' instruction to + * have requested that 'xstate_feature' be saved. + * If it did not, we might be seeing and old value + * of the field in the buffer. + * + * This can happen because the last 'xsave' did not + * request that this feature be saved (unlikely) + * or because the "init optimization" caused it + * to not be saved. + */ + if (!(xsave->header.xfeatures & xstate_feature)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature_nr]; +} +EXPORT_SYMBOL_GPL(get_xsave_addr); + +/* + * This wraps up the common operations that need to occur when retrieving + * data from xsave state. It first ensures that the current task was + * using the FPU and retrieves the data in to a buffer. It then calculates + * the offset of the requested field in the buffer. + * + * This function is safe to call whether the FPU is in use or not. + * + * Note that this only works on the current task. 
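+ *
+ * A typical (illustrative) caller inspecting the current task's MPX
+ * bounds-config state would do:
+ *
+ *	const void *bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
+ *	if (!bndcsr)
+ *		return;		(state not present, or in its init state)
+ *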
+ * + * Inputs: + * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP, + * XSTATE_SSE, etc...) + * Output: + * address of the state in the xsave area or NULL if the state + * is not present or is in its 'init state'. + */ +const void *get_xsave_field_ptr(int xsave_state) +{ + struct fpu *fpu = ¤t->thread.fpu; + + if (!fpu->fpstate_active) + return NULL; + /* + * fpu__save() takes the CPU's xstate registers + * and saves them off to the 'fpu memory buffer. + */ + fpu__save(fpu); + + return get_xsave_addr(&fpu->state.xsave, xsave_state); +} diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c deleted file mode 100644 index 6185d3141219..000000000000 --- a/arch/x86/kernel/i387.c +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes <gareth@valinux.com>, May 2000 - */ -#include <linux/module.h> -#include <linux/regset.h> -#include <linux/sched.h> -#include <linux/slab.h> - -#include <asm/sigcontext.h> -#include <asm/processor.h> -#include <asm/math_emu.h> -#include <asm/tlbflush.h> -#include <asm/uaccess.h> -#include <asm/ptrace.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/user.h> - -static DEFINE_PER_CPU(bool, in_kernel_fpu); - -void kernel_fpu_disable(void) -{ - WARN_ON(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -void kernel_fpu_enable(void) -{ - this_cpu_write(in_kernel_fpu, false); -} - -/* - * Were we in an interrupt that interrupted kernel mode? - * - * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - * - * Except for the eagerfpu case when we return true; in the likely case - * the thread has FPU but we are not going to set/clear TS. - */ -static inline bool interrupted_kernel_fpu_idle(void) -{ - if (this_cpu_read(in_kernel_fpu)) - return false; - - if (use_eager_fpu()) - return true; - - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static inline bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - -/* - * Can we use the FPU in kernel mode with the - * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. 
- */ -bool irq_fpu_usable(void) -{ - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); -} -EXPORT_SYMBOL(irq_fpu_usable); - -void __kernel_fpu_begin(void) -{ - struct task_struct *me = current; - - this_cpu_write(in_kernel_fpu, true); - - if (__thread_has_fpu(me)) { - __save_init_fpu(me); - } else { - this_cpu_write(fpu_owner_task, NULL); - if (!use_eager_fpu()) - clts(); - } -} -EXPORT_SYMBOL(__kernel_fpu_begin); - -void __kernel_fpu_end(void) -{ - struct task_struct *me = current; - - if (__thread_has_fpu(me)) { - if (WARN_ON(restore_fpu_checking(me))) - fpu_reset_state(me); - } else if (!use_eager_fpu()) { - stts(); - } - - this_cpu_write(in_kernel_fpu, false); -} -EXPORT_SYMBOL(__kernel_fpu_end); - -void unlazy_fpu(struct task_struct *tsk) -{ - preempt_disable(); - if (__thread_has_fpu(tsk)) { - if (use_eager_fpu()) { - __save_fpu(tsk); - } else { - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - } - } - preempt_enable(); -} -EXPORT_SYMBOL(unlazy_fpu); - -unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); -static struct i387_fxsave_struct fx_scratch; - -static void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - - if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : "+m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; - if (mask == 0) - mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; -} - -static void init_thread_xstate(void) -{ - /* - * Note that xstate_size might be overwriten later during - * xsave_init(). - */ - - if (!cpu_has_fpu) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); - return; - } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); - - /* - * Quirk: we don't yet handle the XSAVES* instructions - * correctly, as we don't correctly convert between - * standard and compacted format when interfacing - * with user-space - so disable it for now. - * - * The difference is small: with recent CPUs the - * compacted format is only marginally smaller than - * the standard FPU state format. - * - * ( This is easy to backport while we are fixing - * XSAVES* support. ) - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); -} - -/* - * Called at bootup to set up the initial FPU state that is later cloned - * into all processes. - */ - -void fpu_init(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - -#ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) - asm volatile("hlt"); - } -#endif - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - cr4_set_bits(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) - cr0 |= X86_CR0_EM; - write_cr0(cr0); - - /* - * init_thread_xstate is only called once to avoid overriding - * xstate_size during boot time or during CPU hotplug. 
- */ - if (xstate_size == 0) - init_thread_xstate(); - - mxcsr_feature_mask_init(); - xsave_init(); - eager_fpu_init(); -} - -void fpu_finit(struct fpu *fpu) -{ - if (!cpu_has_fpu) { - finit_soft_fpu(&fpu->state->soft); - return; - } - - memset(fpu->state, 0, xstate_size); - - if (cpu_has_fxsr) { - fx_finit(&fpu->state->fxsave); - } else { - struct i387_fsave_struct *fp = &fpu->state->fsave; - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } -} -EXPORT_SYMBOL_GPL(fpu_finit); - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remember the current task has used the FPU. - */ -int init_fpu(struct task_struct *tsk) -{ - int ret; - - if (tsk_used_math(tsk)) { - if (cpu_has_fpu && tsk == current) - unlazy_fpu(tsk); - task_disable_lazy_fpu_restore(tsk); - return 0; - } - - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpu_alloc(&tsk->thread.fpu); - if (ret) - return ret; - - fpu_finit(&tsk->thread.fpu); - - set_stopped_child_used_math(tsk); - return 0; -} -EXPORT_SYMBOL_GPL(init_fpu); - -/* - * The xstateregs_active() routine is the same as the fpregs_active() routine, - * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. - */ -int fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return tsk_used_math(target) ? regset->n : 0; -} - -int xfpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; -} - -int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); -} - -int xfpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - return ret; -} - -int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). 
- */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - return ret; -} - -int xstateregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = init_fpu(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->xsave_hdr.xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - memset(&xsave->xsave_hdr.reserved, 0, 48); - return ret; -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - - return tmp; -} - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) -#define FP_EXP_TAG_VALID 0 -#define FP_EXP_TAG_ZERO 1 -#define FP_EXP_TAG_SPECIAL 2 -#define FP_EXP_TAG_EMPTY 3 - -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) -{ - struct _fpxreg *st; - u32 tos = (fxsave->swd >> 11) & 7; - u32 twd = (unsigned long) fxsave->twd; - u32 tag; - u32 ret = 0xffff0000u; - int i; - - for (i = 0; i < 8; i++, twd >>= 1) { - if (twd & 0x1) { - st = FPREG_ADDR(fxsave, (i - tos) & 7); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = FP_EXP_TAG_SPECIAL; - break; - case 0x0000: - if (!st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3]) - tag = FP_EXP_TAG_ZERO; - else - tag = FP_EXP_TAG_SPECIAL; - break; - default: - if (st->significand[3] & 0x8000) - tag = FP_EXP_TAG_VALID; - else - tag = FP_EXP_TAG_SPECIAL; - break; - } - } else { - tag = FP_EXP_TAG_EMPTY; - } - ret |= tag << (2 * i); - } - return ret; -} - -/* - * FXSR floating point environment conversions. - */ - -void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - env->cwd = fxsave->cwd | 0xffff0000u; - env->swd = fxsave->swd | 0xffff0000u; - env->twd = twd_fxsr_to_i387(fxsave); - -#ifdef CONFIG_X86_64 - env->fip = fxsave->rip; - env->foo = fxsave->rdp; - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. 
- */ - env->fcs = task_pt_regs(tsk)->cs; - if (tsk == current) { - savesegment(ds, env->fos); - } else { - env->fos = tsk->thread.ds; - } - env->fos |= 0xffff0000; -#else - env->fip = fxsave->fip; - env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); - env->foo = fxsave->foo; - env->fos = fxsave->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(to[0])); -} - -void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) - -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - fxsave->cwd = env->cwd; - fxsave->swd = env->swd; - fxsave->twd = twd_i387_to_fxsr(env->twd); - fxsave->fop = (u16) ((u32) env->fcs >> 16); -#ifdef CONFIG_X86_64 - fxsave->rip = env->fip; - fxsave->rdp = env->foo; - /* cs and ds ignored */ -#else - fxsave->fip = env->fip; - fxsave->fcs = (env->fcs & 0xffff); - fxsave->foo = env->foo; - fxsave->fos = env->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(from[0])); -} - -int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - sanitize_i387_state(target); - - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); - return 0; - } - - convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); -} - -int fpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = init_fpu(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(target, &env); - - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; - return ret; -} - -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. - * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
- */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) -{ - struct task_struct *tsk = current; - int fpvalid; - - fpvalid = !!used_math(); - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - fpu, NULL); - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ - -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} - -__setup("no387", no_387); - -void fpu_detect(struct cpuinfo_x86 *c) -{ - unsigned long cr0; - u16 fsw, fcw; - - fsw = fcw = 0xffff; - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS | X86_CR0_EM); - write_cr0(cr0); - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); - - /* The final cr0 value is set in fpu_init() */ -} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c648139d68d7..9cad694ed7c4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -25,8 +25,7 @@ #include <asm/idle.h> #include <asm/uaccess.h> #include <asm/mwait.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/debugreg.h> #include <asm/nmi.h> #include <asm/tlbflush.h> @@ -76,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n) EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif -struct kmem_cache *task_xstate_cachep; -EXPORT_SYMBOL_GPL(task_xstate_cachep); - /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. @@ -87,36 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.fpu_counter = 0; - dst->thread.fpu.has_fpu = 0; - dst->thread.fpu.state = NULL; - task_disable_lazy_fpu_restore(dst); - if (tsk_used_math(src)) { - int err = fpu_alloc(&dst->thread.fpu); - if (err) - return err; - fpu_copy(dst, src); - } - return 0; -} - -void free_thread_xstate(struct task_struct *tsk) -{ - fpu_free(&tsk->thread.fpu); -} - -void arch_release_task_struct(struct task_struct *tsk) -{ - free_thread_xstate(tsk); -} - -void arch_task_cache_init(void) -{ - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, - __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); - setup_xstate_comp(); + return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } /* @@ -127,6 +94,7 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; unsigned long *bp = t->io_bitmap_ptr; + struct fpu *fpu = &t->fpu; if (bp) { struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); @@ -142,7 +110,7 @@ void exit_thread(void) kfree(bp); } - drop_fpu(me); + fpu__drop(fpu); } void flush_thread(void) @@ -152,19 +120,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - if (!use_eager_fpu()) { - /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(tsk); - free_thread_xstate(tsk); - } else { - if (!tsk_used_math(tsk)) { - /* kthread execs. TODO: cleanup this horror. 
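fpu_detect() above probes with FNINIT/FNSTSW/FNSTCW because it also has to work on pre-CPUID parts; on anything modern the same information is CPUID.1:EDX bit 0. A small userspace sketch, assuming the GCC/Clang <cpuid.h> helpers:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;
        /* CPUID.1:EDX bit 0 corresponds to X86_FEATURE_FPU */
        printf("x87 FPU: %s\n", (edx & 1) ? "present" : "absent");
        return 0;
}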
*/ - if (WARN_ON(init_fpu(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); - } - restore_init_xstate(); - } + fpu__clear(&tsk->thread.fpu); } static void hard_disable_TSC(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106b06da..deff651835b4 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,8 +39,7 @@ #include <asm/pgtable.h> #include <asm/ldt.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/desc.h> #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> @@ -242,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; + *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - fpu_switch_t fpu; + fpu_switch_t fpu_switch; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -296,7 +297,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up + * done before fpu__restore(), so the TS bit is up * to date. */ arch_end_context_switch(next_p); @@ -319,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); this_cpu_write(current_task, next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf74f174..c50e013b57d2 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -38,8 +38,7 @@ #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mmu_context.h> #include <asm/prctl.h> #include <asm/desc.h> @@ -274,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); unsigned fsindex, gsindex; - fpu_switch_t fpu; + fpu_switch_t fpu_switch; - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -299,7 +300,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before * loading segments that might reference them, and and it must - * be done before math_state_restore, so the TS bit is up to + * be done before fpu__restore(), so the TS bit is up to * date. 
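With the register switch now expressed through switch_fpu_prepare()/switch_fpu_finish() on struct fpu, kernel code outside the context-switch path that wants to use vector registers still has to bracket that use with the <asm/fpu/api.h> helpers. A hedged kernel-style sketch; the function name and the elided SIMD loop are placeholders, not part of this patch:

#include <linux/types.h>
#include <asm/fpu/api.h>

static void sum_with_simd(const void *data, size_t len)
{
        /*
         * kernel_fpu_begin() saves the current task's FPU state so the
         * vector registers may be clobbered; kernel_fpu_end() re-arms
         * the normal save/restore paths used by __switch_to() above.
         */
        kernel_fpu_begin();
        /* ... SSE/AVX processing of data[0..len) would go here ... */
        kernel_fpu_end();
}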
*/ arch_end_context_switch(next_p); @@ -391,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); /* * Switch the PDA and FPU contexts. diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7bc79480719..9be72bc3613f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -11,7 +11,6 @@ #include <linux/errno.h> #include <linux/slab.h> #include <linux/ptrace.h> -#include <linux/regset.h> #include <linux/tracehook.h> #include <linux/user.h> #include <linux/elf.h> @@ -28,8 +27,9 @@ #include <asm/uaccess.h> #include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> +#include <asm/fpu/regset.h> #include <asm/debugreg.h> #include <asm/ldt.h> #include <asm/desc.h> @@ -1297,7 +1297,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, @@ -1338,13 +1338,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = fpregs_active, .get = fpregs_get, .set = fpregs_set + .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd53933..206996c1669d 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -26,8 +26,8 @@ #include <asm/processor.h> #include <asm/ucontext.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> +#include <asm/fpu/signal.h> #include <asm/vdso.h> #include <asm/mce.h> #include <asm/sighandling.h> @@ -103,7 +103,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) get_user_ex(buf, &sc->fpstate); } get_user_catch(err); - err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); force_iret(); @@ -199,6 +199,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); + struct fpu *fpu = ¤t->thread.fpu; /* redzone */ if (config_enabled(CONFIG_X86_64)) @@ -218,9 +219,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } } - if (used_math()) { - sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), - &buf_fx, &math_size); + if (fpu->fpstate_active) { + sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32), + &buf_fx, &math_size); *fpstate = (void __user *)sp; } @@ -234,8 +235,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user 
*)-1L; /* save i387 and extended state */ - if (used_math() && - save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) + if (fpu->fpstate_active && + copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; @@ -593,6 +594,22 @@ badframe: return 0; } +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { @@ -617,6 +634,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { bool stepping, failed; + struct fpu *fpu = ¤t->thread.fpu; /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { @@ -665,8 +683,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (used_math()) - fpu_reset_state(current); + if (fpu->fpstate_active) + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0e8209619455..6d4bfea25874 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,8 +68,7 @@ #include <asm/mwait.h> #include <asm/apic.h> #include <asm/io_apic.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/setup.h> #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab5247687..36cb15b7b367 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,12 +54,13 @@ #include <asm/ftrace.h> #include <asm/traps.h> #include <asm/desc.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> #include <asm/mce.h> #include <asm/fixmap.h> #include <asm/mach_traps.h> #include <asm/alternative.h> +#include <asm/fpu/xstate.h> +#include <asm/trace/mpx.h> #include <asm/mpx.h> #ifdef CONFIG_X86_64 @@ -371,10 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; - struct xsave_struct *xsave_buf; enum ctx_state prev_state; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; siginfo_t *info; prev_state = exception_enter(); @@ -393,15 +392,15 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) /* * We need to look at BNDSTATUS to resolve this exception. - * It is not directly accessible, though, so we need to - * do an xsave and then pull it out of the xsave buffer. + * A NULL here might mean that it is in its 'init state', + * which is all zeros which indicates MPX was not + * responsible for the exception. 
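The fpstate pointer that get_sigframe() reserves above is what a handler later sees through its ucontext. A hedged userspace sketch, assuming the x86-64 glibc layout where uc_mcontext.fpregs points at the saved FXSAVE-format area:

#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
        ucontext_t *uc = ctx;

        /* fpregs is the FXSAVE-format area the kernel wrote on the frame */
        if (uc->uc_mcontext.fpregs)
                printf("fpstate at %p, mxcsr=%#x\n",
                       (void *)uc->uc_mcontext.fpregs,
                       uc->uc_mcontext.fpregs->mxcsr);
}

int main(void)
{
        struct sigaction sa = { .sa_flags = SA_SIGINFO };

        sa.sa_sigaction = handler;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, NULL);
        raise(SIGUSR1);
        return 0;
}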
*/ - fpu_save_init(&tsk->thread.fpu); - xsave_buf = &(tsk->thread.fpu.state->xsave); - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) goto exit_trap; + trace_bounds_exception_mpx(bndcsr); /* * The error code field of the BNDSTATUS register communicates status * information of a bound range exception #BR or operation involving @@ -409,11 +408,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) */ switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault(xsave_buf)) + if (mpx_handle_bd_fault()) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs, xsave_buf); + info = mpx_generate_siginfo(regs); if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if @@ -709,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug); static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; + struct fpu *fpu = &task->thread.fpu; siginfo_t info; - unsigned short err; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -718,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; conditional_sti(regs); - if (!user_mode(regs)) - { + if (!user_mode(regs)) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; task->thread.trap_nr = trapnr; @@ -731,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - unlazy_fpu(task); - task->thread.trap_nr = trapnr; + fpu__save(fpu); + + task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - if (trapnr == X86_TRAP_MF) { - unsigned short cwd, swd; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - err = swd & ~cwd; - } else { - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. 
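math_error() above is what turns an unmasked x87/SIMD exception into SIGFPE with a decoded si_code (now via fpu__exception_code()). A hedged userspace demo that provokes that path by unmasking divide-by-zero; feenableexcept() is a GNU extension, link with -lm:

#define _GNU_SOURCE
#include <fenv.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>

static void on_fpe(int sig, siginfo_t *si, void *ctx)
{
        /* si_code comes from the decoding done in math_error() */
        printf("SIGFPE si_code=%d (FPE_FLTDIV=%d)\n", si->si_code, FPE_FLTDIV);
        exit(0);        /* returning would re-execute the faulting divide */
}

int main(void)
{
        struct sigaction sa = { .sa_flags = SA_SIGINFO };
        volatile double zero = 0.0;

        sa.sa_sigaction = on_fpe;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGFPE, &sa, NULL);

        feenableexcept(FE_DIVBYZERO);   /* unmask #Z in MXCSR and the x87 CW */
        return (int)(1.0 / zero);       /* traps, handled above */
}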
- */ - unsigned short mxcsr = get_fpu_mxcsr(task); - err = ~(mxcsr >> 7) & mxcsr; - } + info.si_code = fpu__exception_code(fpu, trapnr); - if (err & 0x001) { /* Invalid op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - } else if (err & 0x004) { /* Divide by Zero */ - info.si_code = FPE_FLTDIV; - } else if (err & 0x008) { /* Overflow */ - info.si_code = FPE_FLTOVF; - } else if (err & 0x012) { /* Denormal, Underflow */ - info.si_code = FPE_FLTUND; - } else if (err & 0x020) { /* Precision */ - info.si_code = FPE_FLTRES; - } else { - /* - * If we're using IRQ 13, or supposedly even some trap - * X86_TRAP_MF implementations, it's possible - * we get a spurious trap, which is not an error. - */ + /* Retry when we get spurious exceptions: */ + if (!info.si_code) return; - } + force_sig_info(SIGFPE, &info, task); } @@ -827,48 +783,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (eg with local - * local interrupts as in the case of do_device_not_available). - */ -void math_state_restore(void) -{ - struct task_struct *tsk = current; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (init_fpu(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ - kernel_fpu_disable(); - __thread_fpu_begin(tsk); - if (unlikely(restore_fpu_checking(tsk))) { - fpu_reset_state(tsk); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); - } else { - tsk->thread.fpu_counter++; - } - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(math_state_restore); - dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { @@ -889,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) return; } #endif - math_state_restore(); /* interrupts still off */ + fpu__restore(¤t->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 conditional_sti(regs); #endif diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0b81ad67da07..66476244731e 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -29,6 +29,7 @@ #include <linux/kdebug.h> #include <asm/processor.h> #include <asm/insn.h> +#include <asm/mmu_context.h> /* Post-execution fixups. */ @@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return !config_enabled(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); -} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. 
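The decoding that is being removed above (and folded into fpu__exception_code()) amounts to a small mapping from the raised-and-unmasked exception bits to an FPE_* code. A standalone restatement of that mapping for reference; the FPE_FLTINV stack-fault subtlety mentioned in the old comment is not modelled here:

#include <signal.h>

/*
 * err = exception bits that are both raised and unmasked:
 *   x87:  err = swd & ~cwd
 *   SIMD: err = mxcsr & ~(mxcsr >> 7)
 * (the same masks as in the removed code above).
 */
int fp_exception_to_si_code(unsigned short err)
{
        if (err & 0x001)                /* Invalid op */
                return FPE_FLTINV;
        else if (err & 0x004)           /* Divide by zero */
                return FPE_FLTDIV;
        else if (err & 0x008)           /* Overflow */
                return FPE_FLTOVF;
        else if (err & 0x012)           /* Denormal, underflow */
                return FPE_FLTUND;
        else if (err & 0x020)           /* Precision */
                return FPE_FLTRES;
        return 0;                       /* spurious trap: no signal */
}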
Otherwise, rewrite the instruction so that it accesses @@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) } } #else /* 32-bit: */ -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return false; -} /* * No RIP-relative addressing on 32-bit */ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c deleted file mode 100644 index 87a815b85f3e..000000000000 --- a/arch/x86/kernel/xsave.c +++ /dev/null @@ -1,724 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha <suresh.b.siddha@intel.com> - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/bootmem.h> -#include <linux/compat.h> -#include <linux/cpu.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> -#include <asm/sigframe.h> -#include <asm/tlbflush.h> -#include <asm/xcr.h> - -/* - * Supported feature mask by the CPU and the kernel. - */ -u64 pcntxt_mask; - -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; - -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes; -static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; -static unsigned int xstate_features; - -/* - * If a processor implementation discern that a processor state component is - * in its initialized state it may modify the corresponding bit in the - * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory - * layout in the case of xsaveopt. While presenting the xstate information to - * the user, we always ensure that the memory layout of a feature will be in - * the init state if the corresponding header bit is zero. This is to ensure - * that the user doesn't see some stale state in the memory layout during - * signal handling, debugging etc. - */ -void __sanitize_i387_state(struct task_struct *tsk) -{ - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - int feature_bit = 0x2; - u64 xstate_bv; - - if (!fx) - return; - - xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. - */ - if ((xstate_bv & pcntxt_mask) == pcntxt_mask) - return; - - /* - * FP is in init state - */ - if (!(xstate_bv & XSTATE_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xstate_bv & XSTATE_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; - - /* - * Update all the other memory layouts for which the corresponding - * header bit is in the init state. - */ - while (xstate_bv) { - if (xstate_bv & 0x1) { - int offset = xstate_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy(((void *) fx) + offset, - ((void *) init_xstate_buf) + offset, - size); - } - - xstate_bv >>= 1; - feature_bit++; - } -} - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); - unsigned int magic2; - - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; - - /* Check for the first magic field and other error scenarios. 
*/ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || - fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || - fx_sw->xstate_size > fx_sw->extended_size) - return -1; - - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -/* - * Signal frame handlers. - */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) -{ - if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; - - convert_from_fxsr(&env, tsk); - - if (__copy_to_user(buf, &env, sizeof(env)) || - __put_user(xsave->i387.swd, &fp->status) || - __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; - } else { - struct i387_fsave_struct __user *fp = buf; - u32 swd; - if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; - } - - return 0; -} - -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) -{ - struct xsave_struct __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; - u32 xstate_bv; - int err; - - /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - - if (!use_xsave()) - return err; - - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xstate_bv |= XSTATE_FPSSE; - - err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - - return err; -} - -static inline int save_user_xstate(struct xsave_struct __user *buf) -{ - int err; - - if (use_xsave()) - err = xsave_user(buf); - else if (use_fxsr()) - err = fxsave_user((struct i387_fxsave_struct __user *) buf); - else - err = fsave_user((struct i387_fsave_struct __user *) buf); - - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - return err; -} - -/* - * Save the fpu, extended register state to the user signal frame. - * - * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save - * state is copied. - * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. - * - * buf == buf_fx for 64-bit frames and 32-bit fsave frame. - * buf != buf_fx for 32-bit frames with fxstate. - * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. 
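save_xstate_epilog() above stamps FP_XSTATE_MAGIC1 into the sw_reserved bytes of the FXSAVE image (and MAGIC2 past the end of the xstate area) so userspace can tell whether extended state follows the legacy frame. A hedged userspace sketch that checks magic1 from a signal handler; the 464-byte sw_reserved offset, the struct layout and the magic value are mirrored here as assumptions rather than pulled from a header:

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <ucontext.h>

#define FP_XSTATE_MAGIC1        0x46505853U     /* assumed, mirrors uapi asm/sigcontext.h */
#define FX_SW_RESERVED_OFFSET   464             /* sw_reserved inside the 512-byte FXSAVE image */

struct fpx_sw_bytes {                           /* assumed layout of the sw_reserved area */
        uint32_t magic1;
        uint32_t extended_size;
        uint64_t xstate_bv;
        uint32_t xstate_size;
        uint32_t padding[7];
};

static void handler(int sig, siginfo_t *si, void *ctx)
{
        ucontext_t *uc = ctx;
        struct fpx_sw_bytes *sw;

        if (!uc->uc_mcontext.fpregs)
                return;
        sw = (struct fpx_sw_bytes *)((char *)uc->uc_mcontext.fpregs +
                                     FX_SW_RESERVED_OFFSET);
        if (sw->magic1 == FP_XSTATE_MAGIC1)
                printf("xstate on frame: %u bytes, xstate_bv=%#llx\n",
                       sw->xstate_size, (unsigned long long)sw->xstate_bv);
        else
                printf("legacy-only FP frame\n");
}

int main(void)
{
        struct sigaction sa = { .sa_flags = SA_SIGINFO };

        sa.sa_sigaction = handler;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGUSR1, &sa, NULL);
        raise(SIGUSR1);
        return 0;
}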
- * - * If this is a 32-bit frame with fxstate, put a fsave header before - * the aligned state at 'buf_fx'. - * - * For [f]xsave state, update the SW reserved fields in the [f]xsave frame - * indicating the absence/presence of the extended state to the user. - */ -int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - struct xsave_struct *xsave = ¤t->thread.fpu.state->xsave; - struct task_struct *tsk = current; - int ia32_fxstate = (buf != buf_fx); - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!access_ok(VERIFY_WRITE, buf, size)) - return -EACCES; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - - if (user_has_fpu()) { - /* Save the live register state to the user directly. */ - if (save_user_xstate(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - fpu_fxsave(&tsk->thread.fpu); - } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf_fx, xsave, xstate_size)) - return -1; - } - - /* Save the fsave header for the 32-bit frames. */ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; - - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; - - return 0; -} - -static inline void -sanitize_restored_xstate(struct task_struct *tsk, - struct user_i387_ia32_struct *ia32_env, - u64 xstate_bv, int fx_only) -{ - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; - - if (use_xsave()) { - /* These bits must be zero. */ - memset(xsave_hdr->reserved, 0, 48); - - /* - * Init the state that is not present in the memory - * layout and not enabled by the OS. - */ - if (fx_only) - xsave_hdr->xstate_bv = XSTATE_FPSSE; - else - xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); - } - - if (use_fxsr()) { - /* - * mscsr reserved bits must be masked to zero for security - * reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - convert_to_fxsr(tsk, ia32_env); - } -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE state. 
- */ -static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) -{ - if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; - xrstor_state(init_xstate_buf, init_bv); - return fxrstor_user(buf); - } else { - u64 init_bv = pcntxt_mask & ~xbv; - if (unlikely(init_bv)) - xrstor_state(init_xstate_buf, init_bv); - return xrestore_user(buf, xbv); - } - } else if (use_fxsr()) { - return fxrstor_user(buf); - } else - return frstor_user(buf); -} - -int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - int ia32_fxstate = (buf != buf_fx); - struct task_struct *tsk = current; - int state_size = xstate_size; - u64 xstate_bv = 0; - int fx_only = 0; - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu_reset_state(tsk); - return 0; - } - - if (!access_ok(VERIFY_READ, buf, size)) - return -EACCES; - - if (!used_math() && init_fpu(tsk)) - return -1; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; - - if (use_xsave()) { - struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - state_size = sizeof(struct i387_fxsave_struct); - fx_only = 1; - } else { - state_size = fx_sw_user.xstate_size; - xstate_bv = fx_sw_user.xstate_bv; - } - } - - if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. - */ - struct fpu *fpu = &tsk->thread.fpu; - struct user_i387_ia32_struct env; - int err = 0; - - /* - * Drop the current fpu which clears used_math(). This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * set_used_math() is again set. - */ - drop_fpu(tsk); - - if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - fpu_finit(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - } - - set_used_math(); - if (use_eager_fpu()) { - preempt_disable(); - math_state_restore(); - preempt_enable(); - } - - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { - fpu_reset_state(tsk); - return -1; - } - } - - return 0; -} - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. 
- */ -static void prepare_fx_sw_frame(void) -{ - int fsave_header_size = sizeof(struct i387_fsave_struct); - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xstate_bv = pcntxt_mask; - fx_sw_reserved.xstate_size = xstate_size; - - if (config_enabled(CONFIG_IA32_EMULATION)) { - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; - } -} - -/* - * Enable the extended processor state save/restore feature - */ -static inline void xstate_enable(void) -{ - cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); -} - -/* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. - */ -static void __init setup_xstate_features(void) -{ - int eax, ebx, ecx, edx, leaf = 0x2; - - xstate_features = fls64(pcntxt_mask); - xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); - xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); - - do { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - if (eax == 0) - break; - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; - - leaf++; - } while (1); -} - -/* - * This function sets up offsets and sizes of all extended states in - * xsave area. This supports both standard format and compacted format - * of the xsave aread. - * - * Input: void - * Output: void - */ -void setup_xstate_comp(void) -{ - unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; - int i; - - /* - * The FP xstates and SSE xstates are legacy states. They are always - * in the fixed offsets in the xsave area in either compacted form - * or standard form. - */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); - - if (!cpu_has_xsaves) { - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - xstate_comp_offsets[i] = xstate_offsets[i]; - xstate_comp_sizes[i] = xstate_sizes[i]; - } - } - return; - } - - xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; - - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) - xstate_comp_sizes[i] = xstate_sizes[i]; - else - xstate_comp_sizes[i] = 0; - - if (i > 2) - xstate_comp_offsets[i] = xstate_comp_offsets[i-1] - + xstate_comp_sizes[i-1]; - - } -} - -/* - * setup the xstate image representing the init state - */ -static void __init setup_init_fpu_buf(void) -{ - /* - * Setup init_xstate_buf to represent the init state of - * all the features managed by the xsave - */ - init_xstate_buf = alloc_bootmem_align(xstate_size, - __alignof__(struct xsave_struct)); - fx_finit(&init_xstate_buf->i387); - - if (!cpu_has_xsave) - return; - - setup_xstate_features(); - - if (cpu_has_xsaves) { - init_xstate_buf->xsave_hdr.xcomp_bv = - (u64)1 << 63 | pcntxt_mask; - init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; - } - - /* - * Init all the features state with header_bv being 0x0 - */ - xrstor_state_booting(init_xstate_buf, -1); - /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. 
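setup_xstate_features() above walks CPUID leaf 0xD sub-leaves to record each component's offset and size; the same enumeration works from userspace. A small sketch assuming the GCC/Clang <cpuid.h> helpers (sub-leaves 0 and 1 are the header/extended-info leaves, so component data starts at 2):

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx, leaf;

        for (leaf = 2; leaf < 64; leaf++) {
                if (!__get_cpuid_count(0x0d, leaf, &eax, &ebx, &ecx, &edx))
                        break;
                if (!eax)       /* size 0: no component reported here */
                        continue;
                /* EAX = size in bytes, EBX = offset in the standard format */
                printf("xstate[%2u]: offset %4u size %4u\n", leaf, ebx, eax);
        }
        return 0;
}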
- */ - xsave_state_booting(init_xstate_buf, -1); -} - -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) -{ - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; -} -__setup("eagerfpu=", eager_fpu_setup); - - -/* - * Calculate total size of enabled xstates in XCR0/pcntxt_mask. - */ -static void __init init_xstate_size(void) -{ - unsigned int eax, ebx, ecx, edx; - int i; - - if (!cpu_has_xsaves) { - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - return; - } - - xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_size += eax; - } - } -} - -/* - * Enable and initialize the xsave feature. - */ -static void __init xstate_enable_boot_cpu(void) -{ - unsigned int eax, ebx, ecx, edx; - - if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); - return; - } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); - - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); - BUG(); - } - - /* - * Support only the state known to OS. - */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - - xstate_enable(); - - /* - * Recompute the context size for enabled features - */ - init_xstate_size(); - - update_regset_xstate_info(xstate_size, pcntxt_mask); - prepare_fx_sw_frame(); - setup_init_fpu_buf(); - - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) - eagerfpu = ENABLE; - - if (pcntxt_mask & XSTATE_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", - pcntxt_mask & XSTATE_EAGER); - pcntxt_mask &= ~XSTATE_EAGER; - } else { - eagerfpu = ENABLE; - } - } - - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", - pcntxt_mask, xstate_size, - cpu_has_xsaves ? "compacted form" : "standard form"); -} - -/* - * For the very first instance, this calls xstate_enable_boot_cpu(); - * for all subsequent instances, this calls xstate_enable(). - * - * This is somewhat obfuscated due to the lack of powerful enough - * overrides for the section checks. - */ -void xsave_init(void) -{ - static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; - void (*this_func)(void); - - if (!cpu_has_xsave) - return; - - this_func = next_func; - next_func = xstate_enable; - this_func(); -} - -/* - * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_buf will be unset only once during boot. - */ -void __init_refok eager_fpu_init(void) -{ - WARN_ON(used_math()); - current_thread_info()->status = 0; - - if (eagerfpu == ENABLE) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); - - if (!cpu_has_eager_fpu) { - stts(); - return; - } - - if (!init_xstate_buf) - setup_init_fpu_buf(); -} - -/* - * Given the xsave area and a state inside, this function returns the - * address of the state. - * - * This is the API that is called to get xstate address in either - * standard format or compacted format of xsave area. - * - * Inputs: - * xsave: base address of the xsave area; - * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, - * etc.) - * Output: - * address of the state in the xsave area. 
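xstate_enable_boot_cpu() and init_xstate_size() above derive the feature mask from CPUID.0xD.0 and size the context for whatever ends up enabled in XCR0; once the kernel has set XCR0, userspace can read that mask back with XGETBV. A hedged sketch, assuming OSXSAVE is set and the assembler knows the xgetbv mnemonic:

#include <cpuid.h>
#include <stdio.h>

static unsigned long long xgetbv0(void)
{
        unsigned int eax, edx;

        /* XGETBV with ECX=0 reads XCR0, the enabled-xfeatures mask */
        __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
        return ((unsigned long long)edx << 32) | eax;
}

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        __cpuid_count(0x0d, 0, eax, ebx, ecx, edx);
        printf("XCR0 (enabled xfeatures): %#llx\n", xgetbv0());
        /* EBX = bytes needed, standard format, for the XCR0-enabled features */
        printf("xstate size for current XCR0: %u bytes\n", ebx);
        return 0;
}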
- */ -void *get_xsave_addr(struct xsave_struct *xsave, int xstate) -{ - int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) - return NULL; - - return (void *)xsave + xstate_comp_offsets[feature]; -} -EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1d08ad3582d0..9f705e618af5 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,10 +16,8 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> -#include <asm/i387.h> /* For use_eager_fpu. Ugh! */ -#include <asm/fpu-internal.h> /* For use_eager_fpu. Ugh! */ #include <asm/user.h> -#include <asm/xsave.h> +#include <asm/fpu/xstate.h> #include "cpuid.h" #include "lapic.h" #include "mmu.h" diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2d73807f0d31..e11dd59398f1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -40,8 +40,7 @@ #include <asm/vmx.h> #include <asm/virtext.h> #include <asm/mce.h> -#include <asm/i387.h> -#include <asm/xcr.h> +#include <asm/fpu/internal.h> #include <asm/perf_event.h> #include <asm/debugreg.h> #include <asm/kexec.h> @@ -1883,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) * If the FPU is not active (through the host task or * the guest vcpu), then restore the cr0.TS bit. */ - if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) stts(); load_gdt(this_cpu_ptr(&host_gdt)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ea306adbbc13..26eaeb522cab 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,9 +59,8 @@ #include <asm/desc.h> #include <asm/mtrr.h> #include <asm/mce.h> -#include <asm/i387.h> -#include <asm/fpu-internal.h> /* Ugh! */ -#include <asm/xcr.h> +#include <linux/kernel_stat.h> +#include <asm/fpu/internal.h> /* Ugh! */ #include <asm/pvclock.h> #include <asm/div64.h> @@ -3194,8 +3193,8 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; - u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; + u64 xstate_bv = xsave->header.xfeatures; u64 valid; /* @@ -3230,7 +3229,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); u64 valid; @@ -3241,9 +3240,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) memcpy(xsave, src, XSAVE_HDR_OFFSET); /* Set XSTATE_BV and possibly XCOMP_BV. 
*/ - xsave->xsave_hdr.xstate_bv = xstate_bv; + xsave->header.xfeatures = xstate_bv; if (cpu_has_xsaves) - xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Copy each region from the non-compacted offset to the @@ -3275,8 +3274,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->fxsave, - sizeof(struct i387_fxsave_struct)); + &vcpu->arch.guest_fpu.state.fxsave, + sizeof(struct fxregs_state)); *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = XSTATE_FPSSE; } @@ -3300,8 +3299,8 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->fxsave, - guest_xsave->region, sizeof(struct i387_fxsave_struct)); + memcpy(&vcpu->arch.guest_fpu.state.fxsave, + guest_xsave->region, sizeof(struct fxregs_state)); } return 0; } @@ -6597,11 +6596,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct fpu *fpu = ¤t->thread.fpu; int r; sigset_t sigsaved; - if (!tsk_used_math(current) && init_fpu(current)) - return -ENOMEM; + fpu__activate_curr(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); @@ -6971,8 +6970,8 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + struct fxregs_state *fxsave = + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; @@ -6988,8 +6987,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + struct fxregs_state *fxsave = + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -7003,17 +7002,11 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } -int fx_init(struct kvm_vcpu *vcpu) +static void fx_init(struct kvm_vcpu *vcpu) { - int err; - - err = fpu_alloc(&vcpu->arch.guest_fpu); - if (err) - return err; - - fpu_finit(&vcpu->arch.guest_fpu); + fpstate_init(&vcpu->arch.guest_fpu.state); if (cpu_has_xsaves) - vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* @@ -7022,14 +7015,6 @@ int fx_init(struct kvm_vcpu *vcpu) vcpu->arch.xcr0 = XSTATE_FP; vcpu->arch.cr0 |= X86_CR0_ET; - - return 0; -} -EXPORT_SYMBOL_GPL(fx_init); - -static void fx_free(struct kvm_vcpu *vcpu) -{ - fpu_free(&vcpu->arch.guest_fpu); } void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) @@ -7045,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - fpu_restore_checking(&vcpu->arch.guest_fpu); + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); trace_kvm_fpu(1); } @@ -7057,7 +7042,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fpu_save_init(&vcpu->arch.guest_fpu); + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; if 
(!vcpu->arch.eager_fpu) @@ -7071,7 +7056,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmclock_reset(vcpu); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } @@ -7137,7 +7121,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } @@ -7363,9 +7346,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_mce_banks; } - r = fx_init(vcpu); - if (r) - goto fail_free_wbinvd_dirty_mask; + fx_init(vcpu); vcpu->arch.ia32_tsc_adjust_msr = 0x0; vcpu->arch.pv_time_enabled = false; @@ -7379,8 +7360,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); return 0; -fail_free_wbinvd_dirty_mask: - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); + fail_free_mce_banks: kfree(vcpu->arch.mce_banks); fail_free_lapic: diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133cc099..27f8eea0d6eb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -70,7 +70,7 @@ #include <asm/e820.h> #include <asm/mce.h> #include <asm/io.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/stackprotector.h> #include <asm/reboot.h> /* for struct machine_ops */ #include <asm/kvm_para.h> diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9ba8dd8..e5e3ed8dc079 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -22,7 +22,7 @@ #include <linux/sched.h> #include <linux/types.h> -#include <asm/i387.h> +#include <asm/fpu/api.h> #include <asm/asm.h> void *_mmx_memcpy(void *to, const void *from, size_t len) diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dc8adad10a2f..dd76a05729b0 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,7 +30,7 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit_soft_fpu(struct i387_soft_struct *soft) +void fpstate_init_soft(struct swregs_state *soft) { struct address *oaddr, *iaddr; memset(soft, 0, sizeof(*soft)); @@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft) void finit(void) { - finit_soft_fpu(¤t->thread.fpu.state->soft); + fpstate_init_soft(¤t->thread.fpu.state.soft); } /* diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b868124128d..f37e84ab49f3 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include <asm/traps.h> #include <asm/desc.h> #include <asm/user.h> -#include <asm/i387.h> +#include <asm/fpu/internal.h> #include "fpu_system.h" #include "fpu_emu.h" @@ -147,13 +147,9 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; + struct fpu *fpu = ¤t->thread.fpu; - if (!used_math()) { - if (init_fpu(current)) { - do_group_exit(SIGKILL); - return; - } - } + fpu__activate_curr(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { @@ -673,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal) #endif /* PARANOID */ } -#define S387 ((struct i387_soft_struct *)s387) +#define S387 ((struct swregs_state *)s387) #define sstatus_word() \ ((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) @@ -682,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct 
swregs_state *s387 = &target->thread.fpu.state.soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; RE_ENTRANT_CHECK_OFF; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); RE_ENTRANT_CHECK_ON; if (ret) @@ -734,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct swregs_state *s387 = &target->thread.fpu.state.soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; @@ -752,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target, #endif /* PECULIAR_486 */ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); /* Copy all registers in stack order. */ if (!ret) diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c614410a5f3..9ccecb61a4fa 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -31,7 +31,7 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.fpu.state) +#define I387 (¤t->thread.fpu.state) #define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec478216..7a657f58bbea 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -10,13 +10,15 @@ #include <linux/syscalls.h> #include <linux/sched/sysctl.h> -#include <asm/i387.h> #include <asm/insn.h> #include <asm/mman.h> #include <asm/mmu_context.h> #include <asm/mpx.h> #include <asm/processor.h> -#include <asm/fpu-internal.h> +#include <asm/fpu/internal.h> + +#define CREATE_TRACE_POINTS +#include <asm/trace/mpx.h> static const char *mpx_mapping_name(struct vm_area_struct *vma) { @@ -32,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma) return (vma->vm_ops == &mpx_vma_ops); } +static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BD_SIZE_BYTES_64; + else + return MPX_BD_SIZE_BYTES_32; +} + +static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_SIZE_BYTES_64; + else + return MPX_BT_SIZE_BYTES_32; +} + /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). @@ -47,8 +65,8 @@ static unsigned long mpx_mmap(unsigned long len) vm_flags_t vm_flags; struct vm_area_struct *vma; - /* Only bounds table and bounds directory can be allocated here */ - if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) + /* Only bounds table can be allocated here */ + if (len != mpx_bt_size_bytes(mm)) return -EINVAL; down_write(&mm->mmap_sem); @@ -272,10 +290,9 @@ bad_opcode: * * The caller is expected to kfree() the returned siginfo_t. 
*/ -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { - struct bndreg *bndregs, *bndreg; + const struct bndreg *bndregs, *bndreg; siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; @@ -295,8 +312,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } - /* get the bndregs _area_ of the xsave structure */ - bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); + /* get bndregs field from current task's xsave area */ + bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -334,6 +351,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } + trace_mpx_bounds_register_exception(info->si_addr, bndreg); return info; err_out: /* info might be NULL, but kfree() handles that */ @@ -341,25 +359,18 @@ err_out: return ERR_PTR(err); } -static __user void *task_get_bounds_dir(struct task_struct *tsk) +static __user void *mpx_get_bounds_dir(void) { - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; if (!cpu_feature_enabled(X86_FEATURE_MPX)) return MPX_INVALID_BOUNDS_DIR; /* - * 32-bit binaries on 64-bit kernels are currently - * unsupported. - */ - if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32)) - return MPX_INVALID_BOUNDS_DIR; - /* * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - fpu_save_init(&tsk->thread.fpu); - bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -378,10 +389,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); } -int mpx_enable_management(struct task_struct *tsk) +int mpx_enable_management(void) { void __user *bd_base = MPX_INVALID_BOUNDS_DIR; - struct mm_struct *mm = tsk->mm; + struct mm_struct *mm = current->mm; int ret = 0; /* @@ -390,11 +401,12 @@ int mpx_enable_management(struct task_struct *tsk) * directory into XSAVE/XRSTOR Save Area and enable MPX through * XRSTOR instruction. * - * fpu_xsave() is expected to be very expensive. Storing the bounds - * directory here means that we do not have to do xsave in the unmap - * path; we can just use mm->bd_addr instead. + * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is + * expected to be relatively expensive. Storing the bounds + * directory here means that we do not have to do xsave in the + * unmap path; we can just use mm->bd_addr instead. */ - bd_base = task_get_bounds_dir(tsk); + bd_base = mpx_get_bounds_dir(); down_write(&mm->mmap_sem); mm->bd_addr = bd_base; if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) @@ -404,7 +416,7 @@ int mpx_enable_management(struct task_struct *tsk) return ret; } -int mpx_disable_management(struct task_struct *tsk) +int mpx_disable_management(void) { struct mm_struct *mm = current->mm; @@ -417,29 +429,59 @@ int mpx_disable_management(struct task_struct *tsk) return 0; } +static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, + unsigned long *curval, + unsigned long __user *addr, + unsigned long old_val, unsigned long new_val) +{ + int ret; + /* + * user_atomic_cmpxchg_inatomic() actually uses sizeof() + * the pointer that we pass to it to figure out how much + * data to cmpxchg. We have to be careful here not to + * pass a pointer to a 64-bit data type when we only want + * a 32-bit copy. 
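mpx_enable_management() above is reached from prctl(); a hedged sketch of how a userspace MPX runtime of this era would hand bounds-table management to the kernel. The PR_MPX_* values are provided only as a fallback and should be treated as assumptions if the installed headers lack them:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MPX_ENABLE_MANAGEMENT
#define PR_MPX_ENABLE_MANAGEMENT        43      /* assumed, mirrors uapi linux/prctl.h */
#define PR_MPX_DISABLE_MANAGEMENT       44
#endif

int main(void)
{
        /*
         * Ask the kernel to take over bounds-table allocation; this ends
         * up in mpx_enable_management() above, which latches the bounds
         * directory address read from BNDCSR into mm->bd_addr.
         */
        if (prctl(PR_MPX_ENABLE_MANAGEMENT, 0, 0, 0, 0))
                perror("PR_MPX_ENABLE_MANAGEMENT");
        else
                puts("kernel now manages MPX bounds tables");
        return 0;
}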
+ */ + if (is_64bit_mm(mm)) { + ret = user_atomic_cmpxchg_inatomic(curval, + addr, old_val, new_val); + } else { + u32 uninitialized_var(curval_32); + u32 old_val_32 = old_val; + u32 new_val_32 = new_val; + u32 __user *addr_32 = (u32 __user *)addr; + + ret = user_atomic_cmpxchg_inatomic(&curval_32, + addr_32, old_val_32, new_val_32); + *curval = curval_32; + } + return ret; +} + /* - * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, + * With 32-bit mode, a bounds directory is 4MB, and the size of each + * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, * and the size of each bounds table is 4MB. */ -static int allocate_bt(long __user *bd_entry) +static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) { unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; + unsigned long bd_new_entry; int ret = 0; /* * Carve the virtual space out of userspace for the new * bounds table: */ - bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); + bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); if (IS_ERR((void *)bt_addr)) return PTR_ERR((void *)bt_addr); /* * Set the valid flag (kinda like _PAGE_PRESENT in a pte) */ - bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; + bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; /* * Go poke the address of the new bounds table in to the @@ -452,8 +494,8 @@ static int allocate_bt(long __user *bd_entry) * mmap_sem at this point, unlike some of the other part * of the MPX code that have to pagefault_disable(). */ - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, bt_addr); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, + expected_old_val, bd_new_entry); if (ret) goto out_unmap; @@ -481,9 +523,10 @@ static int allocate_bt(long __user *bd_entry) ret = -EINVAL; goto out_unmap; } + trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: - vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); + vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); return ret; } @@ -498,12 +541,13 @@ out_unmap: * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, * and the size of each bound table is 4MB. */ -static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) +static int do_mpx_bt_fault(void) { unsigned long bd_entry, bd_base; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; + struct mm_struct *mm = current->mm; - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return -EINVAL; /* @@ -520,13 +564,13 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) * the directory is. 
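mpx_cmpxchg_bd_entry() above exists only to pick the compare-exchange width from the mm's bitness, because a 4-byte directory entry must not be updated with an 8-byte cmpxchg. A userspace analog of that width dispatch, assuming the GCC __atomic builtins; purely illustrative:

#include <stdint.h>
#include <stdio.h>

/* Width-dispatching compare-and-swap, mirroring the 32/64-bit split above. */
int cmpxchg_entry(int is_64bit, void *addr,
                  uint64_t old_val, uint64_t new_val, uint64_t *curval)
{
        if (is_64bit) {
                uint64_t expected = old_val;

                __atomic_compare_exchange_n((uint64_t *)addr, &expected, new_val,
                                            0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
                *curval = expected;
        } else {
                uint32_t expected = old_val;

                __atomic_compare_exchange_n((uint32_t *)addr, &expected,
                                            (uint32_t)new_val,
                                            0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
                *curval = expected;
        }
        return 0;
}

int main(void)
{
        uint64_t entry64 = 0, cur;
        uint32_t entry32 = 0;

        cmpxchg_entry(1, &entry64, 0, 0x1001, &cur);    /* 8-byte update */
        cmpxchg_entry(0, &entry32, 0, 0x2001, &cur);    /* 4-byte update */
        printf("entry64=%#llx entry32=%#x\n",
               (unsigned long long)entry64, entry32);
        return 0;
}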
*/ if ((bd_entry < bd_base) || - (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) + (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) return -EINVAL; - return allocate_bt((long __user *)bd_entry); + return allocate_bt(mm, (long __user *)bd_entry); } -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +int mpx_handle_bd_fault(void) { /* * Userspace never asked us to manage the bounds tables, @@ -535,7 +579,7 @@ int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault(xsave_buf)) { + if (do_mpx_bt_fault()) { force_sig(SIGSEGV, current); /* * The force_sig() is essentially "handling" this @@ -572,29 +616,55 @@ static int mpx_resolve_fault(long __user *addr, int write) return 0; } +static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, + unsigned long bd_entry) +{ + unsigned long bt_addr = bd_entry; + int align_to_bytes; + /* + * Bit 0 in a bt_entry is always the valid bit. + */ + bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG; + /* + * Tables are naturally aligned at 8-byte boundaries + * on 64-bit and 4-byte boundaries on 32-bit. The + * documentation makes it appear that the low bits + * are ignored by the hardware, so we do the same. + */ + if (is_64bit_mm(mm)) + align_to_bytes = 8; + else + align_to_bytes = 4; + bt_addr &= ~(align_to_bytes-1); + return bt_addr; +} + /* * Get the base of bounds tables pointed by specific bounds * directory entry. */ static int get_bt_addr(struct mm_struct *mm, - long __user *bd_entry, unsigned long *bt_addr) + long __user *bd_entry_ptr, + unsigned long *bt_addr_result) { int ret; int valid_bit; + unsigned long bd_entry; + unsigned long bt_addr; - if (!access_ok(VERIFY_READ, (bd_entry), sizeof(*bd_entry))) + if (!access_ok(VERIFY_READ, (bd_entry_ptr), sizeof(*bd_entry_ptr))) return -EFAULT; while (1) { int need_write = 0; pagefault_disable(); - ret = get_user(*bt_addr, bd_entry); + ret = get_user(bd_entry, bd_entry_ptr); pagefault_enable(); if (!ret) break; if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry, need_write); + ret = mpx_resolve_fault(bd_entry_ptr, need_write); /* * If we could not resolve the fault, consider it * userspace's fault and error out. @@ -603,8 +673,8 @@ static int get_bt_addr(struct mm_struct *mm, return ret; } - valid_bit = *bt_addr & MPX_BD_ENTRY_VALID_FLAG; - *bt_addr &= MPX_BT_ADDR_MASK; + valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG; + bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry); /* * When the kernel is managing bounds tables, a bounds directory @@ -613,7 +683,7 @@ static int get_bt_addr(struct mm_struct *mm, * data in the address field, we know something is wrong. This * -EINVAL return will cause a SIGSEGV. */ - if (!valid_bit && *bt_addr) + if (!valid_bit && bt_addr) return -EINVAL; /* * Do we have an completely zeroed bt entry? That is OK. It @@ -624,19 +694,100 @@ static int get_bt_addr(struct mm_struct *mm, if (!valid_bit) return -ENOENT; + *bt_addr_result = bt_addr; return 0; } +static inline int bt_entry_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_ENTRY_BYTES_64; + else + return MPX_BT_ENTRY_BYTES_32; +} + +/* + * Take a virtual address and turns it in to the offset in bytes + * inside of the bounds table where the bounds table entry + * controlling 'addr' can be found. 
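mpx_bd_entry_to_bt_addr() and get_bt_addr() above strip the valid bit and the low alignment bits, then distinguish empty, valid, and corrupt directory entries. A compact sketch of that decoding; the stand-alone form and the names are illustrative:

#include <errno.h>
#include <stdbool.h>

#define BD_ENTRY_VALID_FLAG 0x1UL   /* bit 0, as in the code above */

/*
 * Decode one bounds-directory entry: drop the valid bit, drop the
 * alignment bits (8-byte aligned tables on 64-bit, 4-byte on 32-bit),
 * then classify the entry.  Illustrative only.
 */
static int decode_bd_entry(unsigned long bd_entry, bool is_64bit,
                           unsigned long *bt_addr)
{
    unsigned long valid = bd_entry & BD_ENTRY_VALID_FLAG;
    unsigned long align = is_64bit ? 8 : 4;
    unsigned long addr  = (bd_entry & ~BD_ENTRY_VALID_FLAG) & ~(align - 1);

    if (!valid && addr)
        return -EINVAL;  /* address bits without valid bit: corruption */
    if (!valid)
        return -ENOENT;  /* completely empty entry: no bounds table yet */
    *bt_addr = addr;
    return 0;
}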
+ */ +static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, + unsigned long addr) +{ + unsigned long bt_table_nr_entries; + unsigned long offset = addr; + + if (is_64bit_mm(mm)) { + /* Bottom 3 bits are ignored on 64-bit */ + offset >>= 3; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; + } else { + /* Bottom 2 bits are ignored on 32-bit */ + offset >>= 2; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; + } + /* + * We know the size of the table in to which we are + * indexing, and we have eliminated all the low bits + * which are ignored for indexing. + * + * Mask out all the high bits which we do not need + * to index in to the table. Note that the tables + * are always powers of two so this gives us a proper + * mask. + */ + offset &= (bt_table_nr_entries-1); + /* + * We now have an entry offset in terms of *entries* in + * the table. We need to scale it back up to bytes. + */ + offset *= bt_entry_size_bytes(mm); + return offset; +} + +/* + * How much virtual address space does a single bounds + * directory entry cover? + * + * Note, we need a long long because 4GB doesn't fit in + * to a long on 32-bit. + */ +static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) +{ + unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); + if (is_64bit_mm(mm)) + return virt_space / MPX_BD_NR_ENTRIES_64; + else + return virt_space / MPX_BD_NR_ENTRIES_32; +} + /* * Free the backing physical pages of bounds table 'bt_addr'. * Assume start...end is within that bounds table. */ -static int zap_bt_entries(struct mm_struct *mm, +static noinline int zap_bt_entries_mapping(struct mm_struct *mm, unsigned long bt_addr, - unsigned long start, unsigned long end) + unsigned long start_mapping, unsigned long end_mapping) { struct vm_area_struct *vma; unsigned long addr, len; + unsigned long start; + unsigned long end; + + /* + * if we 'end' on a boundary, the offset will be 0 which + * is not what we want. Back it up a byte to get the + * last bt entry. Then once we have the entry itself, + * move 'end' back up by the table entry size. + */ + start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); + end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); + /* + * Move end back up by one entry. Among other things + * this ensures that it remains page-aligned and does + * not screw up zap_page_range() + */ + end += bt_entry_size_bytes(mm); /* * Find the first overlapping vma. If vma->vm_start > start, there @@ -648,7 +799,7 @@ static int zap_bt_entries(struct mm_struct *mm, return -EINVAL; /* - * A NUMA policy on a VM_MPX VMA could cause this bouds table to + * A NUMA policy on a VM_MPX VMA could cause this bounds table to * be split. So we need to look across the entire 'start -> end' * range of this bounds table, find all of the VM_MPX VMAs, and * zap only those. @@ -666,27 +817,65 @@ static int zap_bt_entries(struct mm_struct *mm, len = min(vma->vm_end, end) - addr; zap_page_range(vma, addr, len, NULL); + trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; addr = vma->vm_start; } - return 0; } -static int unmap_single_bt(struct mm_struct *mm, +static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, + unsigned long addr) +{ + /* + * There are several ways to derive the bd offsets. We + * use the following approach here: + * 1. We know the size of the virtual address space + * 2. We know the number of entries in a bounds table + * 3. We know that each entry covers a fixed amount of + * virtual address space. 
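mpx_get_bt_entry_offset_bytes() above reduces an address to a table index and scales it by the per-mode entry size. The entry sizes themselves are not spelled out in this hunk, so the 32-byte/16-byte values below are assumptions derived from the 4MB and 16KB table sizes; the constant names are illustrative, not copied from the kernel headers:

#include <stdbool.h>

/* Assumed entry sizes (not stated in this hunk) and derived counts. */
#define BT_ENTRY_BYTES_64   32
#define BT_ENTRY_BYTES_32   16
#define BT_NR_ENTRIES_64    ((4UL << 20) / BT_ENTRY_BYTES_64)   /* 4MB table */
#define BT_NR_ENTRIES_32    ((16UL << 10) / BT_ENTRY_BYTES_32)  /* 16KB table */

/*
 * Byte offset, inside a bounds table, of the entry controlling 'addr':
 * the same three steps as mpx_get_bt_entry_offset_bytes() -- drop the
 * ignored low bits, mask down to a table index, scale by entry size.
 */
static unsigned long bt_entry_offset_bytes(unsigned long addr, bool is_64bit)
{
    unsigned long offset = addr;

    if (is_64bit) {
        offset >>= 3;                     /* low 3 bits ignored */
        offset &= BT_NR_ENTRIES_64 - 1;   /* index into the table */
        return offset * BT_ENTRY_BYTES_64;
    }
    offset >>= 2;                         /* low 2 bits ignored */
    offset &= BT_NR_ENTRIES_32 - 1;
    return offset * BT_ENTRY_BYTES_32;
}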
+ * So, we can just divide the virtual address by the + * virtual space used by one entry to determine which + * entry "controls" the given virtual address. + */ + if (is_64bit_mm(mm)) { + int bd_entry_size = 8; /* 64-bit pointer */ + /* + * Take the 64-bit addressing hole in to account. + */ + addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } else { + int bd_entry_size = 4; /* 32-bit pointer */ + /* + * 32-bit has no hole so this case needs no mask + */ + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } + /* + * The two return calls above are exact copies. If we + * pull out a single copy and put it in here, gcc won't + * realize that we're doing a power-of-2 divide and use + * shifts. It uses a real divide. If we put them up + * there, it manages to figure it out (gcc 4.8.3). + */ +} + +static int unmap_entire_bt(struct mm_struct *mm, long __user *bd_entry, unsigned long bt_addr) { unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long actual_old_val = 0; + unsigned long uninitialized_var(actual_old_val); int ret; while (1) { int need_write = 1; + unsigned long cleared_bd_entry = 0; pagefault_disable(); - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, 0); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, + bd_entry, expected_old_val, cleared_bd_entry); pagefault_enable(); if (!ret) break; @@ -705,9 +894,8 @@ static int unmap_single_bt(struct mm_struct *mm, if (actual_old_val != expected_old_val) { /* * Someone else raced with us to unmap the table. - * There was no bounds table pointed to by the - * directory, so declare success. Somebody freed - * it. + * That is OK, since we were both trying to do + * the same thing. Declare success. */ if (!actual_old_val) return 0; @@ -720,176 +908,113 @@ static int unmap_single_bt(struct mm_struct *mm, */ return -EINVAL; } - /* * Note, we are likely being called under do_munmap() already. To * avoid recursion, do_munmap() will check whether it comes * from one bounds table through VM_MPX flag. */ - return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); } -/* - * If the bounds table pointed by bounds directory 'bd_entry' is - * not shared, unmap this whole bounds table. Otherwise, only free - * those backing physical pages of bounds table entries covered - * in this virtual address region start...end. - */ -static int unmap_shared_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long start, - unsigned long end, bool prev_shared, bool next_shared) +static int try_unmap_single_bt(struct mm_struct *mm, + unsigned long start, unsigned long end) { - unsigned long bt_addr; - int ret; - - ret = get_bt_addr(mm, bd_entry, &bt_addr); + struct vm_area_struct *next; + struct vm_area_struct *prev; /* - * We could see an "error" ret for not-present bounds - * tables (not really an error), or actual errors, but - * stop unmapping either way. + * "bta" == Bounds Table Area: the area controlled by the + * bounds table that we are unmapping. 
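A user-space rendering of the bounds-directory offset arithmetic in mpx_get_bd_entry_offset() above, with entry counts derived from the 2GB/4MB directory sizes and the 8/4-byte entry sizes quoted in this hunk. The 48 virtual address bits are an assumption for the common 64-bit case; the kernel reads boot_cpu_data.x86_virt_bits instead:

#include <stdbool.h>
#include <stdint.h>

/* Derived from the sizes above: 2GB / 8-byte entries, 4MB / 4-byte entries. */
#define BD_NR_ENTRIES_64    ((2ULL << 30) / 8)   /* 1 << 28 */
#define BD_NR_ENTRIES_32    ((4ULL << 20) / 4)   /* 1 << 20 */

/*
 * Byte offset of the bounds-directory entry covering 'addr'.  With
 * 48 virtual address bits (assumed here) each 64-bit directory entry
 * covers 2^48 / 2^28 = 1MB of virtual space; on 32-bit it covers
 * 2^32 / 2^20 = 4KB.
 */
static uint64_t bd_entry_offset(uint64_t addr, unsigned int virt_bits,
                                bool is_64bit)
{
    if (is_64bit) {
        uint64_t entry_virt_space = (1ULL << virt_bits) / BD_NR_ENTRIES_64;

        addr &= (1ULL << virt_bits) - 1;  /* mask off the canonical hole */
        return (addr / entry_virt_space) * 8;
    } else {
        uint64_t entry_virt_space = (1ULL << 32) / BD_NR_ENTRIES_32;

        return (addr / entry_virt_space) * 4;  /* no hole on 32-bit */
    }
}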
*/ - if (ret) - return ret; - - if (prev_shared && next_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); - else if (prev_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_BT_SIZE_BYTES); - else if (next_shared) - ret = zap_bt_entries(mm, bt_addr, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); - else - ret = unmap_single_bt(mm, bd_entry, bt_addr); - - return ret; -} - -/* - * A virtual address region being munmap()ed might share bounds table - * with adjacent VMAs. We only need to free the backing physical - * memory of these shared bounds tables entries covered in this virtual - * address region. - */ -static int unmap_edge_bts(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ + unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); + unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); + unsigned long uninitialized_var(bt_addr); + void __user *bde_vaddr; int ret; - long __user *bde_start, *bde_end; - struct vm_area_struct *prev, *next; - bool prev_shared = false, next_shared = false; - - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); - /* - * Check whether bde_start and bde_end are shared with adjacent - * VMAs. - * - * We already unliked the VMAs from the mm's rbtree so 'start' + * We already unlinked the VMAs from the mm's rbtree so 'start' * is guaranteed to be in a hole. This gets us the first VMA * before the hole in to 'prev' and the next VMA after the hole * in to 'next'. */ next = find_vma_prev(mm, start, &prev); - if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) - == bde_start) - prev_shared = true; - if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) - == bde_end) - next_shared = true; - /* - * This virtual address region being munmap()ed is only - * covered by one bounds table. - * - * In this case, if this table is also shared with adjacent - * VMAs, only part of the backing physical memory of the bounds - * table need be freeed. Otherwise the whole bounds table need - * be unmapped. - */ - if (bde_start == bde_end) { - return unmap_shared_bt(mm, bde_start, start, end, - prev_shared, next_shared); + * Do not count other MPX bounds table VMAs as neighbors. + * Although theoretically possible, we do not allow bounds + * tables for bounds tables so our heads do not explode. + * If we count them as neighbors here, we may end up with + * lots of tables even though we have no actual table + * entries in use. + */ + while (next && is_mpx_vma(next)) + next = next->vm_next; + while (prev && is_mpx_vma(prev)) + prev = prev->vm_prev; + /* + * We know 'start' and 'end' lie within an area controlled + * by a single bounds table. See if there are any other + * VMAs controlled by that bounds table. If there are not + * then we can "expand" the are we are unmapping to possibly + * cover the entire table. + */ + next = find_vma_prev(mm, start, &prev); + if ((!prev || prev->vm_end <= bta_start_vaddr) && + (!next || next->vm_start >= bta_end_vaddr)) { + /* + * No neighbor VMAs controlled by same bounds + * table. 
Try to unmap the whole thing + */ + start = bta_start_vaddr; + end = bta_end_vaddr; } + bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + ret = get_bt_addr(mm, bde_vaddr, &bt_addr); /* - * If more than one bounds tables are covered in this virtual - * address region being munmap()ed, we need to separately check - * whether bde_start and bde_end are shared with adjacent VMAs. + * No bounds table there, so nothing to unmap. */ - ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); - if (ret) - return ret; - ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); + if (ret == -ENOENT) { + ret = 0; + return 0; + } if (ret) return ret; - - return 0; + /* + * We are unmapping an entire table. Either because the + * unmap that started this whole process was large enough + * to cover an entire table, or that the unmap was small + * but was the area covered by a bounds table. + */ + if ((start == bta_start_vaddr) && + (end == bta_end_vaddr)) + return unmap_entire_bt(mm, bde_vaddr, bt_addr); + return zap_bt_entries_mapping(mm, bt_addr, start, end); } static int mpx_unmap_tables(struct mm_struct *mm, unsigned long start, unsigned long end) { - int ret; - long __user *bd_entry, *bde_start, *bde_end; - unsigned long bt_addr; - - /* - * "Edge" bounds tables are those which are being used by the region - * (start -> end), but that may be shared with adjacent areas. If they - * turn out to be completely unshared, they will be freed. If they are - * shared, we will free the backing store (like an MADV_DONTNEED) for - * areas used by this region. - */ - ret = unmap_edge_bts(mm, start, end); - switch (ret) { - /* non-present tables are OK */ - case 0: - case -ENOENT: - /* Success, or no tables to unmap */ - break; - case -EINVAL: - case -EFAULT: - default: - return ret; - } - - /* - * Only unmap the bounds table that are - * 1. fully covered - * 2. not at the edges of the mapping, even if full aligned - */ - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); - for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { - ret = get_bt_addr(mm, bd_entry, &bt_addr); - switch (ret) { - case 0: - break; - case -ENOENT: - /* No table here, try the next one */ - continue; - case -EINVAL: - case -EFAULT: - default: - /* - * Note: we are being strict here. - * Any time we run in to an issue - * unmapping tables, we stop and - * SIGSEGV. 
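try_unmap_single_bt() above boils down to one decision per bounds table: nothing to do, drop the whole table, or zap only the covered entries. A condensed sketch of that decision; the enum and the boolean stand-ins are illustrative, not kernel interfaces:

#include <stdbool.h>
#include <stdint.h>

enum bt_action { BT_NOTHING, BT_UNMAP_WHOLE, BT_ZAP_ENTRIES };

/*
 * 'entry_virt_space' is the virtual space covered by one directory
 * entry, 'has_neighbors' says whether any non-MPX VMA still lives in
 * that region, and 'have_table' stands in for a successful
 * get_bt_addr() lookup (false models the -ENOENT case).
 */
static enum bt_action pick_bt_action(uint64_t *start, uint64_t *end,
                                     uint64_t entry_virt_space,
                                     bool has_neighbors, bool have_table)
{
    uint64_t bta_start = *start & ~(entry_virt_space - 1);
    uint64_t bta_end   = bta_start + entry_virt_space;

    if (!has_neighbors) {
        /* nothing else uses this table: expand to the full region */
        *start = bta_start;
        *end   = bta_end;
    }
    if (!have_table)
        return BT_NOTHING;        /* no bounds table, nothing to free */
    if (*start == bta_start && *end == bta_end)
        return BT_UNMAP_WHOLE;    /* drop the entire table */
    return BT_ZAP_ENTRIES;        /* free only the covered entries */
}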
- */ - return ret; - } - - ret = unmap_single_bt(mm, bd_entry, bt_addr); + unsigned long one_unmap_start; + trace_mpx_unmap_search(start, end); + + one_unmap_start = start; + while (one_unmap_start < end) { + int ret; + unsigned long next_unmap_start = ALIGN(one_unmap_start+1, + bd_entry_virt_space(mm)); + unsigned long one_unmap_end = end; + /* + * if the end is beyond the current bounds table, + * move it back so we only deal with a single one + * at a time + */ + if (one_unmap_end > next_unmap_start) + one_unmap_end = next_unmap_start; + ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); if (ret) return ret; - } + one_unmap_start = next_unmap_start; + } return 0; } diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678fb26e1..0d7dd1f5ac36 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,10 +18,9 @@ #include <asm/mtrr.h> #include <asm/page.h> #include <asm/mce.h> -#include <asm/xcr.h> #include <asm/suspend.h> +#include <asm/fpu/internal.h> #include <asm/debugreg.h> -#include <asm/fpu-internal.h> /* pcntxt_mask */ #include <asm/cpu.h> #ifdef CONFIG_X86_32 @@ -155,6 +154,8 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ + + fpu__resume_cpu(); } /** @@ -221,12 +222,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); #endif - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); - fix_processor_context(); do_fpu_end(); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ead3060..98088bf5906a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu) return; /* * For BSP, PSE PGE are set in probe_page_size_mask(), for APs - * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). */ if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); |
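The rewritten mpx_unmap_tables() above walks an arbitrary unmap range one bounds-directory-entry's worth of virtual space at a time, so each piece is handled by exactly one directory entry. A minimal sketch of that chunking, with ALIGN_UP standing in for the kernel's ALIGN() and a printf in place of try_unmap_single_bt():

#include <stdint.h>
#include <stdio.h>

/* Round 'x' up to the next multiple of the power-of-two 'a'. */
#define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~((uint64_t)(a) - 1))

/*
 * Split [start, end) at the boundaries of the virtual space covered
 * by one bounds-directory entry, mirroring the loop above.
 */
static void for_each_bt_chunk(uint64_t start, uint64_t end,
                              uint64_t entry_virt_space)
{
    while (start < end) {
        uint64_t next = ALIGN_UP(start + 1, entry_virt_space);
        uint64_t chunk_end = next < end ? next : end;

        printf("chunk: [%#llx, %#llx)\n",
               (unsigned long long)start,
               (unsigned long long)chunk_end);
        start = next;
    }
}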