diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/include/asm/i387.h | 53 | ||||
-rw-r--r-- | arch/x86/include/asm/processor.h | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 5 | ||||
-rw-r--r-- | arch/x86/kernel/process_32.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/process_64.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 40 |
6 files changed, 57 insertions, 50 deletions
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a850b4d8d14d..247904945d3f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -29,10 +29,11 @@ extern unsigned int sig_xstate_size; extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); extern int init_fpu(struct task_struct *child); -extern void __math_state_restore(struct task_struct *); extern void math_state_restore(void); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); + extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; @@ -269,6 +270,16 @@ static inline int fpu_restore_checking(struct fpu *fpu) static inline int restore_fpu_checking(struct task_struct *tsk) { + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception + is pending. Clear the x87 state here by setting it to fixed + values. "m" is a random variable that should be in L1 */ + alternative_input( + ASM_NOP8 ASM_NOP2, + "emms\n\t" /* clear stack tags */ + "fildl %P[addr]", /* set F?P to defined value */ + X86_FEATURE_FXSAVE_LEAK, + [addr] "m" (tsk->thread.fpu.has_fpu)); + return fpu_restore_checking(&tsk->thread.fpu); } @@ -279,19 +290,21 @@ static inline int restore_fpu_checking(struct task_struct *tsk) */ static inline int __thread_has_fpu(struct task_struct *tsk) { - return tsk->thread.has_fpu; + return tsk->thread.fpu.has_fpu; } /* Must be paired with an 'stts' after! */ static inline void __thread_clear_has_fpu(struct task_struct *tsk) { - tsk->thread.has_fpu = 0; + tsk->thread.fpu.has_fpu = 0; + percpu_write(fpu_owner_task, NULL); } /* Must be paired with a 'clts' before! */ static inline void __thread_set_has_fpu(struct task_struct *tsk) { - tsk->thread.has_fpu = 1; + tsk->thread.fpu.has_fpu = 1; + percpu_write(fpu_owner_task, tsk); } /* @@ -336,30 +349,36 @@ typedef struct { int preload; } fpu_switch_t; * We don't do that yet, so "fpu_lazy_restore()" always returns * false, but some day.. */ -#define fpu_lazy_restore(tsk) (0) -#define fpu_lazy_state_intact(tsk) do { } while (0) +static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) +{ + return new == percpu_read_stable(fpu_owner_task) && + cpu == new->thread.fpu.last_cpu; +} -static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new) +static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) { fpu_switch_t fpu; fpu.preload = tsk_used_math(new) && new->fpu_counter > 5; if (__thread_has_fpu(old)) { - if (__save_init_fpu(old)) - fpu_lazy_state_intact(old); - __thread_clear_has_fpu(old); - old->fpu_counter++; + if (!__save_init_fpu(old)) + cpu = ~0; + old->thread.fpu.last_cpu = cpu; + old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */ /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { + new->fpu_counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); } else stts(); } else { old->fpu_counter = 0; + old->thread.fpu.last_cpu = ~0; if (fpu.preload) { - if (fpu_lazy_restore(new)) + new->fpu_counter++; + if (fpu_lazy_restore(new, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); @@ -377,8 +396,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta */ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { - if (fpu.preload) - __math_state_restore(new); + if (fpu.preload) { + if (unlikely(restore_fpu_checking(new))) + __thread_fpu_end(new); + } } /* @@ -451,8 +472,10 @@ static inline void kernel_fpu_begin(void) __save_init_fpu(me); __thread_clear_has_fpu(me); /* We do 'stts()' in kernel_fpu_end() */ - } else + } else { + percpu_write(fpu_owner_task, NULL); clts(); + } } static inline void kernel_fpu_end(void) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f7c89e231c6c..58545c97d071 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -374,6 +374,8 @@ union thread_xstate { }; struct fpu { + unsigned int last_cpu; + unsigned int has_fpu; union thread_xstate *state; }; @@ -454,7 +456,6 @@ struct thread_struct { unsigned long trap_no; unsigned long error_code; /* floating point and extended processor state */ - unsigned long has_fpu; struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d43cad74f166..c0f7d68d318f 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1044,6 +1044,9 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; +DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +EXPORT_PER_CPU_SYMBOL(fpu_owner_task); + /* * Special IST stacks which the CPU switches to when it calls * an IST-marked descriptor entry. Up to 7 stacks (hardware @@ -1111,6 +1114,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +EXPORT_PER_CPU_SYMBOL(fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 80bfe1ab0031..c08d1ff12b7c 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -214,6 +214,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; @@ -303,7 +304,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - fpu = switch_fpu_prepare(prev_p, next_p); + fpu = switch_fpu_prepare(prev_p, next_p, cpu); /* * Reload esp0. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 1fd94bc4279d..cfa5c90c01db 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -286,6 +286,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, set_tsk_thread_flag(p, TIF_FORK); + p->fpu_counter = 0; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); @@ -388,7 +389,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; fpu_switch_t fpu; - fpu = switch_fpu_prepare(prev_p, next_p); + fpu = switch_fpu_prepare(prev_p, next_p, cpu); /* * Reload esp0, LDT and the page table pointer: diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 77da5b475ad2..4bbe04d96744 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -571,37 +571,6 @@ asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) } /* - * This gets called with the process already owning the - * FPU state, and with CR0.TS cleared. It just needs to - * restore the FPU register state. - */ -void __math_state_restore(struct task_struct *tsk) -{ - /* We need a safe address that is cheap to find and that is already - in L1. We've just brought in "tsk->thread.has_fpu", so use that */ -#define safe_address (tsk->thread.has_fpu) - - /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. safe_address is a random variable that should be in L1 */ - alternative_input( - ASM_NOP8 ASM_NOP2, - "emms\n\t" /* clear stack tags */ - "fildl %P[addr]", /* set F?P to defined value */ - X86_FEATURE_FXSAVE_LEAK, - [addr] "m" (safe_address)); - - /* - * Paranoid restore. send a SIGSEGV if we fail to restore the state. - */ - if (unlikely(restore_fpu_checking(tsk))) { - __thread_fpu_end(tsk); - force_sig(SIGSEGV, tsk); - return; - } -} - -/* * 'math_state_restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * @@ -631,7 +600,14 @@ void math_state_restore(void) } __thread_fpu_begin(tsk); - __math_state_restore(tsk); + /* + * Paranoid restore. send a SIGSEGV if we fail to restore the state. + */ + if (unlikely(restore_fpu_checking(tsk))) { + __thread_fpu_end(tsk); + force_sig(SIGSEGV, tsk); + return; + } tsk->fpu_counter++; } |