Diffstat (limited to 'arch/x86')
28 files changed, 342 insertions, 156 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d700811785ea..c70684f859e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1506,6 +1506,8 @@ config EFI_STUB
 	  This kernel feature allows a bzImage to be loaded directly
 	  by EFI firmware without the use of a bootloader.
 
+	  See Documentation/x86/efi-stub.txt for more information.
+
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 2c14e76bb4c7..4e85f5f85837 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -16,6 +16,26 @@
 
 static efi_system_table_t *sys_table;
 
+static void efi_printk(char *str)
+{
+	char *s8;
+
+	for (s8 = str; *s8; s8++) {
+		struct efi_simple_text_output_protocol *out;
+		efi_char16_t ch[2] = { 0 };
+
+		ch[0] = *s8;
+		out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
+
+		if (*s8 == '\n') {
+			efi_char16_t nl[2] = { '\r', 0 };
+			efi_call_phys2(out->output_string, out, nl);
+		}
+
+		efi_call_phys2(out->output_string, out, ch);
+	}
+}
+
 static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
 			      unsigned long *desc_size)
 {
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
 				EFI_LOADER_DATA,
 				nr_initrds * sizeof(*initrds),
 				&initrds);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc mem for initrds\n");
 		goto fail;
+	}
 
 	str = (char *)(unsigned long)hdr->cmd_line_ptr;
 	for (i = 0; i < nr_initrds; i++) {
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
 			status = efi_call_phys3(boottime->handle_protocol,
 					image->device_handle, &fs_proto, &io);
-			if (status != EFI_SUCCESS)
+			if (status != EFI_SUCCESS) {
+				efi_printk("Failed to handle fs_proto\n");
 				goto free_initrds;
+			}
 
 			status = efi_call_phys2(io->open_volume, io, &fh);
-			if (status != EFI_SUCCESS)
+			if (status != EFI_SUCCESS) {
+				efi_printk("Failed to open volume\n");
 				goto free_initrds;
+			}
 		}
 
 		status = efi_call_phys5(fh->open, fh, &h, filename_16,
 					EFI_FILE_MODE_READ, (u64)0);
-		if (status != EFI_SUCCESS)
+		if (status != EFI_SUCCESS) {
+			efi_printk("Failed to open initrd file\n");
 			goto close_handles;
+		}
 
 		initrd->handle = h;
 
 		info_sz = 0;
 		status = efi_call_phys4(h->get_info, h, &info_guid,
 					&info_sz, NULL);
-		if (status != EFI_BUFFER_TOO_SMALL)
+		if (status != EFI_BUFFER_TOO_SMALL) {
+			efi_printk("Failed to get initrd info size\n");
 			goto close_handles;
+		}
 
 grow:
 		status = efi_call_phys3(sys_table->boottime->allocate_pool,
 					EFI_LOADER_DATA, info_sz, &info);
-		if (status != EFI_SUCCESS)
+		if (status != EFI_SUCCESS) {
+			efi_printk("Failed to alloc mem for initrd info\n");
 			goto close_handles;
+		}
 
 		status = efi_call_phys4(h->get_info, h, &info_guid,
 					&info_sz, info);
@@ -612,8 +644,10 @@ grow:
 		file_sz = info->file_size;
 		efi_call_phys1(sys_table->boottime->free_pool, info);
 
-		if (status != EFI_SUCCESS)
+		if (status != EFI_SUCCESS) {
+			efi_printk("Failed to get initrd info\n");
 			goto close_handles;
+		}
 
 		initrd->size = file_sz;
 		initrd_total += file_sz;
@@ -629,11 +663,14 @@ grow:
 	 */
 	status = high_alloc(initrd_total, 0x1000,
 			   &initrd_addr, hdr->initrd_addr_max);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc highmem for initrds\n");
 		goto close_handles;
+	}
 
 	/* We've run out of free low memory. */
 	if (initrd_addr > hdr->initrd_addr_max) {
+		efi_printk("We've run out of free low memory\n");
 		status = EFI_INVALID_PARAMETER;
 		goto free_initrd_total;
 	}
@@ -652,8 +689,10 @@ grow:
 			status = efi_call_phys3(fh->read,
 						initrds[j].handle,
 						&chunksize, addr);
-			if (status != EFI_SUCCESS)
+			if (status != EFI_SUCCESS) {
+				efi_printk("Failed to read initrd\n");
 				goto free_initrd_total;
+			}
 			addr += chunksize;
 			size -= chunksize;
 		}
@@ -674,7 +713,7 @@ free_initrd_total:
 	low_free(initrd_total, initrd_addr);
 
 close_handles:
-	for (k = j; k < nr_initrds; k++)
+	for (k = j; k < i; k++)
 		efi_call_phys1(fh->close, initrds[k].handle);
 free_initrds:
 	efi_call_phys1(sys_table->boottime->free_pool, initrds);
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
 	options_size++;	/* NUL termination */
 
 	status = low_alloc(options_size, 1, &cmdline);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc mem for cmdline\n");
 		goto fail;
+	}
 
 	s1 = (u8 *)(unsigned long)cmdline;
 	s2 = (u16 *)options;
@@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	status = efi_call_phys3(sys_table->boottime->handle_protocol,
 				handle, &proto, (void *)&image);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
 		goto fail;
+	}
 
 	status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc lowmem for boot params\n");
 		goto fail;
+	}
 
 	memset(boot_params, 0x0, 0x4000);
 
@@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	if (status != EFI_SUCCESS) {
 		status = low_alloc(hdr->init_size, hdr->kernel_alignment,
 				   &start);
-		if (status != EFI_SUCCESS)
+		if (status != EFI_SUCCESS) {
+			efi_printk("Failed to alloc mem for kernel\n");
 			goto fail;
+		}
 	}
 
 	hdr->code32_start = (__u32)start;
@@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*gdt),
 				(void **)&gdt);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc mem for gdt structure\n");
 		goto fail;
+	}
 
 	gdt->size = 0x800;
 	status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc mem for gdt\n");
 		goto fail;
+	}
 
 	status = efi_call_phys3(sys_table->boottime->allocate_pool,
 				EFI_LOADER_DATA, sizeof(*idt),
 				(void **)&idt);
-	if (status != EFI_SUCCESS)
+	if (status != EFI_SUCCESS) {
+		efi_printk("Failed to alloc mem for idt structure\n");
 		goto fail;
+	}
 
 	idt->size = 0;
 	idt->address = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 39251663e65b..3b6e15627c55 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol {
 	void *blt;
 };
 
+struct efi_simple_text_output_protocol {
+	void *reset;
+	void *output_string;
+	void *test_string;
+};
+
 #endif /* BOOT_COMPRESSED_EBOOT_H */
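
The struct above mirrors the first three function pointers of UEFI's simple-text-output protocol, which is what the new efi_printk() drives: output_string takes NUL-terminated UCS-2 strings, and UEFI consoles expect a carriage return before each line feed. A minimal userspace sketch of the same conversion, where console_write() is a hypothetical stand-in for efi_call_phys2(out->output_string, out, ...):

#include <stdio.h>
#include <stdint.h>

typedef uint16_t efi_char16_t;

/* Hypothetical stand-in for the firmware's output_string call. */
static void console_write(const efi_char16_t *s)
{
	for (; *s; s++)
		putchar((char)*s);
}

/* Widen each ASCII byte to a two-element UCS-2 string and emit
 * '\r' before every '\n', exactly as efi_printk() above does. */
static void efi_printk_model(const char *str)
{
	for (; *str; str++) {
		efi_char16_t ch[2] = { (efi_char16_t)(unsigned char)*str, 0 };

		if (*str == '\n') {
			efi_char16_t nl[2] = { '\r', 0 };
			console_write(nl);
		}
		console_write(ch);
	}
}

int main(void)
{
	efi_printk_model("Failed to alloc mem for initrds\n");
	return 0;
}
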
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 98bd70faccc5..daeca56211e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -273,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 		sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
@@ -299,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 724aa441de7d..0c44630d1789 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,6 +29,7 @@
 #include <asm/processor.h>
 #include <asm/mmu.h>
 #include <asm/mpspec.h>
+#include <asm/realmode.h>
 
 #define COMPILER_DEPENDENT_INT64   long long
 #define COMPILER_DEPENDENT_UINT64  unsigned long long
@@ -116,10 +117,8 @@ static inline void acpi_disable_pci(void)
 /* Low-level suspend routine. */
 extern int acpi_suspend_lowlevel(void);
 
-extern const unsigned char acpi_wakeup_code[];
-
-/* early initialization routine */
-extern void acpi_reserve_wakeup_memory(void);
+/* Physical address to resume after wakeup */
+#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
 
 /*
  * Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b97596e2b68c..a6983b277220 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,8 @@
 #include <linux/compiler.h>
 #include <asm/alternative.h>
 
+#define BIT_64(n)			(U64_C(1) << (n))
+
 /*
  * These have to be done with inline assembly: that way the bit-setting
  * is guaranteed to be atomic. All bit operations return 0 if the bit
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 18d9005d9e4f..b0767bc08740 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,7 +34,7 @@
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
-extern int modifying_ftrace_code;
+extern atomic_t modifying_ftrace_code;
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b91..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index ada93b3b8c66..beff97f7df37 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,8 +7,6 @@
 
 #include <asm/processor-flags.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 #define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
 			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
 			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
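
The new BIT_64() exists because BIT(n) expands to (1UL << (n)), and unsigned long is only 32 bits wide on i386, so BIT(62) shifts past the type's width and is undefined; the MCE quirk further down switches to BIT_64(62) for exactly this reason. A small sketch of the difference, with UINT64_C standing in for the kernel's U64_C:

#include <stdio.h>
#include <stdint.h>

#define BIT_64(n)	(UINT64_C(1) << (n))

int main(void)
{
	uint64_t val = BIT_64(62) | 0xff;

	/* With a 32-bit unsigned long, (1UL << 62) is undefined and
	 * cannot clear bit 62; the u64 constant always can. */
	printf("val             = %#llx\n", (unsigned long long)val);
	printf("val & ~BIT_64   = %#llx\n",
	       (unsigned long long)(val & ~BIT_64(62)));
	return 0;
}

The sizek = 1UL << align change in mtrr/cleanup.c below fixes the same class of truncation.
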
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5c25de07cba8..89f794f007ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -248,7 +248,23 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
 }
 
 static inline bool is_ia32_task(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 82f29e70d058..6b9333b429ba 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
 		addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
 }
 
+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
 void debug_stack_set_zero(void)
 {
+	this_cpu_inc(debug_stack_use_ctr);
 	load_idt((const struct desc_ptr *)&nmi_idt_descr);
 }
 
 void debug_stack_reset(void)
 {
-	load_idt((const struct desc_ptr *)&idt_descr);
+	if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+		return;
+	if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+		load_idt((const struct desc_ptr *)&idt_descr);
 }
 
 #else	/* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b772dd6ad450..0a687fd185e6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1251,15 +1251,15 @@ void mce_log_therm_throt_event(__u64 status)
  * poller finds an MCE, poll 2x faster.  When the poller finds no more
  * errors, poll 2x slower (up to check_interval seconds).
  */
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
 
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
-static void mce_start_timer(unsigned long data)
+static void mce_timer_fn(unsigned long data)
 {
-	struct timer_list *t = &per_cpu(mce_timer, data);
-	int *n;
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long iv;
 
 	WARN_ON(smp_processor_id() != data);
 
@@ -1272,13 +1272,14 @@ static void mce_start_timer(unsigned long data)
 	 * Alert userspace if needed.  If we logged an MCE, reduce the
 	 * polling interval, otherwise increase the polling interval.
 	 */
-	n = &__get_cpu_var(mce_next_interval);
+	iv = __this_cpu_read(mce_next_interval);
 	if (mce_notify_irq())
-		*n = max(*n/2, HZ/100);
+		iv = max(iv, (unsigned long) HZ/100);
 	else
-		*n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+	__this_cpu_write(mce_next_interval, iv);
 
-	t->expires = jiffies + *n;
+	t->expires = jiffies + iv;
 	add_timer_on(t, smp_processor_id());
 }
 
@@ -1472,9 +1473,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
 			rdmsrl(msrs[i], val);
 
 			/* CntP bit set? */
-			if (val & BIT(62)) {
-				val &= ~BIT(62);
-				wrmsrl(msrs[i], val);
+			if (val & BIT_64(62)) {
+				val &= ~BIT_64(62);
+				wrmsrl(msrs[i], val);
 			}
 		}
 
@@ -1556,17 +1557,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 static void __mcheck_cpu_init_timer(void)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
-	int *n = &__get_cpu_var(mce_next_interval);
+	unsigned long iv = __this_cpu_read(mce_next_interval);
 
-	setup_timer(t, mce_start_timer, smp_processor_id());
+	setup_timer(t, mce_timer_fn, smp_processor_id());
 
 	if (mce_ignore_ce)
 		return;
 
-	*n = check_interval * HZ;
-	if (!*n)
+	__this_cpu_write(mce_next_interval, iv);
+	if (!iv)
 		return;
-	t->expires = round_jiffies(jiffies + *n);
+	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }
 
@@ -2276,7 +2277,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	case CPU_DOWN_FAILED_FROZEN:
 		if (!mce_ignore_ce && check_interval) {
 			t->expires = round_jiffies(jiffies +
-					   __get_cpu_var(mce_next_interval));
+					per_cpu(mce_next_interval, cpu));
 			add_timer_on(t, cpu);
 		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
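
The timer conversion above also makes the polling-interval update easy to model in isolation: the interval is held at no less than HZ/100 jiffies while events are being logged, and doubles up to check_interval seconds once the machine is quiet. A userspace sketch of that arithmetic (round_jiffies_relative() elided, HZ assumed to be 1000):

#include <stdio.h>

#define HZ 1000UL

static unsigned long check_interval = 5 * 60;	/* seconds */

/* Mirror of mce_timer_fn()'s update: clamp to at least HZ/100
 * jiffies when an event was logged, otherwise back off by doubling,
 * capped at check_interval seconds' worth of jiffies. */
static unsigned long next_interval(unsigned long iv, int logged)
{
	if (logged)
		return iv > HZ / 100 ? iv : HZ / 100;
	return iv * 2 < check_interval * HZ ? iv * 2 : check_interval * HZ;
}

int main(void)
{
	unsigned long iv = HZ / 100;
	int i;

	/* Quiet machine: watch the interval back off exponentially. */
	for (i = 0; i < 6; i++) {
		printf("poll again in %lu jiffies\n", iv);
		iv = next_interval(iv, 0);
	}
	return 0;
}
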
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ac140c7be396..bdda2e6c673b 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -266,7 +266,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
 		if (align > max_align)
 			align = max_align;
 
-		sizek = 1 << align;
+		sizek = 1UL << align;
 		if (debug_print) {
 			char start_factor = 'K', size_factor = 'K';
 			unsigned long start_base, size_base;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 01ccf9b71473..623f28837476 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -316,7 +316,6 @@ ret_from_exception:
 	preempt_stop(CLBR_ANY)
 ret_from_intr:
 	GET_THREAD_INFO(%ebp)
-resume_userspace_sig:
 #ifdef CONFIG_VM86
 	movl PT_EFLAGS(%esp), %eax	# mix EFLAGS and CS
 	movb PT_CS(%esp), %al
@@ -615,9 +614,13 @@ work_notifysig:				# deal with pending signals and
 					# vm86-space
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	movb PT_CS(%esp), %bl
+	andb $SEGMENT_RPL_MASK, %bl
+	cmpb $USER_RPL, %bl
+	jb resume_kernel
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace_sig
+	jmp resume_userspace
 
 	ALIGN
 work_notifysig_v86:
@@ -630,9 +633,13 @@ work_notifysig_v86:
 #endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
+	movb PT_CS(%esp), %bl
+	andb $SEGMENT_RPL_MASK, %bl
+	cmpb $USER_RPL, %bl
+	jb resume_kernel
 	xorl %edx, %edx
 	call do_notify_resume
-	jmp resume_userspace_sig
+	jmp resume_userspace
 END(work_pending)
 
 	# perform syscall exit tracing
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d02026..7d65133b51be 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
 .endm
 
 /*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_OFF
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+	call debug_stack_set_zero
+	TRACE_IRQS_ON
+	call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+	bt   $9,EFLAGS-\offset(%rsp)	/* interrupts off? */
+	jnc  1f
+	TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG		TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG		TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG		TRACE_IRQS_IRETQ
+#endif
+
+/*
  * C code is not supposed to know about undefined top of stack. Every time
  * a C function with an pt_regs argument is called from the SYSCALL based
  * fast path FIXUP_TOP_OF_STACK is needed.
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
 	subq $ORIG_RAX-R15, %rsp
 	CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
 	call save_paranoid
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	movq %rsp,%rdi		/* pt_regs pointer */
 	xorl %esi,%esi		/* no error code */
 	subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
 ENTRY(paranoid_exit)
 	DEFAULT_FRAME
 	DISABLE_INTERRUPTS(CLBR_NONE)
-	TRACE_IRQS_OFF
+	TRACE_IRQS_OFF_DEBUG
 	testl %ebx,%ebx				/* swapgs needed? */
 	jnz paranoid_restore
 	testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
 	RESTORE_ALL 8
 	jmp irq_return
paranoid_restore:
-	TRACE_IRQS_IRETQ 0
+	TRACE_IRQS_IRETQ_DEBUG 0
 	RESTORE_ALL 8
 	jmp irq_return
paranoid_userspace:
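
The TRACE_IRQS_*_DEBUG macros above are safe to nest only because debug_stack_set_zero()/debug_stack_reset() in cpu/common.c are now reference-counted: each user bumps the per-CPU counter, and the real IDT comes back only when the last one drops it. A single-CPU userspace model of that pairing (the per-CPU accessors reduced to a plain variable):

#include <assert.h>
#include <stdio.h>

static unsigned int debug_stack_use_ctr;

static void debug_stack_set_zero(void)
{
	debug_stack_use_ctr++;
	/* load_idt(&nmi_idt_descr) would go here */
}

static void debug_stack_reset(void)
{
	assert(debug_stack_use_ctr);	/* kernel: WARN_ON and bail */
	if (--debug_stack_use_ctr == 0)
		printf("restore real IDT\n");	/* load_idt(&idt_descr) */
}

int main(void)
{
	debug_stack_set_zero();		/* outer TRACE_IRQS_OFF_DEBUG */
	debug_stack_set_zero();		/* nested user, e.g. lockdep path */
	debug_stack_reset();		/* inner exit: IDT stays zeroed */
	debug_stack_reset();		/* outer exit: IDT restored here */
	return 0;
}
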
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 32ff36596ab1..c3a7cb4bf6e6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
 }
 
 static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code)
 {
 	unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
 	old = ftrace_call_replace(ip, addr);
 	new = ftrace_nop_replace();
 
-	return ftrace_modify_code(rec->ip, old, new);
+	/*
+	 * On boot up, and when modules are loaded, the MCOUNT_ADDR
+	 * is converted to a nop, and will never become MCOUNT_ADDR
+	 * again. This code is either running before SMP (on boot up)
+	 * or before the code will ever be executed (module load).
+	 * We do not want to use the breakpoint version in this case,
+	 * just modify the code directly.
+	 */
+	if (addr == MCOUNT_ADDR)
+		return ftrace_modify_code_direct(rec->ip, old, new);
+
+	/* Normal cases use add_brk_on_nop */
+	WARN_ONCE(1, "invalid use of ftrace_make_nop");
+	return -EINVAL;
 }
 
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 	old = ftrace_nop_replace();
 	new = ftrace_call_replace(ip, addr);
 
-	return ftrace_modify_code(rec->ip, old, new);
+	/* Should only be called when module is loaded */
+	return ftrace_modify_code_direct(rec->ip, old, new);
 }
 
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ *   CPU-0                          CPU-1
+ *
+ *   atomic_inc(mfc);
+ *   write int3s
+ *				<trap-int3> // implicit (r)mb
+ *				if (atomic_read(mfc))
+ *					call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ *   atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code);
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
 	new = ftrace_call_replace(ip, (unsigned long)func);
+
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);
+
 	ret = ftrace_modify_code(ip, old, new);
 
+	atomic_dec(&modifying_ftrace_code);
+
 	return ret;
 }
 
-int modifying_ftrace_code __read_mostly;
-
 /*
  * A breakpoint was added to the code address we are about to
  * modify, and this is the handle that will just skip over it.
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
 	}
 }
 
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+		   unsigned const char *new_code)
+{
+	int ret;
+
+	ret = add_break(ip, old_code);
+	if (ret)
+		goto out;
+
+	run_sync();
+
+	ret = add_update_code(ip, new_code);
+	if (ret)
+		goto fail_update;
+
+	run_sync();
+
+	ret = ftrace_write(ip, new_code, 1);
+	if (ret) {
+		ret = -EPERM;
+		goto out;
+	}
+	run_sync();
+ out:
+	return ret;
+
+ fail_update:
+	probe_kernel_write((void *)ip, &old_code[0], 1);
+	goto out;
+}
+
 void arch_ftrace_update_code(int command)
 {
-	modifying_ftrace_code++;
+	/* See comment above by declaration of modifying_ftrace_code */
+	atomic_inc(&modifying_ftrace_code);
 
 	ftrace_modify_all_code(command);
 
-	modifying_ftrace_code--;
+	atomic_dec(&modifying_ftrace_code);
 }
 
 int __init ftrace_dyn_arch_init(void *data)
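
The new ftrace_modify_code() splits each live text modification into three phases -- plant an int3 over the first opcode byte, rewrite the remaining bytes, then replace the int3 with the new first byte -- with run_sync() between phases so no CPU can execute a torn instruction; while the int3 is in place, do_int3() consults modifying_ftrace_code and skips over the site. A schematic userspace model of that ordering (plain buffers stand in for kernel text):

#include <stdio.h>
#include <string.h>

#define INSN_SIZE 5
#define INT3 0xcc

static unsigned char text[INSN_SIZE];	/* stands in for kernel .text */

static void run_sync(void)
{
	/* kernel: IPI all CPUs so nobody straddles the write */
	puts("  run_sync()");
}

/* Model of ftrace_modify_code(): at every intermediate step the first
 * byte is the old byte, int3, or the new byte -- never a torn mix. */
static void modify(const unsigned char *new)
{
	text[0] = INT3;				/* add_break() */
	run_sync();
	memcpy(text + 1, new + 1, INSN_SIZE - 1);	/* add_update_code() */
	run_sync();
	text[0] = new[0];			/* ftrace_write(ip, new, 1) */
	run_sync();
}

int main(void)
{
	const unsigned char nop[INSN_SIZE]  = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
	const unsigned char call[INSN_SIZE] = { 0xe8, 0x12, 0x34, 0x56, 0x78 };
	int i;

	memcpy(text, nop, INSN_SIZE);
	modify(call);
	for (i = 0; i < INSN_SIZE; i++)
		printf("%02x ", text[i]);
	putchar('\n');
	return 0;
}
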
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9cc7b4392f7c..1460a5df92f7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -870,7 +870,7 @@ int __init hpet_enable(void)
 	else
 		pr_warn("HPET initial state will not be saved\n");
 	cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
-	hpet_writel(cfg, HPET_Tn_CFG(i));
+	hpet_writel(cfg, HPET_CFG);
 	if (cfg)
 		pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
 			cfg);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 90875279ef3d..a0b2f84457be 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
 	 */
 	if (unlikely(is_debug_stack(regs->sp))) {
 		debug_stack_set_zero();
-		__get_cpu_var(update_debug_stack) = 1;
+		this_cpu_write(update_debug_stack, 1);
 	}
 }
 
 static inline void nmi_nesting_postprocess(void)
 {
-	if (unlikely(__get_cpu_var(update_debug_stack)))
+	if (unlikely(this_cpu_read(update_debug_stack))) {
 		debug_stack_reset();
+		this_cpu_write(update_debug_stack, 0);
+	}
 }
 #endif
 
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 13b1990c7c58..c4c6a5c2bf0f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child,
 				0, sizeof(struct user_i387_struct),
 				datap);
 
-	/* normal 64bit interface to access TLS data.
-	   Works just like arch_prctl, except that the arguments
-	   are reversed. */
-	case PTRACE_ARCH_PRCTL:
-		return do_arch_prctl(child, data, addr);
-
 	default:
 		return compat_ptrace_request(child, request, addr, data);
 	}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 965dfda0fd5e..21af737053aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -555,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
 		sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -581,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -647,42 +645,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		struct pt_regs *regs)
 {
 	int usig = signr_convert(sig);
-	sigset_t *set = &current->blocked;
-	int ret;
-
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		set = &current->saved_sigmask;
+	sigset_t *set = sigmask_to_save();
 
 	/* Set up the stack frame */
 	if (is_ia32) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
-			ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+			return ia32_setup_rt_frame(usig, ka, info, set, regs);
 		else
-			ret = ia32_setup_frame(usig, ka, set, regs);
+			return ia32_setup_frame(usig, ka, set, regs);
 #ifdef CONFIG_X86_X32_ABI
 	} else if (is_x32) {
-		ret = x32_setup_rt_frame(usig, ka, info,
+		return x32_setup_rt_frame(usig, ka, info,
 					(compat_sigset_t *)set, regs);
 #endif
 	} else {
-		ret = __setup_rt_frame(sig, ka, info, set, regs);
-	}
-
-	if (ret) {
-		force_sigsegv(sig, current);
-		return -EFAULT;
+		return __setup_rt_frame(sig, ka, info, set, regs);
 	}
-
-	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-	return ret;
 }
 
-static int
+static void
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs *regs)
 {
-	int ret;
-
 	/* Are we from a system call? */
 	if (syscall_get_nr(current, regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -713,10 +697,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	    likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
 		regs->flags &= ~X86_EFLAGS_TF;
 
-	ret = setup_rt_frame(sig, ka, info, regs);
-
-	if (ret)
-		return ret;
+	if (setup_rt_frame(sig, ka, info, regs) < 0) {
+		force_sigsegv(sig, current);
+		return;
+	}
 
 	/*
 	 * Clear the direction flag as per the ABI for function entry.
@@ -731,12 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	block_sigmask(ka, sig);
-
-	tracehook_signal_handler(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
-
-	return 0;
+	signal_delivered(sig, info, ka, regs,
+			 test_thread_flag(TIF_SINGLESTEP));
 }
 
 #ifdef CONFIG_X86_32
@@ -757,16 +737,6 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 
-	/*
-	 * We want the common case to go fast, which is why we may in certain
-	 * cases get here from kernel mode. Just return without doing anything
-	 * if so.
-	 * X86_32: vm86 regs switched out by assembly code before reaching
-	 * here, so testing against kernel CS suffices.
-	 */
-	if (!user_mode(regs))
-		return;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee! Actually deliver the signal.  */
@@ -796,10 +766,7 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }
 
 /*
@@ -827,8 +794,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
@@ -936,7 +901,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
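
sigmask_to_save() and restore_saved_sigmask() used above are the generic helpers built on the TS_RESTORE_SIGMASK accessors added to thread_info.h earlier: a task in a sigsuspend()-style call parks its original mask in current->saved_sigmask, and that mask is either recorded in the signal frame (delivery) or quietly put back (no signal). A single-task userspace model of the protocol, with a plain bitmask standing in for sigset_t:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long sigset_model_t;

static struct {
	bool restore_sigmask;		/* TS_RESTORE_SIGMASK */
	sigset_model_t blocked;
	sigset_model_t saved_sigmask;
} cur = { false, 0x1, 0 };

/* sigsuspend()-style entry: park the old mask, install a new one. */
static void suspend_with(sigset_model_t mask)
{
	cur.saved_sigmask = cur.blocked;
	cur.blocked = mask;
	cur.restore_sigmask = true;	/* set_restore_sigmask() */
}

/* sigmask_to_save(): which mask should the signal frame record? */
static sigset_model_t *sigmask_to_save(void)
{
	return cur.restore_sigmask ? &cur.saved_sigmask : &cur.blocked;
}

/* restore_saved_sigmask(): nothing was delivered, undo the swap. */
static void restore_saved_sigmask(void)
{
	if (cur.restore_sigmask) {
		cur.restore_sigmask = false;
		cur.blocked = cur.saved_sigmask;
	}
}

int main(void)
{
	suspend_with(0xf0);
	printf("frame would record %#lx\n", *sigmask_to_save());
	restore_saved_sigmask();	/* no signal: mask back to 0x1 */
	printf("blocked is now %#lx\n", cur.blocked);
	return 0;
}
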
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..fd019d78b1f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 /* maps the cpu to the sched domain representing multi-core */
 const struct cpumask *cpu_coregroup_mask(int cpu)
 {
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-	/*
-	 * For perf, we return last level cache shared map.
-	 * And for power savings, we return cpu_core_map
-	 */
-	if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
-		return cpu_core_mask(cpu);
-	else
-		return cpu_llc_shared_mask(cpu);
+	return cpu_llc_shared_mask(cpu);
 }
 
 static void impress_friends(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff08457a025d..05b31d92f69c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -303,8 +303,12 @@ gp_in_kernel:
 dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
 {
 #ifdef CONFIG_DYNAMIC_FTRACE
-	/* ftrace must be first, everything else may cause a recursive crash */
-	if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+	/*
+	 * ftrace must be first, everything else may cause a recursive crash.
+	 * See note by declaration of modifying_ftrace_code in ftrace.c
+	 */
+	if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+	    ftrace_int3_handler(regs))
 		return;
 #endif
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 72102e0ab7cb..be3cea4407ff 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2595,8 +2595,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
 			*gfnp = gfn;
 			kvm_release_pfn_clean(pfn);
 			pfn &= ~mask;
-			if (!get_page_unless_zero(pfn_to_page(pfn)))
-				BUG();
+			kvm_get_pfn(pfn);
 			*pfnp = pfn;
 		}
 	}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f11729fd019c..3d68ef6d2266 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
 	return req_type;
 }
 
+struct pagerange_state {
+	unsigned long		cur_pfn;
+	int			ram;
+	int			not_ram;
+};
+
+static int
+pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
+{
+	struct pagerange_state *state = arg;
+
+	state->not_ram	|= initial_pfn > state->cur_pfn;
+	state->ram	|= total_nr_pages > 0;
+	state->cur_pfn	 = initial_pfn + total_nr_pages;
+
+	return state->ram && state->not_ram;
+}
+
 static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
 {
-	int ram_page = 0, not_rampage = 0;
-	unsigned long page_nr;
+	int ret = 0;
+	unsigned long start_pfn = start >> PAGE_SHIFT;
+	unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	struct pagerange_state state = {start_pfn, 0, 0};
 
-	for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
-	     ++page_nr) {
-		/*
-		 * For legacy reasons, physical address range in the legacy ISA
-		 * region is tracked as non-RAM. This will allow users of
-		 * /dev/mem to map portions of legacy ISA region, even when
-		 * some of those portions are listed(or not even listed) with
-		 * different e820 types(RAM/reserved/..)
-		 */
-		if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
-		    page_is_ram(page_nr))
-			ram_page = 1;
-		else
-			not_rampage = 1;
-
-		if (ram_page == not_rampage)
-			return -1;
+	/*
+	 * For legacy reasons, physical address range in the legacy ISA
+	 * region is tracked as non-RAM. This will allow users of
+	 * /dev/mem to map portions of legacy ISA region, even when
+	 * some of those portions are listed(or not even listed) with
+	 * different e820 types(RAM/reserved/..)
+	 */
+	if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
+		start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
+
+	if (start_pfn < end_pfn) {
+		ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
+				&state, pagerange_is_ram_callback);
 	}
 
-	return ram_page;
+	return (ret > 0) ? -1 : (state.ram ? 1 : 0);
 }
 
 /*
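
The rewritten pat_pagerange_is_ram() answers "all RAM (1), no RAM (0), or mixed (-1)" by letting walk_system_ram_range() feed the callback only the RAM sub-ranges: a gap before a reported range proves non-RAM pages exist, a non-empty range proves RAM exists, and a nonzero return stops the walk as soon as both have been seen. A userspace model with a hypothetical RAM map of pfns [100, 200):

#include <stdio.h>

struct pagerange_state {
	unsigned long cur_pfn;
	int ram;
	int not_ram;
};

static int callback(unsigned long pfn, unsigned long nr, void *arg)
{
	struct pagerange_state *s = arg;

	s->not_ram |= pfn > s->cur_pfn;	/* gap => non-RAM pages seen */
	s->ram     |= nr > 0;		/* any pages => RAM seen */
	s->cur_pfn  = pfn + nr;
	return s->ram && s->not_ram;	/* nonzero stops the walk */
}

/* Hypothetical stand-in for walk_system_ram_range(): a single RAM
 * block covering pfns [100, 200). */
static int walk(unsigned long start, unsigned long nr, void *arg,
		int (*fn)(unsigned long, unsigned long, void *))
{
	unsigned long lo = start > 100 ? start : 100;
	unsigned long hi = start + nr < 200 ? start + nr : 200;

	return lo < hi ? fn(lo, hi - lo, arg) : 0;
}

static int is_ram(unsigned long start_pfn, unsigned long end_pfn)
{
	struct pagerange_state st = { start_pfn, 0, 0 };
	int ret = walk(start_pfn, end_pfn - start_pfn, &st, callback);

	return (ret > 0) ? -1 : (st.ram ? 1 : 0);
}

int main(void)
{
	printf("%d %d %d\n",
	       is_ram(120, 150),	/* all RAM:  1 */
	       is_ram(300, 400),	/* no RAM:   0 */
	       is_ram(50, 150));	/* mixed:   -1 */
	return 0;
}
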
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 29f9f0554f7d..7a35a6e71d44 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,4 @@
 346	i386	setns			sys_setns
 347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+349	i386	kcmp			sys_kcmp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dd29a9ea27c5..51171aeff0dc 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
 309	common	getcpu			sys_getcpu
 310	64	process_vm_readv	sys_process_vm_readv
 311	64	process_vm_writev	sys_process_vm_writev
+312	64	kcmp			sys_kcmp
+
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
 # for native 64-bit operation.
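
kcmp(2), wired up here as No. 349 (i386) and No. 312 (x86_64), lets a process ask whether two tasks share a kernel object -- for instance whether two fds point at the same open file description. A usage sketch via syscall(2); KCMP_FILE mirrors the new include/linux/kcmp.h, and the call needs a kernel built with CONFIG_CHECKPOINT_RESTORE:

#include <stdio.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>

#ifndef __NR_kcmp
# define __NR_kcmp 312		/* x86_64; 349 on i386 */
#endif
#define KCMP_FILE 0

int main(void)
{
	pid_t pid = getpid();
	int fd2 = dup(1);
	long same;

	/* kcmp(pid1, pid2, type, idx1, idx2): returns 0 when both fds
	 * refer to the same open file description. */
	same = syscall(__NR_kcmp, pid, pid, KCMP_FILE, 1, fd2);
	printf("fd 1 vs dup'd fd: %ld (0 means shared)\n", same);
	return 0;
}
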
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bb0fb03b9f85..a508cea13503 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -486,7 +486,6 @@ long sys_sigreturn(struct pt_regs *regs)
 	    copy_from_user(&set.sig[1], extramask, sig_size))
 		goto segfault;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, sc))
@@ -600,7 +599,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto segfault;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75f33b2a5933..e74df9548a02 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1116,7 +1116,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
+	.rdmsr_regs = native_rdmsr_safe_regs,
 	.write_msr = xen_write_msr_safe,
+	.wrmsr_regs = native_wrmsr_safe_regs,
+
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,