summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/boot/compressed/eboot.c87
-rw-r--r--arch/x86/boot/compressed/eboot.h6
-rw-r--r--arch/x86/ia32/ia32_signal.c2
-rw-r--r--arch/x86/include/asm/acpi.h7
-rw-r--r--arch/x86/include/asm/bitops.h2
-rw-r--r--arch/x86/include/asm/ftrace.h2
-rw-r--r--arch/x86/include/asm/posix_types_32.h3
-rw-r--r--arch/x86/include/asm/sighandling.h2
-rw-r--r--arch/x86/include/asm/thread_info.h18
-rw-r--r--arch/x86/kernel/cpu/common.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c37
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c2
-rw-r--r--arch/x86/kernel/entry_32.S13
-rw-r--r--arch/x86/kernel/entry_64.S44
-rw-r--r--arch/x86/kernel/ftrace.c102
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/nmi.c6
-rw-r--r--arch/x86/kernel/ptrace.c6
-rw-r--r--arch/x86/kernel/signal.c62
-rw-r--r--arch/x86/kernel/smpboot.c10
-rw-r--r--arch/x86/kernel/traps.c8
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/mm/pat.c56
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/um/signal.c2
-rw-r--r--arch/x86/xen/enlighten.c3
28 files changed, 342 insertions, 156 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d700811785ea..c70684f859e1 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1506,6 +1506,8 @@ config EFI_STUB
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
+ See Documentation/x86/efi-stub.txt for more information.
+
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 2c14e76bb4c7..4e85f5f85837 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -16,6 +16,26 @@
static efi_system_table_t *sys_table;
+static void efi_printk(char *str)
+{
+ char *s8;
+
+ for (s8 = str; *s8; s8++) {
+ struct efi_simple_text_output_protocol *out;
+ efi_char16_t ch[2] = { 0 };
+
+ ch[0] = *s8;
+ out = (struct efi_simple_text_output_protocol *)sys_table->con_out;
+
+ if (*s8 == '\n') {
+ efi_char16_t nl[2] = { '\r', 0 };
+ efi_call_phys2(out->output_string, out, nl);
+ }
+
+ efi_call_phys2(out->output_string, out, ch);
+ }
+}
+
static efi_status_t __get_map(efi_memory_desc_t **map, unsigned long *map_size,
unsigned long *desc_size)
{
@@ -531,8 +551,10 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
EFI_LOADER_DATA,
nr_initrds * sizeof(*initrds),
&initrds);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrds\n");
goto fail;
+ }
str = (char *)(unsigned long)hdr->cmd_line_ptr;
for (i = 0; i < nr_initrds; i++) {
@@ -575,32 +597,42 @@ static efi_status_t handle_ramdisks(efi_loaded_image_t *image,
status = efi_call_phys3(boottime->handle_protocol,
image->device_handle, &fs_proto, &io);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to handle fs_proto\n");
goto free_initrds;
+ }
status = efi_call_phys2(io->open_volume, io, &fh);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open volume\n");
goto free_initrds;
+ }
}
status = efi_call_phys5(fh->open, fh, &h, filename_16,
EFI_FILE_MODE_READ, (u64)0);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to open initrd file\n");
goto close_handles;
+ }
initrd->handle = h;
info_sz = 0;
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, NULL);
- if (status != EFI_BUFFER_TOO_SMALL)
+ if (status != EFI_BUFFER_TOO_SMALL) {
+ efi_printk("Failed to get initrd info size\n");
goto close_handles;
+ }
grow:
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, info_sz, &info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for initrd info\n");
goto close_handles;
+ }
status = efi_call_phys4(h->get_info, h, &info_guid,
&info_sz, info);
@@ -612,8 +644,10 @@ grow:
file_sz = info->file_size;
efi_call_phys1(sys_table->boottime->free_pool, info);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get initrd info\n");
goto close_handles;
+ }
initrd->size = file_sz;
initrd_total += file_sz;
@@ -629,11 +663,14 @@ grow:
*/
status = high_alloc(initrd_total, 0x1000,
&initrd_addr, hdr->initrd_addr_max);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc highmem for initrds\n");
goto close_handles;
+ }
/* We've run out of free low memory. */
if (initrd_addr > hdr->initrd_addr_max) {
+ efi_printk("We've run out of free low memory\n");
status = EFI_INVALID_PARAMETER;
goto free_initrd_total;
}
@@ -652,8 +689,10 @@ grow:
status = efi_call_phys3(fh->read,
initrds[j].handle,
&chunksize, addr);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to read initrd\n");
goto free_initrd_total;
+ }
addr += chunksize;
size -= chunksize;
}
@@ -674,7 +713,7 @@ free_initrd_total:
low_free(initrd_total, initrd_addr);
close_handles:
- for (k = j; k < nr_initrds; k++)
+ for (k = j; k < i; k++)
efi_call_phys1(fh->close, initrds[k].handle);
free_initrds:
efi_call_phys1(sys_table->boottime->free_pool, initrds);
@@ -732,8 +771,10 @@ static efi_status_t make_boot_params(struct boot_params *boot_params,
options_size++; /* NUL termination */
status = low_alloc(options_size, 1, &cmdline);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for cmdline\n");
goto fail;
+ }
s1 = (u8 *)(unsigned long)cmdline;
s2 = (u16 *)options;
@@ -895,12 +936,16 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->handle_protocol,
handle, &proto, (void *)&image);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to get handle for LOADED_IMAGE_PROTOCOL\n");
goto fail;
+ }
status = low_alloc(0x4000, 1, (unsigned long *)&boot_params);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc lowmem for boot params\n");
goto fail;
+ }
memset(boot_params, 0x0, 0x4000);
@@ -933,8 +978,10 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
if (status != EFI_SUCCESS) {
status = low_alloc(hdr->init_size, hdr->kernel_alignment,
&start);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for kernel\n");
goto fail;
+ }
}
hdr->code32_start = (__u32)start;
@@ -945,19 +992,25 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table)
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*gdt),
(void **)&gdt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt structure\n");
goto fail;
+ }
gdt->size = 0x800;
status = low_alloc(gdt->size, 8, (unsigned long *)&gdt->address);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for gdt\n");
goto fail;
+ }
status = efi_call_phys3(sys_table->boottime->allocate_pool,
EFI_LOADER_DATA, sizeof(*idt),
(void **)&idt);
- if (status != EFI_SUCCESS)
+ if (status != EFI_SUCCESS) {
+ efi_printk("Failed to alloc mem for idt structure\n");
goto fail;
+ }
idt->size = 0;
idt->address = 0;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 39251663e65b..3b6e15627c55 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -58,4 +58,10 @@ struct efi_uga_draw_protocol {
void *blt;
};
+struct efi_simple_text_output_protocol {
+ void *reset;
+ void *output_string;
+ void *test_string;
+};
+
#endif /* BOOT_COMPRESSED_EBOOT_H */
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 98bd70faccc5..daeca56211e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -273,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
@@ -299,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 724aa441de7d..0c44630d1789 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -29,6 +29,7 @@
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
+#include <asm/realmode.h>
#define COMPILER_DEPENDENT_INT64 long long
#define COMPILER_DEPENDENT_UINT64 unsigned long long
@@ -116,10 +117,8 @@ static inline void acpi_disable_pci(void)
/* Low-level suspend routine. */
extern int acpi_suspend_lowlevel(void);
-extern const unsigned char acpi_wakeup_code[];
-
-/* early initialization routine */
-extern void acpi_reserve_wakeup_memory(void);
+/* Physical address to resume after wakeup */
+#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
/*
* Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b97596e2b68c..a6983b277220 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -15,6 +15,8 @@
#include <linux/compiler.h>
#include <asm/alternative.h>
+#define BIT_64(n) (U64_C(1) << (n))
+
/*
* These have to be done with inline assembly: that way the bit-setting
* is guaranteed to be atomic. All bit operations return 0 if the bit
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 18d9005d9e4f..b0767bc08740 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -34,7 +34,7 @@
#ifndef __ASSEMBLY__
extern void mcount(void);
-extern int modifying_ftrace_code;
+extern atomic_t modifying_ftrace_code;
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b91..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
typedef unsigned short __kernel_mode_t;
#define __kernel_mode_t __kernel_mode_t
-typedef unsigned short __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
typedef unsigned short __kernel_ipc_pid_t;
#define __kernel_ipc_pid_t __kernel_ipc_pid_t
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index ada93b3b8c66..beff97f7df37 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,8 +7,6 @@
#include <asm/processor-flags.h>
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \
X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5c25de07cba8..89f794f007ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -248,7 +248,23 @@ static inline void set_restore_sigmask(void)
{
struct thread_info *ti = current_thread_info();
ti->status |= TS_RESTORE_SIGMASK;
- set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+ WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ struct thread_info *ti = current_thread_info();
+ if (!(ti->status & TS_RESTORE_SIGMASK))
+ return false;
+ ti->status &= ~TS_RESTORE_SIGMASK;
+ return true;
}
static inline bool is_ia32_task(void)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 82f29e70d058..6b9333b429ba 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1101,14 +1101,20 @@ int is_debug_stack(unsigned long addr)
addr > (__get_cpu_var(debug_stack_addr) - DEBUG_STKSZ));
}
+static DEFINE_PER_CPU(u32, debug_stack_use_ctr);
+
void debug_stack_set_zero(void)
{
+ this_cpu_inc(debug_stack_use_ctr);
load_idt((const struct desc_ptr *)&nmi_idt_descr);
}
void debug_stack_reset(void)
{
- load_idt((const struct desc_ptr *)&idt_descr);
+ if (WARN_ON(!this_cpu_read(debug_stack_use_ctr)))
+ return;
+ if (this_cpu_dec_return(debug_stack_use_ctr) == 0)
+ load_idt((const struct desc_ptr *)&idt_descr);
}
#else /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b772dd6ad450..0a687fd185e6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1251,15 +1251,15 @@ void mce_log_therm_throt_event(__u64 status)
* poller finds an MCE, poll 2x faster. When the poller finds no more
* errors, poll 2x slower (up to check_interval seconds).
*/
-static int check_interval = 5 * 60; /* 5 minutes */
+static unsigned long check_interval = 5 * 60; /* 5 minutes */
-static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
+static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mce_start_timer(unsigned long data)
+static void mce_timer_fn(unsigned long data)
{
- struct timer_list *t = &per_cpu(mce_timer, data);
- int *n;
+ struct timer_list *t = &__get_cpu_var(mce_timer);
+ unsigned long iv;
WARN_ON(smp_processor_id() != data);
@@ -1272,13 +1272,14 @@ static void mce_start_timer(unsigned long data)
* Alert userspace if needed. If we logged an MCE, reduce the
* polling interval, otherwise increase the polling interval.
*/
- n = &__get_cpu_var(mce_next_interval);
+ iv = __this_cpu_read(mce_next_interval);
if (mce_notify_irq())
- *n = max(*n/2, HZ/100);
+ iv = max(iv, (unsigned long) HZ/100);
else
- *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ));
+ iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+ __this_cpu_write(mce_next_interval, iv);
- t->expires = jiffies + *n;
+ t->expires = jiffies + iv;
add_timer_on(t, smp_processor_id());
}
@@ -1472,9 +1473,9 @@ static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
rdmsrl(msrs[i], val);
/* CntP bit set? */
- if (val & BIT(62)) {
- val &= ~BIT(62);
- wrmsrl(msrs[i], val);
+ if (val & BIT_64(62)) {
+ val &= ~BIT_64(62);
+ wrmsrl(msrs[i], val);
}
}
@@ -1556,17 +1557,17 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
- int *n = &__get_cpu_var(mce_next_interval);
+ unsigned long iv = __this_cpu_read(mce_next_interval);
- setup_timer(t, mce_start_timer, smp_processor_id());
+ setup_timer(t, mce_timer_fn, smp_processor_id());
if (mce_ignore_ce)
return;
- *n = check_interval * HZ;
- if (!*n)
+ __this_cpu_write(mce_next_interval, iv);
+ if (!iv)
return;
- t->expires = round_jiffies(jiffies + *n);
+ t->expires = round_jiffies(jiffies + iv);
add_timer_on(t, smp_processor_id());
}
@@ -2276,7 +2277,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
case CPU_DOWN_FAILED_FROZEN:
if (!mce_ignore_ce && check_interval) {
t->expires = round_jiffies(jiffies +
- __get_cpu_var(mce_next_interval));
+ per_cpu(mce_next_interval, cpu));
add_timer_on(t, cpu);
}
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index ac140c7be396..bdda2e6c673b 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -266,7 +266,7 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
if (align > max_align)
align = max_align;
- sizek = 1 << align;
+ sizek = 1UL << align;
if (debug_print) {
char start_factor = 'K', size_factor = 'K';
unsigned long start_base, size_base;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 01ccf9b71473..623f28837476 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -316,7 +316,6 @@ ret_from_exception:
preempt_stop(CLBR_ANY)
ret_from_intr:
GET_THREAD_INFO(%ebp)
-resume_userspace_sig:
#ifdef CONFIG_VM86
movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
movb PT_CS(%esp), %al
@@ -615,9 +614,13 @@ work_notifysig: # deal with pending signals and
# vm86-space
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
ALIGN
work_notifysig_v86:
@@ -630,9 +633,13 @@ work_notifysig_v86:
#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+ movb PT_CS(%esp), %bl
+ andb $SEGMENT_RPL_MASK, %bl
+ cmpb $USER_RPL, %bl
+ jb resume_kernel
xorl %edx, %edx
call do_notify_resume
- jmp resume_userspace_sig
+ jmp resume_userspace
END(work_pending)
# perform syscall exit tracing
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 320852d02026..7d65133b51be 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -191,6 +191,44 @@ ENDPROC(native_usergs_sysret64)
.endm
/*
+ * When dynamic function tracer is enabled it will add a breakpoint
+ * to all locations that it is about to modify, sync CPUs, update
+ * all the code, sync CPUs, then remove the breakpoints. In this time
+ * if lockdep is enabled, it might jump back into the debug handler
+ * outside the updating of the IST protection. (TRACE_IRQS_ON/OFF).
+ *
+ * We need to change the IDT table before calling TRACE_IRQS_ON/OFF to
+ * make sure the stack pointer does not get reset back to the top
+ * of the debug stack, and instead just reuses the current stack.
+ */
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_TRACE_IRQFLAGS)
+
+.macro TRACE_IRQS_OFF_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_OFF
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_ON_DEBUG
+ call debug_stack_set_zero
+ TRACE_IRQS_ON
+ call debug_stack_reset
+.endm
+
+.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
+ bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ jnc 1f
+ TRACE_IRQS_ON_DEBUG
+1:
+.endm
+
+#else
+# define TRACE_IRQS_OFF_DEBUG TRACE_IRQS_OFF
+# define TRACE_IRQS_ON_DEBUG TRACE_IRQS_ON
+# define TRACE_IRQS_IRETQ_DEBUG TRACE_IRQS_IRETQ
+#endif
+
+/*
* C code is not supposed to know about undefined top of stack. Every time
* a C function with an pt_regs argument is called from the SYSCALL based
* fast path FIXUP_TOP_OF_STACK is needed.
@@ -1098,7 +1136,7 @@ ENTRY(\sym)
subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
call save_paranoid
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
movq %rsp,%rdi /* pt_regs pointer */
xorl %esi,%esi /* no error code */
subq $EXCEPTION_STKSZ, INIT_TSS_IST(\ist)
@@ -1393,7 +1431,7 @@ paranoidzeroentry machine_check *machine_check_vector(%rip)
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
+ TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
jnz paranoid_restore
testl $3,CS(%rsp)
@@ -1404,7 +1442,7 @@ paranoid_swapgs:
RESTORE_ALL 8
jmp irq_return
paranoid_restore:
- TRACE_IRQS_IRETQ 0
+ TRACE_IRQS_IRETQ_DEBUG 0
RESTORE_ALL 8
jmp irq_return
paranoid_userspace:
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 32ff36596ab1..c3a7cb4bf6e6 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -100,7 +100,7 @@ static const unsigned char *ftrace_nop_replace(void)
}
static int
-ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code,
unsigned const char *new_code)
{
unsigned char replaced[MCOUNT_INSN_SIZE];
@@ -141,7 +141,20 @@ int ftrace_make_nop(struct module *mod,
old = ftrace_call_replace(ip, addr);
new = ftrace_nop_replace();
- return ftrace_modify_code(rec->ip, old, new);
+ /*
+ * On boot up, and when modules are loaded, the MCOUNT_ADDR
+ * is converted to a nop, and will never become MCOUNT_ADDR
+ * again. This code is either running before SMP (on boot up)
+ * or before the code will ever be executed (module load).
+ * We do not want to use the breakpoint version in this case,
+ * just modify the code directly.
+ */
+ if (addr == MCOUNT_ADDR)
+ return ftrace_modify_code_direct(rec->ip, old, new);
+
+ /* Normal cases use add_brk_on_nop */
+ WARN_ONCE(1, "invalid use of ftrace_make_nop");
+ return -EINVAL;
}
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
@@ -152,9 +165,47 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
old = ftrace_nop_replace();
new = ftrace_call_replace(ip, addr);
- return ftrace_modify_code(rec->ip, old, new);
+ /* Should only be called when module is loaded */
+ return ftrace_modify_code_direct(rec->ip, old, new);
}
+/*
+ * The modifying_ftrace_code is used to tell the breakpoint
+ * handler to call ftrace_int3_handler(). If it fails to
+ * call this handler for a breakpoint added by ftrace, then
+ * the kernel may crash.
+ *
+ * As atomic_writes on x86 do not need a barrier, we do not
+ * need to add smp_mb()s for this to work. It is also considered
+ * that we can not read the modifying_ftrace_code before
+ * executing the breakpoint. That would be quite remarkable if
+ * it could do that. Here's the flow that is required:
+ *
+ * CPU-0 CPU-1
+ *
+ * atomic_inc(mfc);
+ * write int3s
+ * <trap-int3> // implicit (r)mb
+ * if (atomic_read(mfc))
+ * call ftrace_int3_handler()
+ *
+ * Then when we are finished:
+ *
+ * atomic_dec(mfc);
+ *
+ * If we hit a breakpoint that was not set by ftrace, it does not
+ * matter if ftrace_int3_handler() is called or not. It will
+ * simply be ignored. But it is crucial that a ftrace nop/caller
+ * breakpoint is handled. No other user should ever place a
+ * breakpoint on an ftrace nop/caller location. It must only
+ * be done by this code.
+ */
+atomic_t modifying_ftrace_code __read_mostly;
+
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code);
+
int ftrace_update_ftrace_func(ftrace_func_t func)
{
unsigned long ip = (unsigned long)(&ftrace_call);
@@ -163,13 +214,17 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
new = ftrace_call_replace(ip, (unsigned long)func);
+
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
+
ret = ftrace_modify_code(ip, old, new);
+ atomic_dec(&modifying_ftrace_code);
+
return ret;
}
-int modifying_ftrace_code __read_mostly;
-
/*
* A breakpoint was added to the code address we are about to
* modify, and this is the handle that will just skip over it.
@@ -489,13 +544,46 @@ void ftrace_replace_code(int enable)
}
}
+static int
+ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
+ unsigned const char *new_code)
+{
+ int ret;
+
+ ret = add_break(ip, old_code);
+ if (ret)
+ goto out;
+
+ run_sync();
+
+ ret = add_update_code(ip, new_code);
+ if (ret)
+ goto fail_update;
+
+ run_sync();
+
+ ret = ftrace_write(ip, new_code, 1);
+ if (ret) {
+ ret = -EPERM;
+ goto out;
+ }
+ run_sync();
+ out:
+ return ret;
+
+ fail_update:
+ probe_kernel_write((void *)ip, &old_code[0], 1);
+ goto out;
+}
+
void arch_ftrace_update_code(int command)
{
- modifying_ftrace_code++;
+ /* See comment above by declaration of modifying_ftrace_code */
+ atomic_inc(&modifying_ftrace_code);
ftrace_modify_all_code(command);
- modifying_ftrace_code--;
+ atomic_dec(&modifying_ftrace_code);
}
int __init ftrace_dyn_arch_init(void *data)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 9cc7b4392f7c..1460a5df92f7 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -870,7 +870,7 @@ int __init hpet_enable(void)
else
pr_warn("HPET initial state will not be saved\n");
cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_Tn_CFG(i));
+ hpet_writel(cfg, HPET_CFG);
if (cfg)
pr_warn("HPET: Unrecognized bits %#x set in global cfg\n",
cfg);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 90875279ef3d..a0b2f84457be 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -444,14 +444,16 @@ static inline void nmi_nesting_preprocess(struct pt_regs *regs)
*/
if (unlikely(is_debug_stack(regs->sp))) {
debug_stack_set_zero();
- __get_cpu_var(update_debug_stack) = 1;
+ this_cpu_write(update_debug_stack, 1);
}
}
static inline void nmi_nesting_postprocess(void)
{
- if (unlikely(__get_cpu_var(update_debug_stack)))
+ if (unlikely(this_cpu_read(update_debug_stack))) {
debug_stack_reset();
+ this_cpu_write(update_debug_stack, 0);
+ }
}
#endif
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 13b1990c7c58..c4c6a5c2bf0f 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -1211,12 +1211,6 @@ static long x32_arch_ptrace(struct task_struct *child,
0, sizeof(struct user_i387_struct),
datap);
- /* normal 64bit interface to access TLS data.
- Works just like arch_prctl, except that the arguments
- are reversed. */
- case PTRACE_ARCH_PRCTL:
- return do_arch_prctl(child, data, addr);
-
default:
return compat_ptrace_request(child, request, addr, data);
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 965dfda0fd5e..21af737053aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -555,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
sizeof(frame->extramask))))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -581,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -647,42 +645,28 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct pt_regs *regs)
{
int usig = signr_convert(sig);
- sigset_t *set = &current->blocked;
- int ret;
-
- if (current_thread_info()->status & TS_RESTORE_SIGMASK)
- set = &current->saved_sigmask;
+ sigset_t *set = sigmask_to_save();
/* Set up the stack frame */
if (is_ia32) {
if (ka->sa.sa_flags & SA_SIGINFO)
- ret = ia32_setup_rt_frame(usig, ka, info, set, regs);
+ return ia32_setup_rt_frame(usig, ka, info, set, regs);
else
- ret = ia32_setup_frame(usig, ka, set, regs);
+ return ia32_setup_frame(usig, ka, set, regs);
#ifdef CONFIG_X86_X32_ABI
} else if (is_x32) {
- ret = x32_setup_rt_frame(usig, ka, info,
+ return x32_setup_rt_frame(usig, ka, info,
(compat_sigset_t *)set, regs);
#endif
} else {
- ret = __setup_rt_frame(sig, ka, info, set, regs);
- }
-
- if (ret) {
- force_sigsegv(sig, current);
- return -EFAULT;
+ return __setup_rt_frame(sig, ka, info, set, regs);
}
-
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- return ret;
}
-static int
+static void
handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
struct pt_regs *regs)
{
- int ret;
-
/* Are we from a system call? */
if (syscall_get_nr(current, regs) >= 0) {
/* If so, check system call restarting.. */
@@ -713,10 +697,10 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
likely(test_and_clear_thread_flag(TIF_FORCED_TF)))
regs->flags &= ~X86_EFLAGS_TF;
- ret = setup_rt_frame(sig, ka, info, regs);
-
- if (ret)
- return ret;
+ if (setup_rt_frame(sig, ka, info, regs) < 0) {
+ force_sigsegv(sig, current);
+ return;
+ }
/*
* Clear the direction flag as per the ABI for function entry.
@@ -731,12 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
*/
regs->flags &= ~X86_EFLAGS_TF;
- block_sigmask(ka, sig);
-
- tracehook_signal_handler(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLESTEP));
-
- return 0;
+ signal_delivered(sig, info, ka, regs,
+ test_thread_flag(TIF_SINGLESTEP));
}
#ifdef CONFIG_X86_32
@@ -757,16 +737,6 @@ static void do_signal(struct pt_regs *regs)
siginfo_t info;
int signr;
- /*
- * We want the common case to go fast, which is why we may in certain
- * cases get here from kernel mode. Just return without doing anything
- * if so.
- * X86_32: vm86 regs switched out by assembly code before reaching
- * here, so testing against kernel CS suffices.
- */
- if (!user_mode(regs))
- return;
-
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
/* Whee! Actually deliver the signal. */
@@ -796,10 +766,7 @@ static void do_signal(struct pt_regs *regs)
* If there's no signal to deliver, we just put the saved sigmask
* back.
*/
- if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
- current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
- set_current_blocked(&current->saved_sigmask);
- }
+ restore_saved_sigmask();
}
/*
@@ -827,8 +794,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
- if (current->replacement_session_keyring)
- key_replace_session_keyring();
}
if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -936,7 +901,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index f56f96da77f5..fd019d78b1f4 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -410,15 +410,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
/* maps the cpu to the sched domain representing multi-core */
const struct cpumask *cpu_coregroup_mask(int cpu)
{
- struct cpuinfo_x86 *c = &cpu_data(cpu);
- /*
- * For perf, we return last level cache shared map.
- * And for power savings, we return cpu_core_map
- */
- if (!(cpu_has(c, X86_FEATURE_AMD_DCM)))
- return cpu_core_mask(cpu);
- else
- return cpu_llc_shared_mask(cpu);
+ return cpu_llc_shared_mask(cpu);
}
static void impress_friends(void)
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index ff08457a025d..05b31d92f69c 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -303,8 +303,12 @@ gp_in_kernel:
dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_code)
{
#ifdef CONFIG_DYNAMIC_FTRACE
- /* ftrace must be first, everything else may cause a recursive crash */
- if (unlikely(modifying_ftrace_code) && ftrace_int3_handler(regs))
+ /*
+ * ftrace must be first, everything else may cause a recursive crash.
+ * See note by declaration of modifying_ftrace_code in ftrace.c
+ */
+ if (unlikely(atomic_read(&modifying_ftrace_code)) &&
+ ftrace_int3_handler(regs))
return;
#endif
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 72102e0ab7cb..be3cea4407ff 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2595,8 +2595,7 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
*gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
- if (!get_page_unless_zero(pfn_to_page(pfn)))
- BUG();
+ kvm_get_pfn(pfn);
*pfnp = pfn;
}
}
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index f11729fd019c..3d68ef6d2266 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -158,31 +158,47 @@ static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
return req_type;
}
+struct pagerange_state {
+ unsigned long cur_pfn;
+ int ram;
+ int not_ram;
+};
+
+static int
+pagerange_is_ram_callback(unsigned long initial_pfn, unsigned long total_nr_pages, void *arg)
+{
+ struct pagerange_state *state = arg;
+
+ state->not_ram |= initial_pfn > state->cur_pfn;
+ state->ram |= total_nr_pages > 0;
+ state->cur_pfn = initial_pfn + total_nr_pages;
+
+ return state->ram && state->not_ram;
+}
+
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
- int ram_page = 0, not_rampage = 0;
- unsigned long page_nr;
+ int ret = 0;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long end_pfn = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ struct pagerange_state state = {start_pfn, 0, 0};
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
- ++page_nr) {
- /*
- * For legacy reasons, physical address range in the legacy ISA
- * region is tracked as non-RAM. This will allow users of
- * /dev/mem to map portions of legacy ISA region, even when
- * some of those portions are listed(or not even listed) with
- * different e820 types(RAM/reserved/..)
- */
- if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
- page_is_ram(page_nr))
- ram_page = 1;
- else
- not_rampage = 1;
-
- if (ram_page == not_rampage)
- return -1;
+ /*
+ * For legacy reasons, physical address range in the legacy ISA
+ * region is tracked as non-RAM. This will allow users of
+ * /dev/mem to map portions of legacy ISA region, even when
+ * some of those portions are listed(or not even listed) with
+ * different e820 types(RAM/reserved/..)
+ */
+ if (start_pfn < ISA_END_ADDRESS >> PAGE_SHIFT)
+ start_pfn = ISA_END_ADDRESS >> PAGE_SHIFT;
+
+ if (start_pfn < end_pfn) {
+ ret = walk_system_ram_range(start_pfn, end_pfn - start_pfn,
+ &state, pagerange_is_ram_callback);
}
- return ram_page;
+ return (ret > 0) ? -1 : (state.ram ? 1 : 0);
}
/*
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 29f9f0554f7d..7a35a6e71d44 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,4 @@
346 i386 setns sys_setns
347 i386 process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+349 i386 kcmp sys_kcmp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dd29a9ea27c5..51171aeff0dc 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
309 common getcpu sys_getcpu
310 64 process_vm_readv sys_process_vm_readv
311 64 process_vm_writev sys_process_vm_writev
+312 64 kcmp sys_kcmp
+
#
# x32-specific system call numbers start at 512 to avoid cache impact
# for native 64-bit operation.
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bb0fb03b9f85..a508cea13503 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -486,7 +486,6 @@ long sys_sigreturn(struct pt_regs *regs)
copy_from_user(&set.sig[1], extramask, sig_size))
goto segfault;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (copy_sc_from_user(&current->thread.regs, sc))
@@ -600,7 +599,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
goto segfault;
- sigdelsetmask(&set, ~_BLOCKABLE);
set_current_blocked(&set);
if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75f33b2a5933..e74df9548a02 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1116,7 +1116,10 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.wbinvd = native_wbinvd,
.read_msr = native_read_msr_safe,
+ .rdmsr_regs = native_rdmsr_safe_regs,
.write_msr = xen_write_msr_safe,
+ .wrmsr_regs = native_wrmsr_safe_regs,
+
.read_tsc = native_read_tsc,
.read_pmc = native_read_pmc,