diff options
Diffstat (limited to 'arch/x86')
79 files changed, 2317 insertions, 1331 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 95c36c474766..159c2ff9c127 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -64,8 +64,12 @@ config X86 select HAVE_TEXT_POKE_SMP select HAVE_GENERIC_HARDIRQS select HAVE_SPARSE_IRQ + select GENERIC_FIND_FIRST_BIT + select GENERIC_FIND_NEXT_BIT select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP + select GENERIC_IRQ_SHOW + select IRQ_FORCED_THREADING select USE_GENERIC_SMP_HELPERS if SMP config INSTRUCTION_DECODER @@ -811,7 +815,7 @@ config X86_LOCAL_APIC config X86_IO_APIC def_bool y - depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC + depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC config X86_VISWS_APIC def_bool y diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 283c5a6a03a6..ed47e6e1747f 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -294,11 +294,6 @@ config X86_GENERIC endif -config X86_CPU - def_bool y - select GENERIC_FIND_FIRST_BIT - select GENERIC_FIND_NEXT_BIT - # # Define implied options from the CPU selection here config X86_INTERNODE_CACHE_SHIFT diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 518bb99c3394..430312ba6e3f 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -25,6 +25,8 @@ #define sysretl_audit ia32_ret_from_sys_call #endif + .section .entry.text, "ax" + #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) .macro IA32_ARG_FIXUP noebp=0 @@ -126,26 +128,20 @@ ENTRY(ia32_sysenter_target) */ ENABLE_INTERRUPTS(CLBR_NONE) movl %ebp,%ebp /* zero extension */ - pushq $__USER32_DS - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi $__USER32_DS /*CFI_REL_OFFSET ss,0*/ - pushq %rbp - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rbp CFI_REL_OFFSET rsp,0 - pushfq - CFI_ADJUST_CFA_OFFSET 8 + pushfq_cfi /*CFI_REL_OFFSET rflags,0*/ movl 8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d CFI_REGISTER rip,r10 - pushq $__USER32_CS - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi $__USER32_CS /*CFI_REL_OFFSET cs,0*/ movl %eax, %eax - pushq %r10 - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %r10 CFI_REL_OFFSET rip,0 - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rax cld SAVE_ARGS 0,0,1 /* no need to do an access_ok check here because rbp has been @@ -182,11 +178,9 @@ sysexit_from_sys_call: xorq %r9,%r9 xorq %r10,%r10 xorq %r11,%r11 - popfq - CFI_ADJUST_CFA_OFFSET -8 + popfq_cfi /*CFI_RESTORE rflags*/ - popq %rcx /* User %esp */ - CFI_ADJUST_CFA_OFFSET -8 + popq_cfi %rcx /* User %esp */ CFI_REGISTER rsp,rcx TRACE_IRQS_ON ENABLE_INTERRUPTS_SYSEXIT32 @@ -421,8 +415,7 @@ ENTRY(ia32_syscall) */ ENABLE_INTERRUPTS(CLBR_NONE) movl %eax,%eax - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 + pushq_cfi %rax cld /* note the registers are not zero extended to the sf. this could be a problem. */ @@ -851,4 +844,7 @@ ia32_sys_call_table: .quad sys_fanotify_init .quad sys32_fanotify_mark .quad sys_prlimit64 /* 340 */ + .quad sys_name_to_handle_at + .quad compat_sys_open_by_handle_at + .quad compat_sys_clock_adjtime ia32_syscall_end: diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 47a30ff8e517..d87988bacf3e 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -426,4 +426,16 @@ struct local_apic { #else #define BAD_APICID 0xFFFFu #endif + +enum ioapic_irq_destination_types { + dest_Fixed = 0, + dest_LowestPrio = 1, + dest_SMI = 2, + dest__reserved_1 = 3, + dest_NMI = 4, + dest_INIT = 5, + dest__reserved_2 = 6, + dest_ExtINT = 7 +}; + #endif /* _ASM_X86_APICDEF_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 220e2ea08e80..91f3e087cf21 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -160,6 +160,7 @@ #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ #define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ #define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE (6*32+23) /* core performance counter extensions */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -279,6 +280,7 @@ extern const char * const x86_power_flags[32]; #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) #define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ) +#define cpu_has_perfctr_core boot_cpu_has(X86_FEATURE_PERFCTR_CORE) #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) # define cpu_has_invlpg 1 diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 06850a7194e1..2c6fc9e62812 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -7,14 +7,12 @@ frame pointer later */ #ifdef CONFIG_FRAME_POINTER .macro FRAME - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebp CFI_REL_OFFSET ebp,0 movl %esp,%ebp .endm .macro ENDFRAME - popl %ebp - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebp CFI_RESTORE ebp .endm #else diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index 1f11ce44e956..d09bb03653f0 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h @@ -37,7 +37,7 @@ "+m" (*uaddr), "=&r" (tem) \ : "r" (oparg), "i" (-EFAULT), "1" (0)) -static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) +static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; @@ -48,7 +48,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) @@ -109,9 +109,10 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr) return ret; } -static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, - int newval) +static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) { + int ret = 0; #if defined(CONFIG_X86_32) && !defined(CONFIG_X86_BSWAP) /* Real i386 machines have no cmpxchg instruction */ @@ -119,21 +120,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, return -ENOSYS; #endif - if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; - asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %3, %1\n" + asm volatile("1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" "2:\t.section .fixup, \"ax\"\n" - "3:\tmov %2, %0\n" + "3:\tmov %3, %0\n" "\tjmp 2b\n" "\t.previous\n" _ASM_EXTABLE(1b, 3b) - : "=a" (oldval), "+m" (*uaddr) - : "i" (-EFAULT), "r" (newval), "0" (oldval) + : "+r" (ret), "=a" (oldval), "+m" (*uaddr) + : "i" (-EFAULT), "r" (newval), "1" (oldval) : "memory" ); - return oldval; + *uval = oldval; + return ret; } #endif diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index f327d386d6cc..c4bd267dfc50 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -63,17 +63,6 @@ union IO_APIC_reg_03 { } __attribute__ ((packed)) bits; }; -enum ioapic_irq_destination_types { - dest_Fixed = 0, - dest_LowestPrio = 1, - dest_SMI = 2, - dest__reserved_1 = 3, - dest_NMI = 4, - dest_INIT = 5, - dest__reserved_2 = 6, - dest_ExtINT = 7 -}; - struct IO_APIC_route_entry { __u32 vector : 8, delivery_mode : 3, /* 000: FIXED @@ -106,6 +95,10 @@ struct IR_IO_APIC_route_entry { index : 15; } __attribute__ ((packed)); +#define IOAPIC_AUTO -1 +#define IOAPIC_EDGE 0 +#define IOAPIC_LEVEL 1 + #ifdef CONFIG_X86_IO_APIC /* @@ -150,11 +143,6 @@ extern int timer_through_8259; #define io_apic_assign_pci_irqs \ (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -extern u8 io_apic_unique_id(u8 id); -extern int io_apic_get_unique_id(int ioapic, int apic_id); -extern int io_apic_get_version(int ioapic); -extern int io_apic_get_redir_entries(int ioapic); - struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); @@ -162,6 +150,8 @@ void setup_IO_APIC_irq_extra(u32 gsi); extern void ioapic_and_gsi_init(void); extern void ioapic_insert_resources(void); +int io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr); + extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); @@ -186,6 +176,8 @@ extern void __init pre_init_apic_IRQ0(void); extern void mp_save_irq(struct mpc_intsrc *m); +extern void disable_ioapic_support(void); + #else /* !CONFIG_X86_IO_APIC */ #define io_apic_assign_pci_irqs 0 @@ -199,6 +191,26 @@ static inline int mp_find_ioapic(u32 gsi) { return 0; } struct io_apic_irq_attr; static inline int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { return 0; } + +static inline struct IO_APIC_route_entry **alloc_ioapic_entries(void) +{ + return NULL; +} + +static inline void free_ioapic_entries(struct IO_APIC_route_entry **ent) { } +static inline int save_IO_APIC_setup(struct IO_APIC_route_entry **ent) +{ + return -ENOMEM; +} + +static inline void mask_IO_APIC_setup(struct IO_APIC_route_entry **ent) { } +static inline int restore_IO_APIC_setup(struct IO_APIC_route_entry **ent) +{ + return -ENOMEM; +} + +static inline void mp_save_irq(struct mpc_intsrc *m) { }; +static inline void disable_ioapic_support(void) { } #endif #endif /* _ASM_X86_IO_APIC_H */ diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index ca242d35e873..518bbbb9ee59 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -13,7 +13,6 @@ enum die_val { DIE_PANIC, DIE_NMI, DIE_DIE, - DIE_NMIWATCHDOG, DIE_KERNELDEBUG, DIE_TRAP, DIE_GPF, diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 43a18c77676d..823d48223400 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -52,6 +52,9 @@ #define MSR_IA32_MCG_STATUS 0x0000017a #define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 + #define MSR_IA32_PEBS_ENABLE 0x000003f1 #define MSR_IA32_DS_AREA 0x00000600 #define MSR_IA32_PERF_CAPABILITIES 0x00000345 diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c76f5b92b840..07f46016d3ff 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -7,7 +7,6 @@ #ifdef CONFIG_X86_LOCAL_APIC -extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 45636cefa186..4c25ab48257b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -94,10 +94,6 @@ struct cpuinfo_x86 { int x86_cache_alignment; /* In bytes */ int x86_power; unsigned long loops_per_jiffy; -#ifdef CONFIG_SMP - /* cpus sharing the last level cache: */ - cpumask_var_t llc_shared_map; -#endif /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index d1e41b0f9b60..df4cd32b4cc6 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -37,26 +37,9 @@ #endif #ifdef __KERNEL__ - -#include <linux/list.h> -#include <linux/spinlock.h> -#include <linux/lockdep.h> #include <asm/asm.h> -struct rwsem_waiter; - -extern asmregparm struct rw_semaphore * - rwsem_down_read_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_down_write_failed(struct rw_semaphore *sem); -extern asmregparm struct rw_semaphore * - rwsem_wake(struct rw_semaphore *); -extern asmregparm struct rw_semaphore * - rwsem_downgrade_wake(struct rw_semaphore *sem); - /* - * the semaphore definition - * * The bias values and the counter type limits the number of * potential readers/writers to 32767 for 32 bits and 2147483647 * for 64 bits. @@ -74,43 +57,6 @@ extern asmregparm struct rw_semaphore * #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) -typedef signed long rwsem_count_t; - -struct rw_semaphore { - rwsem_count_t count; - spinlock_t wait_lock; - struct list_head wait_list; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif -}; - -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define __RWSEM_DEP_MAP_INIT(lockname) , .dep_map = { .name = #lockname } -#else -# define __RWSEM_DEP_MAP_INIT(lockname) -#endif - - -#define __RWSEM_INITIALIZER(name) \ -{ \ - RWSEM_UNLOCKED_VALUE, __SPIN_LOCK_UNLOCKED((name).wait_lock), \ - LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ -} - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern void __init_rwsem(struct rw_semaphore *sem, const char *name, - struct lock_class_key *key); - -#define init_rwsem(sem) \ -do { \ - static struct lock_class_key __key; \ - \ - __init_rwsem((sem), #sem, &__key); \ -} while (0) - /* * lock for reading */ @@ -133,7 +79,7 @@ static inline void __down_read(struct rw_semaphore *sem) */ static inline int __down_read_trylock(struct rw_semaphore *sem) { - rwsem_count_t result, tmp; + long result, tmp; asm volatile("# beginning __down_read_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -155,7 +101,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) */ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning down_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* adds 0xffff0001, returns the old value */ @@ -180,9 +126,8 @@ static inline void __down_write(struct rw_semaphore *sem) */ static inline int __down_write_trylock(struct rw_semaphore *sem) { - rwsem_count_t ret = cmpxchg(&sem->count, - RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); + long ret = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, + RWSEM_ACTIVE_WRITE_BIAS); if (ret == RWSEM_UNLOCKED_VALUE) return 1; return 0; @@ -193,7 +138,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) */ static inline void __up_read(struct rw_semaphore *sem) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning __up_read\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 1, returns the old value */ @@ -211,7 +156,7 @@ static inline void __up_read(struct rw_semaphore *sem) */ static inline void __up_write(struct rw_semaphore *sem) { - rwsem_count_t tmp; + long tmp; asm volatile("# beginning __up_write\n\t" LOCK_PREFIX " xadd %1,(%2)\n\t" /* subtracts 0xffff0001, returns the old value */ @@ -247,8 +192,7 @@ static inline void __downgrade_write(struct rw_semaphore *sem) /* * implement atomic add functionality */ -static inline void rwsem_atomic_add(rwsem_count_t delta, - struct rw_semaphore *sem) +static inline void rwsem_atomic_add(long delta, struct rw_semaphore *sem) { asm volatile(LOCK_PREFIX _ASM_ADD "%1,%0" : "+m" (sem->count) @@ -258,10 +202,9 @@ static inline void rwsem_atomic_add(rwsem_count_t delta, /* * implement exchange and add functionality */ -static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, - struct rw_semaphore *sem) +static inline long rwsem_atomic_update(long delta, struct rw_semaphore *sem) { - rwsem_count_t tmp = delta; + long tmp = delta; asm volatile(LOCK_PREFIX "xadd %0,%1" : "+r" (tmp), "+m" (sem->count) @@ -270,10 +213,5 @@ static inline rwsem_count_t rwsem_atomic_update(rwsem_count_t delta, return tmp + delta; } -static inline int rwsem_is_locked(struct rw_semaphore *sem) -{ - return (sem->count != 0); -} - #endif /* __KERNEL__ */ #endif /* _ASM_X86_RWSEM_H */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index b296ca6f40bb..73b11bc0ae6f 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -17,12 +17,24 @@ #endif #include <asm/thread_info.h> #include <asm/cpumask.h> +#include <asm/cpufeature.h> extern int smp_num_siblings; extern unsigned int num_processors; +static inline bool cpu_has_ht_siblings(void) +{ + bool has_siblings = false; +#ifdef CONFIG_SMP + has_siblings = cpu_has_ht && smp_num_siblings > 1; +#endif + return has_siblings; +} + DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); +/* cpus sharing the last level cache: */ +DECLARE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU(u16, cpu_llc_id); DECLARE_PER_CPU(int, cpu_number); @@ -36,6 +48,11 @@ static inline struct cpumask *cpu_core_mask(int cpu) return per_cpu(cpu_core_map, cpu); } +static inline struct cpumask *cpu_llc_shared_mask(int cpu) +{ + return per_cpu(cpu_llc_shared_map, cpu); +} + DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 33ecc3ea8782..12569e691ce3 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -98,8 +98,6 @@ do { \ */ #define HAVE_DISABLE_HLT #else -#define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" -#define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" /* frame pointer must be last for get_wchan */ #define SAVE_CONTEXT "pushf ; pushq %%rbp ; movq %%rsi,%%rbp\n\t" diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index b766a5e8ba0e..ffaf183c619a 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h @@ -346,10 +346,13 @@ #define __NR_fanotify_init 338 #define __NR_fanotify_mark 339 #define __NR_prlimit64 340 +#define __NR_name_to_handle_at 341 +#define __NR_open_by_handle_at 342 +#define __NR_clock_adjtime 343 #ifdef __KERNEL__ -#define NR_syscalls 341 +#define NR_syscalls 344 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 363e9b8a715b..5466bea670e7 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -669,6 +669,12 @@ __SYSCALL(__NR_fanotify_init, sys_fanotify_init) __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #define __NR_prlimit64 302 __SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 303 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 304 +__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#define __NR_clock_adjtime 305 +__SYSCALL(__NR_clock_adjtime, sys_clock_adjtime) #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a3c28ae4025b..8508bfe52296 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -287,7 +287,7 @@ HYPERVISOR_fpu_taskswitch(int set) static inline int HYPERVISOR_sched_op(int cmd, void *arg) { - return _hypercall2(int, sched_op_new, cmd, arg); + return _hypercall2(int, sched_op, cmd, arg); } static inline long @@ -422,10 +422,17 @@ HYPERVISOR_set_segment_base(int reg, unsigned long value) #endif static inline int -HYPERVISOR_suspend(unsigned long srec) +HYPERVISOR_suspend(unsigned long start_info_mfn) { - return _hypercall3(int, sched_op, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); + struct sched_shutdown r = { .reason = SHUTDOWN_suspend }; + + /* + * For a PV guest the tools require that the start_info mfn be + * present in rdx/edx when the hypercall is made. Per the + * hypercall calling convention this is the third hypercall + * argument, which is start_info_mfn here. + */ + return _hypercall3(int, sched_op, SCHEDOP_shutdown, &r, start_info_mfn); } static inline int diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index f25bdf238a33..c61934fbf22a 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -29,8 +29,10 @@ typedef struct xpaddr { /**** MACHINE <-> PHYSICAL CONVERSION MACROS ****/ #define INVALID_P2M_ENTRY (~0UL) -#define FOREIGN_FRAME_BIT (1UL<<31) +#define FOREIGN_FRAME_BIT (1UL<<(BITS_PER_LONG-1)) +#define IDENTITY_FRAME_BIT (1UL<<(BITS_PER_LONG-2)) #define FOREIGN_FRAME(m) ((m) | FOREIGN_FRAME_BIT) +#define IDENTITY_FRAME(m) ((m) | IDENTITY_FRAME_BIT) /* Maximum amount of memory we can handle in a domain in pages */ #define MAX_DOMAIN_PAGES \ @@ -41,12 +43,18 @@ extern unsigned int machine_to_phys_order; extern unsigned long get_phys_to_machine(unsigned long pfn); extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); +extern unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e); extern int m2p_add_override(unsigned long mfn, struct page *page); extern int m2p_remove_override(struct page *page); extern struct page *m2p_find_override(unsigned long mfn); extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); +#ifdef CONFIG_XEN_DEBUG_FS +extern int p2m_dump_show(struct seq_file *m, void *v); +#endif static inline unsigned long pfn_to_mfn(unsigned long pfn) { unsigned long mfn; @@ -57,7 +65,7 @@ static inline unsigned long pfn_to_mfn(unsigned long pfn) mfn = get_phys_to_machine(pfn); if (mfn != INVALID_P2M_ENTRY) - mfn &= ~FOREIGN_FRAME_BIT; + mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT); return mfn; } @@ -73,25 +81,44 @@ static inline int phys_to_machine_mapping_valid(unsigned long pfn) static inline unsigned long mfn_to_pfn(unsigned long mfn) { unsigned long pfn; + int ret = 0; if (xen_feature(XENFEAT_auto_translated_physmap)) return mfn; + if (unlikely((mfn >> machine_to_phys_order) != 0)) { + pfn = ~0; + goto try_override; + } pfn = 0; /* * The array access can fail (e.g., device space beyond end of RAM). * In such cases it doesn't matter what we return (we return garbage), * but we must handle the fault without crashing! */ - __get_user(pfn, &machine_to_phys_mapping[mfn]); - - /* - * If this appears to be a foreign mfn (because the pfn - * doesn't map back to the mfn), then check the local override - * table to see if there's a better pfn to use. + ret = __get_user(pfn, &machine_to_phys_mapping[mfn]); +try_override: + /* ret might be < 0 if there are no entries in the m2p for mfn */ + if (ret < 0) + pfn = ~0; + else if (get_phys_to_machine(pfn) != mfn) + /* + * If this appears to be a foreign mfn (because the pfn + * doesn't map back to the mfn), then check the local override + * table to see if there's a better pfn to use. + * + * m2p_find_override_pfn returns ~0 if it doesn't find anything. + */ + pfn = m2p_find_override_pfn(mfn, ~0); + + /* + * pfn is ~0 if there are no entries in the m2p for mfn or if the + * entry doesn't map back to the mfn and m2p_override doesn't have a + * valid entry for it. */ - if (get_phys_to_machine(pfn) != mfn) - pfn = m2p_find_override_pfn(mfn, pfn); + if (pfn == ~0 && + get_phys_to_machine(mfn) == IDENTITY_FRAME(mfn)) + pfn = mfn; return pfn; } diff --git a/arch/x86/include/asm/xen/pci.h b/arch/x86/include/asm/xen/pci.h index 2329b3eaf8d3..aa8620989162 100644 --- a/arch/x86/include/asm/xen/pci.h +++ b/arch/x86/include/asm/xen/pci.h @@ -27,16 +27,16 @@ static inline void __init xen_setup_pirqs(void) * its own functions. */ struct xen_pci_frontend_ops { - int (*enable_msi)(struct pci_dev *dev, int **vectors); + int (*enable_msi)(struct pci_dev *dev, int vectors[]); void (*disable_msi)(struct pci_dev *dev); - int (*enable_msix)(struct pci_dev *dev, int **vectors, int nvec); + int (*enable_msix)(struct pci_dev *dev, int vectors[], int nvec); void (*disable_msix)(struct pci_dev *dev); }; extern struct xen_pci_frontend_ops *xen_pci_frontend; static inline int xen_pci_frontend_enable_msi(struct pci_dev *dev, - int **vectors) + int vectors[]) { if (xen_pci_frontend && xen_pci_frontend->enable_msi) return xen_pci_frontend->enable_msi(dev, vectors); @@ -48,7 +48,7 @@ static inline void xen_pci_frontend_disable_msi(struct pci_dev *dev) xen_pci_frontend->disable_msi(dev); } static inline int xen_pci_frontend_enable_msix(struct pci_dev *dev, - int **vectors, int nvec) + int vectors[], int nvec) { if (xen_pci_frontend && xen_pci_frontend->enable_msix) return xen_pci_frontend->enable_msix(dev, vectors, nvec); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 306386fbc105..562a8325cc1c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -43,6 +43,7 @@ #include <asm/i8259.h> #include <asm/proto.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/desc.h> #include <asm/hpet.h> #include <asm/idle.h> @@ -1218,7 +1219,7 @@ void __cpuinit setup_local_APIC(void) rdtscll(tsc); if (disable_apic) { - arch_disable_smp_support(); + disable_ioapic_support(); return; } @@ -1470,7 +1471,7 @@ int __init enable_IR(void) void __init enable_IR_x2apic(void) { unsigned long flags; - struct IO_APIC_route_entry **ioapic_entries = NULL; + struct IO_APIC_route_entry **ioapic_entries; int ret, x2apic_enabled = 0; int dmar_table_init_ret; @@ -1952,17 +1953,6 @@ void __cpuinit generic_processor_info(int apicid, int version) { int cpu; - /* - * Validate version - */ - if (version == 0x0) { - pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " - "fixing up to 0x10. (tell your hw vendor)\n", - version); - version = 0x10; - } - apic_version[apicid] = version; - if (num_processors >= nr_cpu_ids) { int max = nr_cpu_ids; int thiscpu = max + disabled_cpus; @@ -1976,22 +1966,34 @@ void __cpuinit generic_processor_info(int apicid, int version) } num_processors++; - cpu = cpumask_next_zero(-1, cpu_present_mask); - - if (version != apic_version[boot_cpu_physical_apicid]) - WARN_ONCE(1, - "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", - apic_version[boot_cpu_physical_apicid], cpu, version); - - physid_set(apicid, phys_cpu_present_map); if (apicid == boot_cpu_physical_apicid) { /* * x86_bios_cpu_apicid is required to have processors listed * in same order as logical cpu numbers. Hence the first * entry is BSP, and so on. + * boot_cpu_init() already hold bit 0 in cpu_present_mask + * for BSP. */ cpu = 0; + } else + cpu = cpumask_next_zero(-1, cpu_present_mask); + + /* + * Validate version + */ + if (version == 0x0) { + pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n", + cpu, apicid); + version = 0x10; } + apic_version[apicid] = version; + + if (version != apic_version[boot_cpu_physical_apicid]) { + pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n", + apic_version[boot_cpu_physical_apicid], cpu, version); + } + + physid_set(apicid, phys_cpu_present_map); if (apicid > max_physical_apicid) max_physical_apicid = apicid; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 79fd43ca6f96..c4e557a1ebb6 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -83,7 +83,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, arch_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); - dump_stack(); arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return NOTIFY_STOP; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ca9e2a3545a9..4b5ebd26f565 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -108,7 +108,10 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); int skip_ioapic_setup; -void arch_disable_smp_support(void) +/** + * disable_ioapic_support() - disables ioapic support at runtime + */ +void disable_ioapic_support(void) { #ifdef CONFIG_PCI noioapicquirk = 1; @@ -120,11 +123,14 @@ void arch_disable_smp_support(void) static int __init parse_noapic(char *str) { /* disable IO-APIC */ - arch_disable_smp_support(); + disable_ioapic_support(); return 0; } early_param("noapic", parse_noapic); +static int io_apic_setup_irq_pin_once(unsigned int irq, int node, + struct io_apic_irq_attr *attr); + /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ void mp_save_irq(struct mpc_intsrc *m) { @@ -181,7 +187,7 @@ int __init arch_early_irq_init(void) irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs); for (i = 0; i < count; i++) { - set_irq_chip_data(i, &cfg[i]); + irq_set_chip_data(i, &cfg[i]); zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node); /* @@ -200,7 +206,7 @@ int __init arch_early_irq_init(void) #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg *irq_cfg(unsigned int irq) { - return get_irq_chip_data(irq); + return irq_get_chip_data(irq); } static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node) @@ -226,7 +232,7 @@ static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { if (!cfg) return; - set_irq_chip_data(at, NULL); + irq_set_chip_data(at, NULL); free_cpumask_var(cfg->domain); free_cpumask_var(cfg->old_domain); kfree(cfg); @@ -256,14 +262,14 @@ static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node) if (res < 0) { if (res != -EEXIST) return NULL; - cfg = get_irq_chip_data(at); + cfg = irq_get_chip_data(at); if (cfg) return cfg; } cfg = alloc_irq_cfg(at, node); if (cfg) - set_irq_chip_data(at, cfg); + irq_set_chip_data(at, cfg); else irq_free_desc(at); return cfg; @@ -818,7 +824,7 @@ static int EISA_ELCR(unsigned int irq) #define default_MCA_trigger(idx) (1) #define default_MCA_polarity(idx) default_ISA_polarity(idx) -static int MPBIOS_polarity(int idx) +static int irq_polarity(int idx) { int bus = mp_irqs[idx].srcbus; int polarity; @@ -860,7 +866,7 @@ static int MPBIOS_polarity(int idx) return polarity; } -static int MPBIOS_trigger(int idx) +static int irq_trigger(int idx) { int bus = mp_irqs[idx].srcbus; int trigger; @@ -932,16 +938,6 @@ static int MPBIOS_trigger(int idx) return trigger; } -static inline int irq_polarity(int idx) -{ - return MPBIOS_polarity(idx); -} - -static inline int irq_trigger(int idx) -{ - return MPBIOS_trigger(idx); -} - static int pin_2_irq(int idx, int apic, int pin) { int irq; @@ -1189,7 +1185,7 @@ void __setup_vector_irq(int cpu) raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { - cfg = get_irq_chip_data(irq); + cfg = irq_get_chip_data(irq); if (!cfg) continue; /* @@ -1220,10 +1216,6 @@ void __setup_vector_irq(int cpu) static struct irq_chip ioapic_chip; static struct irq_chip ir_ioapic_chip; -#define IOAPIC_AUTO -1 -#define IOAPIC_EDGE 0 -#define IOAPIC_LEVEL 1 - #ifdef CONFIG_X86_32 static inline int IO_APIC_irq_trigger(int irq) { @@ -1248,35 +1240,31 @@ static inline int IO_APIC_irq_trigger(int irq) } #endif -static void ioapic_register_intr(unsigned int irq, unsigned long trigger) +static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg, + unsigned long trigger) { + struct irq_chip *chip = &ioapic_chip; + irq_flow_handler_t hdl; + bool fasteoi; if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) + trigger == IOAPIC_LEVEL) { irq_set_status_flags(irq, IRQ_LEVEL); - else + fasteoi = true; + } else { irq_clear_status_flags(irq, IRQ_LEVEL); + fasteoi = false; + } - if (irq_remapped(get_irq_chip_data(irq))) { + if (irq_remapped(cfg)) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (trigger) - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, - handle_edge_irq, "edge"); - return; + chip = &ir_ioapic_chip; + fasteoi = trigger != 0; } - if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || - trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_fasteoi_irq, - "fasteoi"); - else - set_irq_chip_and_handler_name(irq, &ioapic_chip, - handle_edge_irq, "edge"); + hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq; + irq_set_chip_and_handler_name(irq, chip, hdl, + fasteoi ? "fasteoi" : "edge"); } static int setup_ioapic_entry(int apic_id, int irq, @@ -1374,7 +1362,7 @@ static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq, return; } - ioapic_register_intr(irq, trigger); + ioapic_register_intr(irq, cfg, trigger); if (irq < legacy_pic->nr_legacy_irqs) legacy_pic->mask(irq); @@ -1385,33 +1373,26 @@ static struct { DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); } mp_ioapic_routing[MAX_IO_APICS]; -static void __init setup_IO_APIC_irqs(void) +static bool __init io_apic_pin_not_connected(int idx, int apic_id, int pin) { - int apic_id, pin, idx, irq, notcon = 0; - int node = cpu_to_node(0); - struct irq_cfg *cfg; + if (idx != -1) + return false; - apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + apic_printk(APIC_VERBOSE, KERN_DEBUG " apic %d pin %d not connected\n", + mp_ioapics[apic_id].apicid, pin); + return true; +} + +static void __init __io_apic_setup_irqs(unsigned int apic_id) +{ + int idx, node = cpu_to_node(0); + struct io_apic_irq_attr attr; + unsigned int pin, irq; - for (apic_id = 0; apic_id < nr_ioapics; apic_id++) for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { idx = find_irq_entry(apic_id, pin, mp_INT); - if (idx == -1) { - if (!notcon) { - notcon = 1; - apic_printk(APIC_VERBOSE, - KERN_DEBUG " %d-%d", - mp_ioapics[apic_id].apicid, pin); - } else - apic_printk(APIC_VERBOSE, " %d-%d", - mp_ioapics[apic_id].apicid, pin); + if (io_apic_pin_not_connected(idx, apic_id, pin)) continue; - } - if (notcon) { - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); - notcon = 0; - } irq = pin_2_irq(idx, apic_id, pin); @@ -1423,25 +1404,24 @@ static void __init setup_IO_APIC_irqs(void) * installed and if it returns 1: */ if (apic->multi_timer_check && - apic->multi_timer_check(apic_id, irq)) + apic->multi_timer_check(apic_id, irq)) continue; - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) - continue; + set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), + irq_polarity(idx)); - add_pin_to_irq_node(cfg, node, apic_id, pin); - /* - * don't mark it in pin_programmed, so later acpi could - * set it correctly when irq < 16 - */ - setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx), - irq_polarity(idx)); + io_apic_setup_irq_pin(irq, node, &attr); } +} - if (notcon) - apic_printk(APIC_VERBOSE, - " (apicid-pin) not connected\n"); +static void __init setup_IO_APIC_irqs(void) +{ + unsigned int apic_id; + + apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); + + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) + __io_apic_setup_irqs(apic_id); } /* @@ -1452,7 +1432,7 @@ static void __init setup_IO_APIC_irqs(void) void setup_IO_APIC_irq_extra(u32 gsi) { int apic_id = 0, pin, idx, irq, node = cpu_to_node(0); - struct irq_cfg *cfg; + struct io_apic_irq_attr attr; /* * Convert 'gsi' to 'ioapic.pin'. @@ -1472,21 +1452,10 @@ void setup_IO_APIC_irq_extra(u32 gsi) if (apic_id == 0 || irq < NR_IRQS_LEGACY) return; - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) - return; - - add_pin_to_irq_node(cfg, node, apic_id, pin); - - if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapics[apic_id].apicid, pin); - return; - } - set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + set_io_apic_irq_attr(&attr, apic_id, pin, irq_trigger(idx), + irq_polarity(idx)); - setup_ioapic_irq(apic_id, pin, irq, cfg, - irq_trigger(idx), irq_polarity(idx)); + io_apic_setup_irq_pin_once(irq, node, &attr); } /* @@ -1518,7 +1487,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, * The timer IRQ doesn't have to know that behind the * scene we may have a 8259A-master in AEOI mode ... */ - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, + "edge"); /* * Add it to the IO-APIC irq-routing table: @@ -1625,7 +1595,7 @@ __apicdebuginit(void) print_IO_APIC(void) for_each_active_irq(irq) { struct irq_pin_list *entry; - cfg = get_irq_chip_data(irq); + cfg = irq_get_chip_data(irq); if (!cfg) continue; entry = cfg->irq_2_pin; @@ -2391,7 +2361,7 @@ static void irq_complete_move(struct irq_cfg *cfg) void irq_force_complete_move(int irq) { - struct irq_cfg *cfg = get_irq_chip_data(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); if (!cfg) return; @@ -2405,7 +2375,7 @@ static inline void irq_complete_move(struct irq_cfg *cfg) { } static void ack_apic_edge(struct irq_data *data) { irq_complete_move(data->chip_data); - move_native_irq(data->irq); + irq_move_irq(data); ack_APIC_irq(); } @@ -2462,7 +2432,7 @@ static void ack_apic_level(struct irq_data *data) irq_complete_move(cfg); #ifdef CONFIG_GENERIC_PENDING_IRQ /* If we are moving the irq we need to mask it */ - if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { + if (unlikely(irqd_is_setaffinity_pending(data))) { do_unmask_irq = 1; mask_ioapic(cfg); } @@ -2551,7 +2521,7 @@ static void ack_apic_level(struct irq_data *data) * and you can go talk to the chipset vendor about it. */ if (!io_apic_level_ack_pending(cfg)) - move_masked_irq(irq); + irq_move_masked_irq(data); unmask_ioapic(cfg); } } @@ -2614,7 +2584,7 @@ static inline void init_IO_APIC_traps(void) * 0x80, because int 0x80 is hm, kind of importantish. ;) */ for_each_active_irq(irq) { - cfg = get_irq_chip_data(irq); + cfg = irq_get_chip_data(irq); if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { /* * Hmm.. We don't have an entry for this, @@ -2625,7 +2595,7 @@ static inline void init_IO_APIC_traps(void) legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ - set_irq_chip(irq, &no_irq_chip); + irq_set_chip(irq, &no_irq_chip); } } } @@ -2665,7 +2635,7 @@ static struct irq_chip lapic_chip __read_mostly = { static void lapic_register_intr(int irq) { irq_clear_status_flags(irq, IRQ_LEVEL); - set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, + irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, "edge"); } @@ -2749,7 +2719,7 @@ int timer_through_8259 __initdata; */ static inline void __init check_timer(void) { - struct irq_cfg *cfg = get_irq_chip_data(0); + struct irq_cfg *cfg = irq_get_chip_data(0); int node = cpu_to_node(0); int apic1, pin1, apic2, pin2; unsigned long flags; @@ -3060,7 +3030,7 @@ unsigned int create_irq_nr(unsigned int from, int node) raw_spin_unlock_irqrestore(&vector_lock, flags); if (ret) { - set_irq_chip_data(irq, cfg); + irq_set_chip_data(irq, cfg); irq_clear_status_flags(irq, IRQ_NOREQUEST); } else { free_irq_at(irq, cfg); @@ -3085,7 +3055,7 @@ int create_irq(void) void destroy_irq(unsigned int irq) { - struct irq_cfg *cfg = get_irq_chip_data(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long flags; irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE); @@ -3119,7 +3089,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); - if (irq_remapped(get_irq_chip_data(irq))) { + if (irq_remapped(cfg)) { struct irte irte; int ir_index; u16 sub_handle; @@ -3291,6 +3261,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) { + struct irq_chip *chip = &msi_chip; struct msi_msg msg; int ret; @@ -3298,14 +3269,15 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) if (ret < 0) return ret; - set_irq_msi(irq, msidesc); + irq_set_msi_desc(irq, msidesc); write_msi_msg(irq, &msg); - if (irq_remapped(get_irq_chip_data(irq))) { + if (irq_remapped(irq_get_chip_data(irq))) { irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); - } else - set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); + chip = &msi_ir_chip; + } + + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); @@ -3423,8 +3395,8 @@ int arch_setup_dmar_msi(unsigned int irq) if (ret < 0) return ret; dmar_msi_write(irq, &msg); - set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, - "edge"); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); return 0; } #endif @@ -3482,6 +3454,7 @@ static struct irq_chip hpet_msi_type = { int arch_setup_hpet_msi(unsigned int irq, unsigned int id) { + struct irq_chip *chip = &hpet_msi_type; struct msi_msg msg; int ret; @@ -3501,15 +3474,12 @@ int arch_setup_hpet_msi(unsigned int irq, unsigned int id) if (ret < 0) return ret; - hpet_msi_write(get_irq_data(irq), &msg); + hpet_msi_write(irq_get_handler_data(irq), &msg); irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - if (irq_remapped(get_irq_chip_data(irq))) - set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type, - handle_edge_irq, "edge"); - else - set_irq_chip_and_handler_name(irq, &hpet_msi_type, - handle_edge_irq, "edge"); + if (irq_remapped(irq_get_chip_data(irq))) + chip = &ir_hpet_msi_type; + irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge"); return 0; } #endif @@ -3596,7 +3566,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) write_ht_irq_msg(irq, &msg); - set_irq_chip_and_handler_name(irq, &ht_irq_chip, + irq_set_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); @@ -3605,7 +3575,40 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -int __init io_apic_get_redir_entries (int ioapic) +int +io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr) +{ + struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node); + int ret; + + if (!cfg) + return -EINVAL; + ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin); + if (!ret) + setup_ioapic_irq(attr->ioapic, attr->ioapic_pin, irq, cfg, + attr->trigger, attr->polarity); + return ret; +} + +static int io_apic_setup_irq_pin_once(unsigned int irq, int node, + struct io_apic_irq_attr *attr) +{ + unsigned int id = attr->ioapic, pin = attr->ioapic_pin; + int ret; + + /* Avoid redundant programming */ + if (test_bit(pin, mp_ioapic_routing[id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[id].apicid, pin); + return 0; + } + ret = io_apic_setup_irq_pin(irq, node, attr); + if (!ret) + set_bit(pin, mp_ioapic_routing[id].pin_programmed); + return ret; +} + +static int __init io_apic_get_redir_entries(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3659,96 +3662,24 @@ int __init arch_probe_nr_irqs(void) } #endif -static int __io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) +int io_apic_set_pci_routing(struct device *dev, int irq, + struct io_apic_irq_attr *irq_attr) { - struct irq_cfg *cfg; int node; - int ioapic, pin; - int trigger, polarity; - ioapic = irq_attr->ioapic; if (!IO_APIC_IRQ(irq)) { apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", - ioapic); + irq_attr->ioapic); return -EINVAL; } - if (dev) - node = dev_to_node(dev); - else - node = cpu_to_node(0); - - cfg = alloc_irq_and_cfg_at(irq, node); - if (!cfg) - return 0; - - pin = irq_attr->ioapic_pin; - trigger = irq_attr->trigger; - polarity = irq_attr->polarity; + node = dev ? dev_to_node(dev) : cpu_to_node(0); - /* - * IRQs < 16 are already in the irq_2_pin[] map - */ - if (irq >= legacy_pic->nr_legacy_irqs) { - if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) { - printk(KERN_INFO "can not add pin %d for irq %d\n", - pin, irq); - return 0; - } - } - - setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity); - - return 0; + return io_apic_setup_irq_pin_once(irq, node, irq_attr); } -int io_apic_set_pci_routing(struct device *dev, int irq, - struct io_apic_irq_attr *irq_attr) -{ - int ioapic, pin; - /* - * Avoid pin reprogramming. PRTs typically include entries - * with redundant pin->gsi mappings (but unique PCI devices); - * we only program the IOAPIC on the first. - */ - ioapic = irq_attr->ioapic; - pin = irq_attr->ioapic_pin; - if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { - pr_debug("Pin %d-%d already programmed\n", - mp_ioapics[ioapic].apicid, pin); - return 0; - } - set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); - - return __io_apic_set_pci_routing(dev, irq, irq_attr); -} - -u8 __init io_apic_unique_id(u8 id) -{ #ifdef CONFIG_X86_32 - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && - !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return io_apic_get_unique_id(nr_ioapics, id); - else - return id; -#else - int i; - DECLARE_BITMAP(used, 256); - - bitmap_zero(used, 256); - for (i = 0; i < nr_ioapics; i++) { - struct mpc_ioapic *ia = &mp_ioapics[i]; - __set_bit(ia->apicid, used); - } - if (!test_bit(id, used)) - return id; - return find_first_zero_bit(used, 256); -#endif -} - -#ifdef CONFIG_X86_32 -int __init io_apic_get_unique_id(int ioapic, int apic_id) +static int __init io_apic_get_unique_id(int ioapic, int apic_id) { union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; @@ -3821,9 +3752,33 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) return apic_id; } + +static u8 __init io_apic_unique_id(u8 id) +{ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return io_apic_get_unique_id(nr_ioapics, id); + else + return id; +} +#else +static u8 __init io_apic_unique_id(u8 id) +{ + int i; + DECLARE_BITMAP(used, 256); + + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { + struct mpc_ioapic *ia = &mp_ioapics[i]; + __set_bit(ia->apicid, used); + } + if (!test_bit(id, used)) + return id; + return find_first_zero_bit(used, 256); +} #endif -int __init io_apic_get_version(int ioapic) +static int __init io_apic_get_version(int ioapic) { union IO_APIC_reg_01 reg_01; unsigned long flags; @@ -3868,8 +3823,8 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; - struct irq_desc *desc; const struct cpumask *mask; + struct irq_data *idata; if (skip_ioapic_setup == 1) return; @@ -3884,21 +3839,20 @@ void __init setup_ioapic_dest(void) if ((ioapic > 0) && (irq > 16)) continue; - desc = irq_to_desc(irq); + idata = irq_get_irq_data(irq); /* * Honour affinities which have been set in early boot */ - if (desc->status & - (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) - mask = desc->irq_data.affinity; + if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata)) + mask = idata->affinity; else mask = apic->target_cpus(); if (intr_remapping_enabled) - ir_ioapic_set_affinity(&desc->irq_data, mask, false); + ir_ioapic_set_affinity(idata, mask, false); else - ioapic_set_affinity(&desc->irq_data, mask, false); + ioapic_set_affinity(idata, mask, false); } } @@ -4026,7 +3980,7 @@ int mp_find_ioapic_pin(int ioapic, u32 gsi) return gsi - mp_gsi_routing[ioapic].gsi_base; } -static int bad_ioapic(unsigned long address) +static __init int bad_ioapic(unsigned long address) { if (nr_ioapics >= MAX_IO_APICS) { printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded " @@ -4086,20 +4040,16 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) /* Enable IOAPIC early just for system timer */ void __init pre_init_apic_IRQ0(void) { - struct irq_cfg *cfg; + struct io_apic_irq_attr attr = { 0, 0, 0, 0 }; printk(KERN_INFO "Early APIC setup for system timer0\n"); #ifndef CONFIG_SMP physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); #endif - /* Make sure the irq descriptor is set up */ - cfg = alloc_irq_and_cfg_at(0, 0); - setup_local_APIC(); - add_pin_to_irq_node(cfg, 0, 0, 0); - set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); - - setup_ioapic_irq(0, 0, 0, cfg, 0, 0); + io_apic_setup_irq_pin(0, 0, &attr); + irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, + "edge"); } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index cfa82c899f47..4f13fafc5264 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -1,5 +1,70 @@ +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed to extract + * and format the required data. + */ +#define COMPILE_OFFSETS + +#include <linux/crypto.h> +#include <linux/sched.h> +#include <linux/stddef.h> +#include <linux/hardirq.h> +#include <linux/suspend.h> +#include <linux/kbuild.h> +#include <asm/processor.h> +#include <asm/thread_info.h> +#include <asm/sigframe.h> +#include <asm/bootparam.h> +#include <asm/suspend.h> + +#ifdef CONFIG_XEN +#include <xen/interface/xen.h> +#endif + #ifdef CONFIG_X86_32 # include "asm-offsets_32.c" #else # include "asm-offsets_64.c" #endif + +void common(void) { + BLANK(); + OFFSET(TI_flags, thread_info, flags); + OFFSET(TI_status, thread_info, status); + OFFSET(TI_addr_limit, thread_info, addr_limit); + OFFSET(TI_preempt_count, thread_info, preempt_count); + + BLANK(); + OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); + + BLANK(); + OFFSET(pbe_address, pbe, address); + OFFSET(pbe_orig_address, pbe, orig_address); + OFFSET(pbe_next, pbe, next); + +#ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); + OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); + OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); + OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); + OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); + OFFSET(PV_CPU_iret, pv_cpu_ops, iret); + OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); + OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); + OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); +#endif + +#ifdef CONFIG_XEN + BLANK(); + OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); + OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); +#endif + + BLANK(); + OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_loadflags, boot_params, hdr.loadflags); + OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); + OFFSET(BP_version, boot_params, hdr.version); + OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); +} diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 1a4088dda37a..c29d631af6fc 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -1,26 +1,4 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed - * to extract and format the required data. - */ - -#include <linux/crypto.h> -#include <linux/sched.h> -#include <linux/signal.h> -#include <linux/personality.h> -#include <linux/suspend.h> -#include <linux/kbuild.h> #include <asm/ucontext.h> -#include <asm/sigframe.h> -#include <asm/pgtable.h> -#include <asm/fixmap.h> -#include <asm/processor.h> -#include <asm/thread_info.h> -#include <asm/bootparam.h> -#include <asm/elf.h> -#include <asm/suspend.h> - -#include <xen/interface/xen.h> #include <linux/lguest.h> #include "../../../drivers/lguest/lg.h" @@ -51,21 +29,10 @@ void foo(void) OFFSET(CPUINFO_x86_vendor_id, cpuinfo_x86, x86_vendor_id); BLANK(); - OFFSET(TI_task, thread_info, task); - OFFSET(TI_exec_domain, thread_info, exec_domain); - OFFSET(TI_flags, thread_info, flags); - OFFSET(TI_status, thread_info, status); - OFFSET(TI_preempt_count, thread_info, preempt_count); - OFFSET(TI_addr_limit, thread_info, addr_limit); - OFFSET(TI_restart_block, thread_info, restart_block); OFFSET(TI_sysenter_return, thread_info, sysenter_return); OFFSET(TI_cpu, thread_info, cpu); BLANK(); - OFFSET(GDS_size, desc_ptr, size); - OFFSET(GDS_address, desc_ptr, address); - BLANK(); - OFFSET(PT_EBX, pt_regs, bx); OFFSET(PT_ECX, pt_regs, cx); OFFSET(PT_EDX, pt_regs, dx); @@ -85,42 +52,13 @@ void foo(void) OFFSET(PT_OLDSS, pt_regs, ss); BLANK(); - OFFSET(EXEC_DOMAIN_handler, exec_domain, handler); OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext); BLANK(); - OFFSET(pbe_address, pbe, address); - OFFSET(pbe_orig_address, pbe, orig_address); - OFFSET(pbe_next, pbe, next); - /* Offset from the sysenter stack to tss.sp0 */ DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) - sizeof(struct tss_struct)); - DEFINE(PAGE_SIZE_asm, PAGE_SIZE); - DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); - DEFINE(THREAD_SIZE_asm, THREAD_SIZE); - - OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); - -#ifdef CONFIG_PARAVIRT - BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); - OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); - OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); - OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); - OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); - OFFSET(PV_CPU_iret, pv_cpu_ops, iret); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); - OFFSET(PV_CPU_read_cr0, pv_cpu_ops, read_cr0); -#endif - -#ifdef CONFIG_XEN - BLANK(); - OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); - OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); -#endif - #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) BLANK(); OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); @@ -139,11 +77,4 @@ void foo(void) OFFSET(LGUEST_PAGES_regs_errcode, lguest_pages, regs.errcode); OFFSET(LGUEST_PAGES_regs, lguest_pages, regs); #endif - - BLANK(); - OFFSET(BP_scratch, boot_params, scratch); - OFFSET(BP_loadflags, boot_params, hdr.loadflags); - OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); - OFFSET(BP_version, boot_params, hdr.version); - OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 4a6aeedcd965..e72a1194af22 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -1,27 +1,4 @@ -/* - * Generate definitions needed by assembly language modules. - * This code generates raw asm output which is post-processed to extract - * and format the required data. - */ -#define COMPILE_OFFSETS - -#include <linux/crypto.h> -#include <linux/sched.h> -#include <linux/stddef.h> -#include <linux/errno.h> -#include <linux/hardirq.h> -#include <linux/suspend.h> -#include <linux/kbuild.h> -#include <asm/processor.h> -#include <asm/segment.h> -#include <asm/thread_info.h> #include <asm/ia32.h> -#include <asm/bootparam.h> -#include <asm/suspend.h> - -#include <xen/interface/xen.h> - -#include <asm/sigframe.h> #define __NO_STUBS 1 #undef __SYSCALL @@ -33,41 +10,19 @@ static char syscalls[] = { int main(void) { -#define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) - ENTRY(state); - ENTRY(flags); - ENTRY(pid); - BLANK(); -#undef ENTRY -#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry)) - ENTRY(flags); - ENTRY(addr_limit); - ENTRY(preempt_count); - ENTRY(status); -#ifdef CONFIG_IA32_EMULATION - ENTRY(sysenter_return); -#endif - BLANK(); -#undef ENTRY #ifdef CONFIG_PARAVIRT - BLANK(); - OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); - OFFSET(PARAVIRT_PATCH_pv_cpu_ops, paravirt_patch_template, pv_cpu_ops); - OFFSET(PARAVIRT_PATCH_pv_irq_ops, paravirt_patch_template, pv_irq_ops); - OFFSET(PV_IRQ_irq_disable, pv_irq_ops, irq_disable); - OFFSET(PV_IRQ_irq_enable, pv_irq_ops, irq_enable); OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame); - OFFSET(PV_CPU_iret, pv_cpu_ops, iret); OFFSET(PV_CPU_usergs_sysret32, pv_cpu_ops, usergs_sysret32); OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64); - OFFSET(PV_CPU_irq_enable_sysexit, pv_cpu_ops, irq_enable_sysexit); OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs); - OFFSET(PV_MMU_read_cr2, pv_mmu_ops, read_cr2); + BLANK(); #endif - #ifdef CONFIG_IA32_EMULATION -#define ENTRY(entry) DEFINE(IA32_SIGCONTEXT_ ## entry, offsetof(struct sigcontext_ia32, entry)) + OFFSET(TI_sysenter_return, thread_info, sysenter_return); + BLANK(); + +#define ENTRY(entry) OFFSET(IA32_SIGCONTEXT_ ## entry, sigcontext_ia32, entry) ENTRY(ax); ENTRY(bx); ENTRY(cx); @@ -79,15 +34,12 @@ int main(void) ENTRY(ip); BLANK(); #undef ENTRY - DEFINE(IA32_RT_SIGFRAME_sigcontext, - offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); + + OFFSET(IA32_RT_SIGFRAME_sigcontext, rt_sigframe_ia32, uc.uc_mcontext); BLANK(); #endif - DEFINE(pbe_address, offsetof(struct pbe, address)); - DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); - DEFINE(pbe_next, offsetof(struct pbe, next)); - BLANK(); -#define ENTRY(entry) DEFINE(pt_regs_ ## entry, offsetof(struct pt_regs, entry)) + +#define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(bx); ENTRY(cx); @@ -107,7 +59,8 @@ int main(void) ENTRY(flags); BLANK(); #undef ENTRY -#define ENTRY(entry) DEFINE(saved_context_ ## entry, offsetof(struct saved_context, entry)) + +#define ENTRY(entry) OFFSET(saved_context_ ## entry, saved_context, entry) ENTRY(cr0); ENTRY(cr2); ENTRY(cr3); @@ -115,26 +68,11 @@ int main(void) ENTRY(cr8); BLANK(); #undef ENTRY - DEFINE(TSS_ist, offsetof(struct tss_struct, x86_tss.ist)); - BLANK(); - DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); - BLANK(); - DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); + OFFSET(TSS_ist, tss_struct, x86_tss.ist); BLANK(); - OFFSET(BP_scratch, boot_params, scratch); - OFFSET(BP_loadflags, boot_params, hdr.loadflags); - OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); - OFFSET(BP_version, boot_params, hdr.version); - OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); - BLANK(); - DEFINE(PAGE_SIZE_asm, PAGE_SIZE); -#ifdef CONFIG_XEN - BLANK(); - OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); - OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); -#undef ENTRY -#endif + DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); + return 0; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a2559c3ed500..e2ced0074a45 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -675,7 +675,7 @@ void __init early_cpu_init(void) const struct cpu_dev *const *cdev; int count = 0; -#ifdef PROCESSOR_SELECT +#ifdef CONFIG_PROCESSOR_SELECT printk(KERN_INFO "KERNEL supported cpus:\n"); #endif @@ -687,7 +687,7 @@ void __init early_cpu_init(void) cpu_devs[count] = cpudev; count++; -#ifdef PROCESSOR_SELECT +#ifdef CONFIG_PROCESSOR_SELECT { unsigned int j; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 90cc675ac746..1ce1af2899df 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -768,11 +768,11 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) struct cpuinfo_x86 *c = &cpu_data(cpu); if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { - for_each_cpu(i, c->llc_shared_map) { + for_each_cpu(i, cpu_llc_shared_mask(cpu)) { if (!per_cpu(ici_cpuid4_info, i)) continue; this_leaf = CPUID4_INFO_IDX(i, index); - for_each_cpu(sibling, c->llc_shared_map) { + for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { if (!cpu_online(sibling)) continue; set_bit(sibling, this_leaf->shared_cpu_map); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5bf2fac52aca..167f97b5596e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -527,15 +527,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) int i, err = 0; struct threshold_bank *b = NULL; char name[32]; -#ifdef CONFIG_SMP - struct cpuinfo_x86 *c = &cpu_data(cpu); -#endif sprintf(name, "threshold_bank%i", bank); #ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ - i = cpumask_first(c->llc_shared_map); + i = cpumask_first(cpu_llc_shared_mask(cpu)); /* first core not up yet */ if (cpu_data(i).cpu_core_id) @@ -555,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) if (err) goto out; - cpumask_copy(b->cpus, c->llc_shared_map); + cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu)); per_cpu(threshold_banks, cpu)[bank] = b; goto out; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9d977a2ea693..26604188aa49 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -30,6 +30,7 @@ #include <asm/stacktrace.h> #include <asm/nmi.h> #include <asm/compat.h> +#include <asm/smp.h> #if 0 #undef wrmsrl @@ -93,6 +94,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +struct intel_percore; + #define MAX_LBR_ENTRIES 16 struct cpu_hw_events { @@ -128,6 +131,13 @@ struct cpu_hw_events { struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; /* + * Intel percore register state. + * Coordinate shared resources between HT threads. + */ + int percore_used; /* Used by this CPU? */ + struct intel_percore *per_core; + + /* * AMD specific bits */ struct amd_nb *amd_nb; @@ -166,8 +176,10 @@ struct cpu_hw_events { /* * Constraint on the Event code + UMask */ -#define PEBS_EVENT_CONSTRAINT(c, n) \ +#define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define PEBS_EVENT_CONSTRAINT(c, n) \ + INTEL_UEVENT_CONSTRAINT(c, n) #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -175,6 +187,28 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->weight; (e)++) +/* + * Extra registers for specific events. + * Some events need large masks and require external MSRs. + * Define a mapping to these extra registers. + */ +struct extra_reg { + unsigned int event; + unsigned int msr; + u64 config_mask; + u64 valid_mask; +}; + +#define EVENT_EXTRA_REG(e, ms, m, vm) { \ + .event = (e), \ + .msr = (ms), \ + .config_mask = (m), \ + .valid_mask = (vm), \ + } +#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm) +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0) + union perf_capabilities { struct { u64 lbr_format : 6; @@ -219,6 +253,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + struct event_constraint *percore_constraints; void (*quirks)(void); int perfctr_second_write; @@ -247,6 +282,11 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ + + /* + * Extra registers for events + */ + struct extra_reg *extra_regs; }; static struct x86_pmu x86_pmu __read_mostly; @@ -271,6 +311,10 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static u64 __read_mostly hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; /* * Propagate event elapsed time into the generic event. @@ -298,7 +342,7 @@ x86_perf_event_update(struct perf_event *event) */ again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(hwc->event_base + idx, new_raw_count); + rdmsrl(hwc->event_base, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) @@ -321,6 +365,49 @@ again: return new_raw_count; } +/* using X86_FEATURE_PERFCTR_CORE to later implement ALTERNATIVE() here */ +static inline int x86_pmu_addr_offset(int index) +{ + if (boot_cpu_has(X86_FEATURE_PERFCTR_CORE)) + return index << 1; + return index; +} + +static inline unsigned int x86_pmu_config_addr(int index) +{ + return x86_pmu.eventsel + x86_pmu_addr_offset(index); +} + +static inline unsigned int x86_pmu_event_addr(int index) +{ + return x86_pmu.perfctr + x86_pmu_addr_offset(index); +} + +/* + * Find and validate any extra registers to set up. + */ +static int x86_pmu_extra_regs(u64 config, struct perf_event *event) +{ + struct extra_reg *er; + + event->hw.extra_reg = 0; + event->hw.extra_config = 0; + + if (!x86_pmu.extra_regs) + return 0; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (config & er->config_mask)) + continue; + if (event->attr.config1 & ~er->valid_mask) + return -EINVAL; + event->hw.extra_reg = er->msr; + event->hw.extra_config = event->attr.config1; + break; + } + return 0; +} + static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); @@ -331,12 +418,12 @@ static bool reserve_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) + if (!reserve_perfctr_nmi(x86_pmu_event_addr(i))) goto perfctr_fail; } for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) + if (!reserve_evntsel_nmi(x86_pmu_config_addr(i))) goto eventsel_fail; } @@ -344,13 +431,13 @@ static bool reserve_pmc_hardware(void) eventsel_fail: for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); + release_evntsel_nmi(x86_pmu_config_addr(i)); i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); return false; } @@ -360,8 +447,8 @@ static void release_pmc_hardware(void) int i; for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); + release_perfctr_nmi(x86_pmu_event_addr(i)); + release_evntsel_nmi(x86_pmu_config_addr(i)); } } @@ -382,7 +469,7 @@ static bool check_hw_exists(void) * complain and bail. */ for (i = 0; i < x86_pmu.num_counters; i++) { - reg = x86_pmu.eventsel + i; + reg = x86_pmu_config_addr(i); ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; @@ -407,8 +494,8 @@ static bool check_hw_exists(void) * that don't trap on the MSR access and always return 0s. */ val = 0xabcdUL; - ret = checking_wrmsrl(x86_pmu.perfctr, val); - ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new); + ret = checking_wrmsrl(x86_pmu_event_addr(0), val); + ret |= rdmsrl_safe(x86_pmu_event_addr(0), &val_new); if (ret || val != val_new) goto msr_fail; @@ -442,8 +529,9 @@ static inline int x86_pmu_initialized(void) } static inline int -set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) +set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) { + struct perf_event_attr *attr = &event->attr; unsigned int cache_type, cache_op, cache_result; u64 config, val; @@ -470,8 +558,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return -EINVAL; hwc->config |= val; - - return 0; + attr->config1 = hw_cache_extra_regs[cache_type][cache_op][cache_result]; + return x86_pmu_extra_regs(val, event); } static int x86_setup_perfctr(struct perf_event *event) @@ -496,10 +584,10 @@ static int x86_setup_perfctr(struct perf_event *event) } if (attr->type == PERF_TYPE_RAW) - return 0; + return x86_pmu_extra_regs(event->attr.config, event); if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); + return set_ext_hw_attr(hwc, event); if (attr->config >= x86_pmu.max_events) return -EINVAL; @@ -617,11 +705,11 @@ static void x86_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(x86_pmu.eventsel + idx, val); + rdmsrl(x86_pmu_config_addr(idx), val); if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + wrmsrl(x86_pmu_config_addr(idx), val); } } @@ -642,21 +730,26 @@ static void x86_pmu_disable(struct pmu *pmu) x86_pmu.disable_all(); } +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, + u64 enable_mask) +{ + if (hwc->extra_reg) + wrmsrl(hwc->extra_reg, hwc->extra_config); + wrmsrl(hwc->config_base, hwc->config | enable_mask); +} + static void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; for (idx = 0; idx < x86_pmu.num_counters; idx++) { - struct perf_event *event = cpuc->events[idx]; - u64 val; + struct hw_perf_event *hwc = &cpuc->events[idx]->hw; if (!test_bit(idx, cpuc->active_mask)) continue; - val = event->hw.config; - val |= ARCH_PERFMON_EVENTSEL_ENABLE; - wrmsrl(x86_pmu.eventsel + idx, val); + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); } } @@ -821,15 +914,10 @@ static inline void x86_assign_hw_event(struct perf_event *event, hwc->event_base = 0; } else if (hwc->idx >= X86_PMC_IDX_FIXED) { hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0; } else { - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; + hwc->config_base = x86_pmu_config_addr(hwc->idx); + hwc->event_base = x86_pmu_event_addr(hwc->idx); } } @@ -915,17 +1003,11 @@ static void x86_pmu_enable(struct pmu *pmu) x86_pmu.enable_all(added); } -static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, - u64 enable_mask) -{ - wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask); -} - static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - wrmsrl(hwc->config_base + hwc->idx, hwc->config); + wrmsrl(hwc->config_base, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -978,7 +1060,7 @@ x86_perf_event_set_period(struct perf_event *event) */ local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask); + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need @@ -986,7 +1068,7 @@ x86_perf_event_set_period(struct perf_event *event) * is updated properly */ if (x86_pmu.perfctr_second_write) { - wrmsrl(hwc->event_base + idx, + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); } @@ -1113,8 +1195,8 @@ void perf_event_print_debug(void) pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); + rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl); + rdmsrl(x86_pmu_event_addr(idx), pmc_count); prev_left = per_cpu(pmc_prev_left[idx], cpu); @@ -1389,7 +1471,7 @@ static void __init pmu_check_apic(void) pr_info("no hardware sampling interrupt available.\n"); } -int __init init_hw_perf_events(void) +static int __init init_hw_perf_events(void) { struct event_constraint *c; int err; @@ -1608,7 +1690,7 @@ out: return ret; } -int x86_pmu_event_init(struct perf_event *event) +static int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 67e2202a6039..461f62bbd774 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -127,6 +127,11 @@ static int amd_pmu_hw_config(struct perf_event *event) /* * AMD64 events are detected based on their event codes. */ +static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) +{ + return ((hwc->config >> 24) & 0x0f00) | (hwc->config & 0x00ff); +} + static inline int amd_is_nb_event(struct hw_perf_event *hwc) { return (hwc->config & 0xe0) == 0xe0; @@ -385,13 +390,181 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_dead = amd_pmu_cpu_dead, }; +/* AMD Family 15h */ + +#define AMD_EVENT_TYPE_MASK 0x000000F0ULL + +#define AMD_EVENT_FP 0x00000000ULL ... 0x00000010ULL +#define AMD_EVENT_LS 0x00000020ULL ... 0x00000030ULL +#define AMD_EVENT_DC 0x00000040ULL ... 0x00000050ULL +#define AMD_EVENT_CU 0x00000060ULL ... 0x00000070ULL +#define AMD_EVENT_IC_DE 0x00000080ULL ... 0x00000090ULL +#define AMD_EVENT_EX_LS 0x000000C0ULL +#define AMD_EVENT_DE 0x000000D0ULL +#define AMD_EVENT_NB 0x000000E0ULL ... 0x000000F0ULL + +/* + * AMD family 15h event code/PMC mappings: + * + * type = event_code & 0x0F0: + * + * 0x000 FP PERF_CTL[5:3] + * 0x010 FP PERF_CTL[5:3] + * 0x020 LS PERF_CTL[5:0] + * 0x030 LS PERF_CTL[5:0] + * 0x040 DC PERF_CTL[5:0] + * 0x050 DC PERF_CTL[5:0] + * 0x060 CU PERF_CTL[2:0] + * 0x070 CU PERF_CTL[2:0] + * 0x080 IC/DE PERF_CTL[2:0] + * 0x090 IC/DE PERF_CTL[2:0] + * 0x0A0 --- + * 0x0B0 --- + * 0x0C0 EX/LS PERF_CTL[5:0] + * 0x0D0 DE PERF_CTL[2:0] + * 0x0E0 NB NB_PERF_CTL[3:0] + * 0x0F0 NB NB_PERF_CTL[3:0] + * + * Exceptions: + * + * 0x003 FP PERF_CTL[3] + * 0x00B FP PERF_CTL[3] + * 0x00D FP PERF_CTL[3] + * 0x023 DE PERF_CTL[2:0] + * 0x02D LS PERF_CTL[3] + * 0x02E LS PERF_CTL[3,0] + * 0x043 CU PERF_CTL[2:0] + * 0x045 CU PERF_CTL[2:0] + * 0x046 CU PERF_CTL[2:0] + * 0x054 CU PERF_CTL[2:0] + * 0x055 CU PERF_CTL[2:0] + * 0x08F IC PERF_CTL[0] + * 0x187 DE PERF_CTL[0] + * 0x188 DE PERF_CTL[0] + * 0x0DB EX PERF_CTL[5:0] + * 0x0DC LS PERF_CTL[5:0] + * 0x0DD LS PERF_CTL[5:0] + * 0x0DE LS PERF_CTL[5:0] + * 0x0DF LS PERF_CTL[5:0] + * 0x1D6 EX PERF_CTL[5:0] + * 0x1D8 EX PERF_CTL[5:0] + */ + +static struct event_constraint amd_f15_PMC0 = EVENT_CONSTRAINT(0, 0x01, 0); +static struct event_constraint amd_f15_PMC20 = EVENT_CONSTRAINT(0, 0x07, 0); +static struct event_constraint amd_f15_PMC3 = EVENT_CONSTRAINT(0, 0x08, 0); +static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT(0, 0x09, 0); +static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); +static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); + +static struct event_constraint * +amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + unsigned int event_code = amd_get_event_code(&event->hw); + + switch (event_code & AMD_EVENT_TYPE_MASK) { + case AMD_EVENT_FP: + switch (event_code) { + case 0x003: + case 0x00B: + case 0x00D: + return &amd_f15_PMC3; + default: + return &amd_f15_PMC53; + } + case AMD_EVENT_LS: + case AMD_EVENT_DC: + case AMD_EVENT_EX_LS: + switch (event_code) { + case 0x023: + case 0x043: + case 0x045: + case 0x046: + case 0x054: + case 0x055: + return &amd_f15_PMC20; + case 0x02D: + return &amd_f15_PMC3; + case 0x02E: + return &amd_f15_PMC30; + default: + return &amd_f15_PMC50; + } + case AMD_EVENT_CU: + case AMD_EVENT_IC_DE: + case AMD_EVENT_DE: + switch (event_code) { + case 0x08F: + case 0x187: + case 0x188: + return &amd_f15_PMC0; + case 0x0DB ... 0x0DF: + case 0x1D6: + case 0x1D8: + return &amd_f15_PMC50; + default: + return &amd_f15_PMC20; + } + case AMD_EVENT_NB: + /* not yet implemented */ + return &emptyconstraint; + default: + return &emptyconstraint; + } +} + +static __initconst const struct x86_pmu amd_pmu_f15h = { + .name = "AMD Family 15h", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .hw_config = amd_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_F15H_PERF_CTL, + .perfctr = MSR_F15H_PERF_CTR, + .event_map = amd_pmu_event_map, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_counters = 6, + .cntval_bits = 48, + .cntval_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints_f15h, + /* nortbridge counters not yet implemented: */ +#if 0 + .put_event_constraints = amd_put_event_constraints, + + .cpu_prepare = amd_pmu_cpu_prepare, + .cpu_starting = amd_pmu_cpu_starting, + .cpu_dead = amd_pmu_cpu_dead, +#endif +}; + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ if (boot_cpu_data.x86 < 6) return -ENODEV; - x86_pmu = amd_pmu; + /* + * If core performance counter extensions exists, it must be + * family 15h, otherwise fail. See x86_pmu_addr_offset(). + */ + switch (boot_cpu_data.x86) { + case 0x15: + if (!cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu_f15h; + break; + default: + if (cpu_has_perfctr_core) + return -ENODEV; + x86_pmu = amd_pmu; + break; + } /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 008835c1d79c..8fc2b2cee1da 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,5 +1,27 @@ #ifdef CONFIG_CPU_SUP_INTEL +#define MAX_EXTRA_REGS 2 + +/* + * Per register state. + */ +struct er_account { + int ref; /* reference count */ + unsigned int extra_reg; /* extra MSR number */ + u64 extra_config; /* extra MSR config */ +}; + +/* + * Per core state + * This used to coordinate shared registers for HT threads. + */ +struct intel_percore { + raw_spinlock_t lock; /* protect structure */ + struct er_account regs[MAX_EXTRA_REGS]; + int refcnt; /* number of threads */ + unsigned core_id; +}; + /* * Intel PerfMon, used on Core and later. */ @@ -64,6 +86,18 @@ static struct event_constraint intel_nehalem_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct extra_reg intel_nehalem_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_nehalem_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_westmere_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -76,6 +110,33 @@ static struct event_constraint intel_westmere_event_constraints[] = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_snb_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */ + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */ + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */ + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */ + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + EVENT_CONSTRAINT_END +}; + +static struct extra_reg intel_westmere_extra_regs[] = +{ + INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0xffff), + INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0xffff), + EVENT_EXTRA_END +}; + +static struct event_constraint intel_westmere_percore_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xb7, 0), + INTEL_EVENT_CONSTRAINT(0xbb, 0), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_gen_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -89,6 +150,106 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } +static __initconst const u64 snb_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */ + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + /* + * TBD: Need Off-core Response Performance Monitoring support + */ + [ C(OP_READ) ] = { + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_WRITE) ] = { + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + [ C(OP_PREFETCH) ] = { + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */ + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -124,16 +285,26 @@ static __initconst const u64 westmere_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE_1.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01bb, + /* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01bb, }, }, [ C(DTLB) ] = { @@ -180,6 +351,39 @@ static __initconst const u64 westmere_hw_cache_event_ids }, }; +/* + * OFFCORE_RESPONSE MSR bits (subset), See IA32 SDM Vol 3 30.6.1.3 + */ + +#define DMND_DATA_RD (1 << 0) +#define DMND_RFO (1 << 1) +#define DMND_WB (1 << 3) +#define PF_DATA_RD (1 << 4) +#define PF_DATA_RFO (1 << 5) +#define RESP_UNCORE_HIT (1 << 8) +#define RESP_MISS (0xf600) /* non uncore hit */ + +static __initconst const u64 nehalem_hw_cache_extra_regs + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = DMND_DATA_RD|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_DATA_RD|RESP_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = DMND_RFO|DMND_WB|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = DMND_RFO|DMND_WB|RESP_MISS, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_UNCORE_HIT, + [ C(RESULT_MISS) ] = PF_DATA_RD|PF_DATA_RFO|RESP_MISS, + }, + } +}; + static __initconst const u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -215,16 +419,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids }, [ C(LL ) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, + /* + * Use RFO, not WRITEBACK, because a write miss would typically occur + * on RFO. + */ [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */ + [ C(RESULT_ACCESS) ] = 0x01b7, + /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */ + [ C(RESULT_MISS) ] = 0x01b7, }, }, [ C(DTLB) ] = { @@ -691,8 +905,8 @@ static void intel_pmu_reset(void) printk("clearing PMU state on CPU#%d\n", smp_processor_id()); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + checking_wrmsrl(x86_pmu_config_addr(idx), 0ull); + checking_wrmsrl(x86_pmu_event_addr(idx), 0ull); } for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); @@ -794,6 +1008,67 @@ intel_bts_constraints(struct perf_event *event) } static struct event_constraint * +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT; + struct event_constraint *c; + struct intel_percore *pc; + struct er_account *era; + int i; + int free_slot; + int found; + + if (!x86_pmu.percore_constraints || hwc->extra_alloc) + return NULL; + + for (c = x86_pmu.percore_constraints; c->cmask; c++) { + if (e != c->code) + continue; + + /* + * Allocate resource per core. + */ + pc = cpuc->per_core; + if (!pc) + break; + c = &emptyconstraint; + raw_spin_lock(&pc->lock); + free_slot = -1; + found = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && hwc->extra_reg == era->extra_reg) { + /* Allow sharing same config */ + if (hwc->extra_config == era->extra_config) { + era->ref++; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + /* else conflict */ + found = 1; + break; + } else if (era->ref == 0 && free_slot == -1) + free_slot = i; + } + if (!found && free_slot != -1) { + era = &pc->regs[free_slot]; + era->ref = 1; + era->extra_reg = hwc->extra_reg; + era->extra_config = hwc->extra_config; + cpuc->percore_used = 1; + hwc->extra_alloc = 1; + c = NULL; + } + raw_spin_unlock(&pc->lock); + return c; + } + + return NULL; +} + +static struct event_constraint * intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { struct event_constraint *c; @@ -806,9 +1081,51 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event if (c) return c; + c = intel_percore_constraints(cpuc, event); + if (c) + return c; + return x86_get_event_constraints(cpuc, event); } +static void intel_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct extra_reg *er; + struct intel_percore *pc; + struct er_account *era; + struct hw_perf_event *hwc = &event->hw; + int i, allref; + + if (!cpuc->percore_used) + return; + + for (er = x86_pmu.extra_regs; er->msr; er++) { + if (er->event != (hwc->config & er->config_mask)) + continue; + + pc = cpuc->per_core; + raw_spin_lock(&pc->lock); + for (i = 0; i < MAX_EXTRA_REGS; i++) { + era = &pc->regs[i]; + if (era->ref > 0 && + era->extra_config == hwc->extra_config && + era->extra_reg == er->msr) { + era->ref--; + hwc->extra_alloc = 0; + break; + } + } + allref = 0; + for (i = 0; i < MAX_EXTRA_REGS; i++) + allref += pc->regs[i].ref; + if (allref == 0) + cpuc->percore_used = 0; + raw_spin_unlock(&pc->lock); + break; + } +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); @@ -880,20 +1197,67 @@ static __initconst const struct x86_pmu core_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, .event_constraints = intel_core_event_constraints, }; +static int intel_pmu_cpu_prepare(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + + if (!cpu_has_ht_siblings()) + return NOTIFY_OK; + + cpuc->per_core = kzalloc_node(sizeof(struct intel_percore), + GFP_KERNEL, cpu_to_node(cpu)); + if (!cpuc->per_core) + return NOTIFY_BAD; + + raw_spin_lock_init(&cpuc->per_core->lock); + cpuc->per_core->core_id = -1; + return NOTIFY_OK; +} + static void intel_pmu_cpu_starting(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + int core_id = topology_core_id(cpu); + int i; + init_debug_store_on_cpu(cpu); /* * Deal with CPUs that don't clear their LBRs on power-up. */ intel_pmu_lbr_reset(); + + if (!cpu_has_ht_siblings()) + return; + + for_each_cpu(i, topology_thread_cpumask(cpu)) { + struct intel_percore *pc = per_cpu(cpu_hw_events, i).per_core; + + if (pc && pc->core_id == core_id) { + kfree(cpuc->per_core); + cpuc->per_core = pc; + break; + } + } + + cpuc->per_core->core_id = core_id; + cpuc->per_core->refcnt++; } static void intel_pmu_cpu_dying(int cpu) { + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct intel_percore *pc = cpuc->per_core; + + if (pc) { + if (pc->core_id == -1 || --pc->refcnt == 0) + kfree(pc); + cpuc->per_core = NULL; + } + fini_debug_store_on_cpu(cpu); } @@ -918,7 +1282,9 @@ static __initconst const struct x86_pmu intel_pmu = { */ .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, + .put_event_constraints = intel_put_event_constraints, + .cpu_prepare = intel_pmu_cpu_prepare, .cpu_starting = intel_pmu_cpu_starting, .cpu_dying = intel_pmu_cpu_dying, }; @@ -1024,6 +1390,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_core(); x86_pmu.event_constraints = intel_core2_event_constraints; + x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints; pr_cont("Core2 events, "); break; @@ -1032,11 +1399,16 @@ static __init int intel_pmu_init(void) case 46: /* 45 nm nehalem-ex, "Beckton" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_nehalem_event_constraints; + x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints; + x86_pmu.percore_constraints = intel_nehalem_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.extra_regs = intel_nehalem_extra_regs; pr_cont("Nehalem events, "); break; @@ -1047,6 +1419,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_atom(); x86_pmu.event_constraints = intel_gen_event_constraints; + x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints; pr_cont("Atom events, "); break; @@ -1054,14 +1427,30 @@ static __init int intel_pmu_init(void) case 44: /* 32 nm nehalem, "Gulftown" */ memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.percore_constraints = intel_westmere_percore_constraints; x86_pmu.enable_all = intel_pmu_nhm_enable_all; + x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints; + x86_pmu.extra_regs = intel_westmere_extra_regs; pr_cont("Westmere events, "); break; + case 42: /* SandyBridge */ + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + intel_pmu_lbr_init_nhm(); + + x86_pmu.event_constraints = intel_snb_event_constraints; + x86_pmu.pebs_constraints = intel_snb_pebs_events; + pr_cont("SandyBridge events, "); + break; + default: /* * default constraints for v2 and up diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b7dcd9f2b8a0..b95c66ae4a2a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -361,30 +361,88 @@ static int intel_pmu_drain_bts_buffer(void) /* * PEBS */ - -static struct event_constraint intel_core_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ +static struct event_constraint intel_core2_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_atom_pebs_event_constraints[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* MISPREDICTED_BRANCH_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED.* */ EVENT_CONSTRAINT_END }; -static struct event_constraint intel_nehalem_pebs_events[] = { - PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ - PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ - PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ - PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ - PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ - PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ - PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ +static struct event_constraint intel_nehalem_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INST_RETIRED.ANY */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_pebs_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x0b, 0xf), /* MEM_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x010c, 0xf), /* MEM_STORE_RETIRED.DTLB_MISS */ + INTEL_EVENT_CONSTRAINT(0xc0, 0xf), /* INSTR_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc2, 0xf), /* UOPS_RETIRED.* */ + + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc5, 0xf), /* BR_MISP_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xc7, 0xf), /* SSEX_UOPS_RETIRED.* */ + PEBS_EVENT_CONSTRAINT(0x20c8, 0xf), /* ITLB_MISS_RETIRED */ + INTEL_EVENT_CONSTRAINT(0xcb, 0xf), /* MEM_LOAD_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xf7, 0xf), /* FP_ASSIST.* */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_snb_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */ + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */ + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */ + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */ + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */ + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */ + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */ + PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */ + PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */ + PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */ + PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */ + PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */ + PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */ + PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */ + PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */ + PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ + PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */ + PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ + PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ + PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ + PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */ + PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */ + PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; @@ -695,20 +753,17 @@ static void intel_ds_init(void) printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; - x86_pmu.pebs_constraints = intel_core_pebs_events; break; case 1: printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; - x86_pmu.pebs_constraints = intel_nehalem_pebs_events; break; default: printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; - break; } } } diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ff751a9f182b..3769ac822f96 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -764,9 +764,9 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) u64 v; /* an official way for overflow indication */ - rdmsrl(hwc->config_base + hwc->idx, v); + rdmsrl(hwc->config_base, v); if (v & P4_CCCR_OVF) { - wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF); + wrmsrl(hwc->config_base, v & ~P4_CCCR_OVF); return 1; } @@ -815,7 +815,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) * state we need to clear P4_CCCR_OVF, otherwise interrupt get * asserted again and again */ - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (u64)(p4_config_unpack_cccr(hwc->config)) & ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } @@ -885,7 +885,7 @@ static void p4_pmu_enable_event(struct perf_event *event) p4_pmu_enable_pebs(hwc->config); (void)checking_wrmsrl(escr_addr, escr_conf); - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (void)checking_wrmsrl(hwc->config_base, (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 34ba07be2cda..20c097e33860 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -68,7 +68,7 @@ p6_pmu_disable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static void p6_pmu_enable_event(struct perf_event *event) @@ -81,7 +81,7 @@ static void p6_pmu_enable_event(struct perf_event *event) if (cpuc->enabled) val |= ARCH_PERFMON_EVENTSEL_ENABLE; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); + (void)checking_wrmsrl(hwc->config_base, val); } static __initconst const struct x86_pmu p6_pmu = { diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index d5a236615501..966512b2cacf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -46,6 +46,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) /* returns the bit offset of the performance counter register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTR) + return (msr - MSR_F15H_PERF_CTR) >> 1; return msr - MSR_K7_PERFCTR0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) @@ -70,6 +72,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) /* returns the bit offset of the event selection register */ switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: + if (msr >= MSR_F15H_PERF_CTL) + return (msr - MSR_F15H_PERF_CTL) >> 1; return msr - MSR_K7_EVNTSEL0; case X86_VENDOR_INTEL: if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index df20723a6a1b..220a1c11cfde 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -320,31 +320,6 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -void notrace __kprobes -die_nmi(char *str, struct pt_regs *regs, int do_panic) -{ - unsigned long flags; - - if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - - /* - * We are in trouble anyway, lets at least try - * to get a message out. - */ - flags = oops_begin(); - printk(KERN_EMERG "%s", str); - printk(" on CPU%d, ip %08lx, registers:\n", - smp_processor_id(), regs->ip); - show_registers(regs); - oops_end(flags, regs, 0); - if (do_panic || panic_on_oops) - panic("Non maskable interrupt"); - nmi_exit(); - local_irq_enable(); - do_exit(SIGBUS); -} - static int __init oops_setup(char *s) { if (!s) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c8b4efad7ebb..fa41f7298c84 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -65,6 +65,8 @@ #define sysexit_audit syscall_exit_work #endif + .section .entry.text, "ax" + /* * We use macros for low-level operations which need to be overridden * for paravirtualization. The following will never clobber any registers: @@ -395,7 +397,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax @@ -788,7 +790,7 @@ ENDPROC(ptregs_clone) */ .section .init.rodata,"a" ENTRY(interrupt) -.text +.section .entry.text, "ax" .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -807,7 +809,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .long 1b - .text + .section .entry.text, "ax" vector=vector+1 .endif .endr @@ -1409,8 +1411,7 @@ END(general_protection) #ifdef CONFIG_KVM_GUEST ENTRY(async_page_fault) RING0_EC_FRAME - pushl $do_async_page_fault - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi $do_async_page_fault jmp error_code CFI_ENDPROC END(apf_page_fault) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 891268c645ea..b72b4a6466a9 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,6 +61,8 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 + .section .entry.text, "ax" + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -744,7 +746,7 @@ END(stub_rt_sigreturn) */ .section .init.rodata,"a" ENTRY(interrupt) - .text + .section .entry.text .p2align 5 .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) @@ -763,7 +765,7 @@ vector=FIRST_EXTERNAL_VECTOR .endif .previous .quad 1b - .text + .section .entry.text vector=vector+1 .endif .endr @@ -1251,7 +1253,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) decl PER_CPU_VAR(irq_count) jmp error_exit CFI_ENDPROC -END(do_hypervisor_callback) +END(xen_do_hypervisor_callback) /* * Hypervisor uses this for application faults while it executes. diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 382eb2936d4d..a93742a57468 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -437,18 +437,19 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, return; } - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer) == -EBUSY) { - *parent = old; - return; - } - trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; /* Only trace if the calling function expects to */ if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; *parent = old; + return; + } + + if (ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer) == -EBUSY) { + *parent = old; + return; } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 767d6c43de37..187aa63b321f 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -73,7 +73,7 @@ MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT */ KERNEL_PAGES = LOWMEM_PAGES -INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm +INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE RESERVE_BRK(pagetables, INIT_MAP_SIZE) /* @@ -623,7 +623,7 @@ ENTRY(initial_code) * BSS section */ __PAGE_ALIGNED_BSS - .align PAGE_SIZE_asm + .align PAGE_SIZE #ifdef CONFIG_X86_PAE initial_pg_pmd: .fill 1024*KPMDS,4,0 @@ -644,7 +644,7 @@ ENTRY(swapper_pg_dir) #ifdef CONFIG_X86_PAE __PAGE_ALIGNED_DATA /* Page-aligned for the benefit of paravirt? */ - .align PAGE_SIZE_asm + .align PAGE_SIZE ENTRY(initial_page_table) .long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */ # if KPMDS == 3 @@ -662,7 +662,7 @@ ENTRY(initial_page_table) # else # error "Kernel PMDs should be 1, 2 or 3" # endif - .align PAGE_SIZE_asm /* needs to be page-sized too */ + .align PAGE_SIZE /* needs to be page-sized too */ #endif .data diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 4ff5968f12d2..bfe8f729e086 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -503,7 +503,7 @@ static int hpet_assign_irq(struct hpet_dev *dev) if (!irq) return -EINVAL; - set_irq_data(irq, dev); + irq_set_handler_data(irq, dev); if (hpet_setup_msi_irq(irq)) return -EINVAL; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 20757cb2efa3..d9ca749c123b 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -112,7 +112,7 @@ static void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); - set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, + irq_set_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, i8259A_chip.name); enable_irq(irq); } diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 8eec0ec59af2..8c968974253d 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,22 +14,9 @@ #include <linux/slab.h> #include <linux/thread_info.h> #include <linux/syscalls.h> +#include <linux/bitmap.h> #include <asm/syscalls.h> -/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ -static void set_bitmap(unsigned long *bitmap, unsigned int base, - unsigned int extent, int new_value) -{ - unsigned int i; - - for (i = base; i < base + extent; i++) { - if (new_value) - __set_bit(i, bitmap); - else - __clear_bit(i, bitmap); - } -} - /* * this changes the io permissions bitmap in the current task. */ @@ -69,7 +56,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) */ tss = &per_cpu(init_tss, get_cpu()); - set_bitmap(t->io_bitmap_ptr, from, num, !turn_on); + if (turn_on) + bitmap_clear(t->io_bitmap_ptr, from, num); + else + bitmap_set(t->io_bitmap_ptr, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 387b6a0c9e81..5ee693faa111 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -44,9 +44,9 @@ void ack_bad_irq(unsigned int irq) #define irq_stats(x) (&per_cpu(irq_stat, x)) /* - * /proc/interrupts printing: + * /proc/interrupts printing for arch specific interrupts */ -static int show_other_interrupts(struct seq_file *p, int prec) +int arch_show_interrupts(struct seq_file *p, int prec) { int j; @@ -122,59 +122,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) return 0; } -int show_interrupts(struct seq_file *p, void *v) -{ - unsigned long flags, any_count = 0; - int i = *(loff_t *) v, j, prec; - struct irqaction *action; - struct irq_desc *desc; - - if (i > nr_irqs) - return 0; - - for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) - j *= 10; - - if (i == nr_irqs) - return show_other_interrupts(p, prec); - - /* print header */ - if (i == 0) { - seq_printf(p, "%*s", prec + 8, ""); - for_each_online_cpu(j) - seq_printf(p, "CPU%-8d", j); - seq_putc(p, '\n'); - } - - desc = irq_to_desc(i); - if (!desc) - return 0; - - raw_spin_lock_irqsave(&desc->lock, flags); - for_each_online_cpu(j) - any_count |= kstat_irqs_cpu(i, j); - action = desc->action; - if (!action && !any_count) - goto out; - - seq_printf(p, "%*d: ", prec, i); - for_each_online_cpu(j) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); - seq_printf(p, " %8s", desc->irq_data.chip->name); - seq_printf(p, "-%-8s", desc->name); - - if (action) { - seq_printf(p, " %s", action->name); - while ((action = action->next) != NULL) - seq_printf(p, ", %s", action->name); - } - - seq_putc(p, '\n'); -out: - raw_spin_unlock_irqrestore(&desc->lock, flags); - return 0; -} - /* * /proc/stat helpers */ @@ -293,6 +240,7 @@ void fixup_irqs(void) static int warned; struct irq_desc *desc; struct irq_data *data; + struct irq_chip *chip; for_each_irq_desc(irq, desc) { int break_affinity = 0; @@ -307,10 +255,10 @@ void fixup_irqs(void) /* interrupt's are disabled at this point */ raw_spin_lock(&desc->lock); - data = &desc->irq_data; + data = irq_desc_get_irq_data(desc); affinity = data->affinity; if (!irq_has_action(irq) || - cpumask_equal(affinity, cpu_online_mask)) { + cpumask_subset(affinity, cpu_online_mask)) { raw_spin_unlock(&desc->lock); continue; } @@ -327,16 +275,17 @@ void fixup_irqs(void) affinity = cpu_all_mask; } - if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask) - data->chip->irq_mask(data); + chip = irq_data_get_irq_chip(data); + if (!irqd_can_move_in_process_context(data) && chip->irq_mask) + chip->irq_mask(data); - if (data->chip->irq_set_affinity) - data->chip->irq_set_affinity(data, affinity, true); + if (chip->irq_set_affinity) + chip->irq_set_affinity(data, affinity, true); else if (!(warned++)) set_affinity = 0; - if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask) - data->chip->irq_unmask(data); + if (!irqd_can_move_in_process_context(data) && chip->irq_unmask) + chip->irq_unmask(data); raw_spin_unlock(&desc->lock); @@ -368,10 +317,11 @@ void fixup_irqs(void) irq = __this_cpu_read(vector_irq[vector]); desc = irq_to_desc(irq); - data = &desc->irq_data; + data = irq_desc_get_irq_data(desc); + chip = irq_data_get_irq_chip(data); raw_spin_lock(&desc->lock); - if (data->chip->irq_retrigger) - data->chip->irq_retrigger(data); + if (chip->irq_retrigger) + chip->irq_retrigger(data); raw_spin_unlock(&desc->lock); } } diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 7aad10a63e04..d30854b18d25 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -71,6 +71,7 @@ static irqreturn_t math_error_irq(int cpl, void *dev_id) static struct irqaction fpu_irq = { .handler = math_error_irq, .name = "fpu", + .flags = IRQF_NO_THREAD, }; #endif @@ -80,6 +81,7 @@ static struct irqaction fpu_irq = { static struct irqaction irq2 = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { @@ -110,7 +112,7 @@ void __init init_ISA_irqs(void) legacy_pic->init(0); for (i = 0; i < legacy_pic->nr_legacy_irqs; i++) - set_irq_chip_and_handler_name(i, chip, handle_level_irq, name); + irq_set_chip_and_handler_name(i, chip, handle_level_irq, name); } void __init init_IRQ(void) diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index a4130005028a..7c64c420a9f6 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -533,15 +533,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMIWATCHDOG: - if (atomic_read(&kgdb_active) != -1) { - /* KGDB CPU roundup: */ - kgdb_nmicallback(raw_smp_processor_id(), regs); - return NOTIFY_STOP; - } - /* Enter debugger: */ - break; - case DIE_DEBUG: if (atomic_read(&kgdb_cpu_doing_single_step) != -1) { if (user_mode(regs)) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index d91c477b3f62..c969fd9d1566 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -1276,6 +1276,14 @@ static int __kprobes can_optimize(unsigned long paddr) if (!kallsyms_lookup_size_offset(paddr, &size, &offset)) return 0; + /* + * Do not optimize in the entry code due to the unstable + * stack handling. + */ + if ((paddr >= (unsigned long )__entry_text_start) && + (paddr < (unsigned long )__entry_text_end)) + return 0; + /* Check there is enough space for a relative jump. */ if (size - offset < RELATIVEJUMP_SIZE) return 0; diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 0fe6d1a66c38..c5610384ab16 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -66,7 +66,6 @@ struct microcode_amd { unsigned int mpb[0]; }; -#define UCODE_MAX_SIZE 2048 #define UCODE_CONTAINER_SECTION_HDR 8 #define UCODE_CONTAINER_HEADER_SIZE 12 @@ -77,20 +76,20 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) struct cpuinfo_x86 *c = &cpu_data(cpu); u32 dummy; - memset(csig, 0, sizeof(*csig)); if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { - pr_warning("microcode: CPU%d: AMD CPU family 0x%x not " - "supported\n", cpu, c->x86); + pr_warning("CPU%d: family %d not supported\n", cpu, c->x86); return -1; } + rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); - pr_info("CPU%d: patch_level=0x%x\n", cpu, csig->rev); + pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); + return 0; } -static int get_matching_microcode(int cpu, void *mc, int rev) +static int get_matching_microcode(int cpu, struct microcode_header_amd *mc_hdr, + int rev) { - struct microcode_header_amd *mc_header = mc; unsigned int current_cpu_id; u16 equiv_cpu_id = 0; unsigned int i = 0; @@ -109,17 +108,17 @@ static int get_matching_microcode(int cpu, void *mc, int rev) if (!equiv_cpu_id) return 0; - if (mc_header->processor_rev_id != equiv_cpu_id) + if (mc_hdr->processor_rev_id != equiv_cpu_id) return 0; /* ucode might be chipset specific -- currently we don't support this */ - if (mc_header->nb_dev_id || mc_header->sb_dev_id) { - pr_err("CPU%d: loading of chipset specific code not yet supported\n", + if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) { + pr_err("CPU%d: chipset specific code not yet supported\n", cpu); return 0; } - if (mc_header->patch_id <= rev) + if (mc_hdr->patch_id <= rev) return 0; return 1; @@ -144,71 +143,93 @@ static int apply_microcode_amd(int cpu) /* check current patch id and patch's id for match */ if (rev != mc_amd->hdr.patch_id) { - pr_err("CPU%d: update failed (for patch_level=0x%x)\n", + pr_err("CPU%d: update failed for patch_level=0x%08x\n", cpu, mc_amd->hdr.patch_id); return -1; } - pr_info("CPU%d: updated (new patch_level=0x%x)\n", cpu, rev); + pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); uci->cpu_sig.rev = rev; return 0; } -static void * -get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) +static unsigned int verify_ucode_size(int cpu, const u8 *buf, unsigned int size) { - unsigned int total_size; - u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; - void *mc; + struct cpuinfo_x86 *c = &cpu_data(cpu); + unsigned int max_size, actual_size; + +#define F1XH_MPB_MAX_SIZE 2048 +#define F14H_MPB_MAX_SIZE 1824 +#define F15H_MPB_MAX_SIZE 4096 + + switch (c->x86) { + case 0x14: + max_size = F14H_MPB_MAX_SIZE; + break; + case 0x15: + max_size = F15H_MPB_MAX_SIZE; + break; + default: + max_size = F1XH_MPB_MAX_SIZE; + break; + } - get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR); + actual_size = buf[4] + (buf[5] << 8); - if (section_hdr[0] != UCODE_UCODE_TYPE) { - pr_err("error: invalid type field in container file section header\n"); - return NULL; + if (actual_size > size || actual_size > max_size) { + pr_err("section size mismatch\n"); + return 0; } - total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); + return actual_size; +} - if (total_size > size || total_size > UCODE_MAX_SIZE) { - pr_err("error: size mismatch\n"); - return NULL; +static struct microcode_header_amd * +get_next_ucode(int cpu, const u8 *buf, unsigned int size, unsigned int *mc_size) +{ + struct microcode_header_amd *mc = NULL; + unsigned int actual_size = 0; + + if (buf[0] != UCODE_UCODE_TYPE) { + pr_err("invalid type field in container file section header\n"); + goto out; } - mc = vzalloc(UCODE_MAX_SIZE); + actual_size = verify_ucode_size(cpu, buf, size); + if (!actual_size) + goto out; + + mc = vzalloc(actual_size); if (!mc) - return NULL; + goto out; - get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size); - *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; + get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, actual_size); + *mc_size = actual_size + UCODE_CONTAINER_SECTION_HDR; +out: return mc; } static int install_equiv_cpu_table(const u8 *buf) { - u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; - unsigned int *buf_pos = (unsigned int *)container_hdr; - unsigned long size; - - get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE); - - size = buf_pos[2]; - - if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { - pr_err("error: invalid type field in container file section header\n"); - return 0; + unsigned int *ibuf = (unsigned int *)buf; + unsigned int type = ibuf[1]; + unsigned int size = ibuf[2]; + + if (type != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { + pr_err("empty section/" + "invalid type field in container file section header\n"); + return -EINVAL; } equiv_cpu_table = vmalloc(size); if (!equiv_cpu_table) { pr_err("failed to allocate equivalent CPU table\n"); - return 0; + return -ENOMEM; } - buf += UCODE_CONTAINER_HEADER_SIZE; - get_ucode_data(equiv_cpu_table, buf, size); + get_ucode_data(equiv_cpu_table, buf + UCODE_CONTAINER_HEADER_SIZE, size); return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ } @@ -223,16 +244,16 @@ static enum ucode_state generic_load_microcode(int cpu, const u8 *data, size_t size) { struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + struct microcode_header_amd *mc_hdr = NULL; + unsigned int mc_size, leftover; + int offset; const u8 *ucode_ptr = data; void *new_mc = NULL; - void *mc; - int new_rev = uci->cpu_sig.rev; - unsigned int leftover; - unsigned long offset; + unsigned int new_rev = uci->cpu_sig.rev; enum ucode_state state = UCODE_OK; offset = install_equiv_cpu_table(ucode_ptr); - if (!offset) { + if (offset < 0) { pr_err("failed to create equivalent cpu table\n"); return UCODE_ERROR; } @@ -241,64 +262,65 @@ generic_load_microcode(int cpu, const u8 *data, size_t size) leftover = size - offset; while (leftover) { - unsigned int uninitialized_var(mc_size); - struct microcode_header_amd *mc_header; - - mc = get_next_ucode(ucode_ptr, leftover, &mc_size); - if (!mc) + mc_hdr = get_next_ucode(cpu, ucode_ptr, leftover, &mc_size); + if (!mc_hdr) break; - mc_header = (struct microcode_header_amd *)mc; - if (get_matching_microcode(cpu, mc, new_rev)) { + if (get_matching_microcode(cpu, mc_hdr, new_rev)) { vfree(new_mc); - new_rev = mc_header->patch_id; - new_mc = mc; + new_rev = mc_hdr->patch_id; + new_mc = mc_hdr; } else - vfree(mc); + vfree(mc_hdr); ucode_ptr += mc_size; leftover -= mc_size; } - if (new_mc) { - if (!leftover) { - vfree(uci->mc); - uci->mc = new_mc; - pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", - cpu, new_rev, uci->cpu_sig.rev); - } else { - vfree(new_mc); - state = UCODE_ERROR; - } - } else + if (!new_mc) { state = UCODE_NFOUND; + goto free_table; + } + if (!leftover) { + vfree(uci->mc); + uci->mc = new_mc; + pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n", + cpu, uci->cpu_sig.rev, new_rev); + } else { + vfree(new_mc); + state = UCODE_ERROR; + } + +free_table: free_equiv_cpu_table(); return state; } -static enum ucode_state request_microcode_fw(int cpu, struct device *device) +static enum ucode_state request_microcode_amd(int cpu, struct device *device) { const char *fw_name = "amd-ucode/microcode_amd.bin"; - const struct firmware *firmware; - enum ucode_state ret; + const struct firmware *fw; + enum ucode_state ret = UCODE_NFOUND; - if (request_firmware(&firmware, fw_name, device)) { - printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); - return UCODE_NFOUND; + if (request_firmware(&fw, fw_name, device)) { + pr_err("failed to load file %s\n", fw_name); + goto out; } - if (*(u32 *)firmware->data != UCODE_MAGIC) { - pr_err("invalid UCODE_MAGIC (0x%08x)\n", - *(u32 *)firmware->data); - return UCODE_ERROR; + ret = UCODE_ERROR; + if (*(u32 *)fw->data != UCODE_MAGIC) { + pr_err("invalid magic value (0x%08x)\n", *(u32 *)fw->data); + goto fw_release; } - ret = generic_load_microcode(cpu, firmware->data, firmware->size); + ret = generic_load_microcode(cpu, fw->data, fw->size); - release_firmware(firmware); +fw_release: + release_firmware(fw); +out: return ret; } @@ -319,7 +341,7 @@ static void microcode_fini_cpu_amd(int cpu) static struct microcode_ops microcode_amd_ops = { .request_microcode_user = request_microcode_user, - .request_microcode_fw = request_microcode_fw, + .request_microcode_fw = request_microcode_amd, .collect_cpu_info = collect_cpu_info_amd, .apply_microcode = apply_microcode_amd, .microcode_fini_cpu = microcode_fini_cpu_amd, diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 1cca374a2bac..87af68e0e1e1 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -417,8 +417,10 @@ static int mc_sysdev_add(struct sys_device *sys_dev) if (err) return err; - if (microcode_init_cpu(cpu) == UCODE_ERROR) - err = -EINVAL; + if (microcode_init_cpu(cpu) == UCODE_ERROR) { + sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); + return -EINVAL; + } return err; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ff4554198981..99fa3adf0141 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -110,12 +110,9 @@ void show_regs_common(void) init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); - printk(KERN_CONT " "); - printk(KERN_CONT "%s %s", vendor, product); - if (board) { - printk(KERN_CONT "/"); - printk(KERN_CONT "%s", board); - } + printk(KERN_CONT " %s %s", vendor, product); + if (board) + printk(KERN_CONT "/%s", board); printk(KERN_CONT "\n"); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a9d805f3920c..e9efdfd51c8d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,6 +64,7 @@ #include <asm/mtrr.h> #include <asm/mwait.h> #include <asm/apic.h> +#include <asm/io_apic.h> #include <asm/setup.h> #include <asm/uv/uv.h> #include <linux/mc146818rtc.h> @@ -126,6 +127,8 @@ EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); EXPORT_PER_CPU_SYMBOL(cpu_core_map); +DEFINE_PER_CPU(cpumask_var_t, cpu_llc_shared_map); + /* Per CPU bogomips and other parameters */ DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); @@ -294,23 +297,6 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } -#ifdef CONFIG_CPUMASK_OFFSTACK -/* In this case, llc_shared_map is a pointer to a cpumask. */ -static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, - const struct cpuinfo_x86 *src) -{ - struct cpumask *llc = dst->llc_shared_map; - *dst = *src; - dst->llc_shared_map = llc; -} -#else -static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, - const struct cpuinfo_x86 *src) -{ - *dst = *src; -} -#endif /* CONFIG_CPUMASK_OFFSTACK */ - /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -320,7 +306,7 @@ void __cpuinit smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - copy_cpuinfo_x86(c, &boot_cpu_data); + *c = boot_cpu_data; c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); @@ -328,15 +314,12 @@ void __cpuinit smp_store_cpu_info(int id) static void __cpuinit link_thread_siblings(int cpu1, int cpu2) { - struct cpuinfo_x86 *c1 = &cpu_data(cpu1); - struct cpuinfo_x86 *c2 = &cpu_data(cpu2); - cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); - cpumask_set_cpu(cpu1, c2->llc_shared_map); - cpumask_set_cpu(cpu2, c1->llc_shared_map); + cpumask_set_cpu(cpu1, cpu_llc_shared_mask(cpu2)); + cpumask_set_cpu(cpu2, cpu_llc_shared_mask(cpu1)); } @@ -365,7 +348,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); } - cpumask_set_cpu(cpu, c->llc_shared_map); + cpumask_set_cpu(cpu, cpu_llc_shared_mask(cpu)); if (__this_cpu_read(cpu_info.x86_max_cores) == 1) { cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); @@ -376,8 +359,8 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { - cpumask_set_cpu(i, c->llc_shared_map); - cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); + cpumask_set_cpu(i, cpu_llc_shared_mask(cpu)); + cpumask_set_cpu(cpu, cpu_llc_shared_mask(i)); } if (c->phys_proc_id == cpu_data(i).phys_proc_id) { cpumask_set_cpu(i, cpu_core_mask(cpu)); @@ -416,7 +399,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu) !(cpu_has(c, X86_FEATURE_AMD_DCM))) return cpu_core_mask(cpu); else - return c->llc_shared_map; + return cpu_llc_shared_mask(cpu); } static void impress_friends(void) @@ -885,6 +868,14 @@ int __cpuinit native_cpu_up(unsigned int cpu) return 0; } +/** + * arch_disable_smp_support() - disables SMP support for x86 at runtime + */ +void arch_disable_smp_support(void) +{ + disable_ioapic_support(); +} + /* * Fall back to non SMP mode after errors. * @@ -984,7 +975,7 @@ static int __init smp_sanity_check(unsigned max_cpus) "(tell your hw vendor)\n"); } smpboot_clear_io_apic(); - arch_disable_smp_support(); + disable_ioapic_support(); return -1; } @@ -1028,19 +1019,19 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) preempt_disable(); smp_cpu_index_default(); - memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info)); - cpumask_copy(cpu_callin_mask, cpumask_of(0)); - mb(); + /* * Setup boot CPU information */ smp_store_cpu_info(0); /* Final full version of the data */ + cpumask_copy(cpu_callin_mask, cpumask_of(0)); + mb(); current_thread_info()->cpu = 0; /* needed? */ for_each_possible_cpu(i) { zalloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); zalloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); - zalloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); + zalloc_cpumask_var(&per_cpu(cpu_llc_shared_map, i), GFP_KERNEL); } set_cpu_sibling_map(0); diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index b35786dc9b8f..5f181742e8f9 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -340,3 +340,6 @@ ENTRY(sys_call_table) .long sys_fanotify_init .long sys_fanotify_mark .long sys_prlimit64 /* 340 */ + .long sys_name_to_handle_at + .long sys_open_by_handle_at + .long sys_clock_adjtime diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index bf4700755184..0381e1f3baed 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -105,6 +105,7 @@ SECTIONS SCHED_TEXT LOCK_TEXT KPROBES_TEXT + ENTRY_TEXT IRQENTRY_TEXT *(.fixup) *(.gnu.warning) @@ -305,7 +306,7 @@ SECTIONS } #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP) - PERCPU(THREAD_SIZE) + PERCPU(PAGE_SIZE) #endif . = ALIGN(PAGE_SIZE); diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 1b950d151e58..9796c2f3d074 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -52,6 +52,7 @@ extern void *__memcpy(void *, const void *, __kernel_size_t); EXPORT_SYMBOL(memset); EXPORT_SYMBOL(memcpy); EXPORT_SYMBOL(__memcpy); +EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(empty_zero_page); #ifndef CONFIG_PARAVIRT diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1357d7cf4ec8..db932760ea82 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -62,21 +62,21 @@ TRACE_EVENT(kvm_hv_hypercall, TP_ARGS(code, fast, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( - __field( __u16, code ) - __field( bool, fast ) __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) + __field( __u16, code ) + __field( bool, fast ) ), TP_fast_assign( - __entry->code = code; - __entry->fast = fast; __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; + __entry->code = code; + __entry->fast = fast; ), TP_printk("code 0x%x %s cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index eba687f0cc0c..b9ec1c74943c 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -847,7 +847,7 @@ static void __init lguest_init_IRQ(void) void lguest_setup_irq(unsigned int irq) { irq_alloc_desc_at(irq, 0); - set_irq_chip_and_handler_name(irq, &lguest_irq_controller, + irq_set_chip_and_handler_name(irq, &lguest_irq_controller, handle_level_irq, "level"); } @@ -995,7 +995,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ - set_irq_handler(0, lguest_time_irq); + irq_set_handler(0, lguest_time_irq); clocksource_register(&lguest_clock); diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S index 2cda60a06e65..e8e7e0d06f42 100644 --- a/arch/x86/lib/atomic64_386_32.S +++ b/arch/x86/lib/atomic64_386_32.S @@ -15,14 +15,12 @@ /* if you want SMP support, implement these with real spinlocks */ .macro LOCK reg - pushfl - CFI_ADJUST_CFA_OFFSET 4 + pushfl_cfi cli .endm .macro UNLOCK reg - popfl - CFI_ADJUST_CFA_OFFSET -4 + popfl_cfi .endm #define BEGIN(op) \ diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S index 71e080de3352..391a083674b4 100644 --- a/arch/x86/lib/atomic64_cx8_32.S +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -14,14 +14,12 @@ #include <asm/dwarf2.h> .macro SAVE reg - pushl %\reg - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %\reg CFI_REL_OFFSET \reg, 0 .endm .macro RESTORE reg - popl %\reg - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %\reg CFI_RESTORE \reg .endm diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index adbccd0bbb78..78d16a554db0 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -50,11 +50,9 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) */ ENTRY(csum_partial) CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len @@ -132,11 +130,9 @@ ENTRY(csum_partial) jz 8f roll $8, %eax 8: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi ret CFI_ENDPROC @@ -148,11 +144,9 @@ ENDPROC(csum_partial) ENTRY(csum_partial) CFI_STARTPROC - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl 20(%esp),%eax # Function arg: unsigned int sum movl 16(%esp),%ecx # Function arg: int len @@ -260,11 +254,9 @@ ENTRY(csum_partial) jz 90f roll $8, %eax 90: - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi ret CFI_ENDPROC @@ -309,14 +301,11 @@ ENTRY(csum_partial_copy_generic) CFI_STARTPROC subl $4,%esp CFI_ADJUST_CFA_OFFSET 4 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edi CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len @@ -426,17 +415,13 @@ DST( movb %cl, (%edi) ) .previous - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edi CFI_RESTORE edi - popl %ecx # equivalent to addl $4,%esp - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx # equivalent to addl $4,%esp ret CFI_ENDPROC ENDPROC(csum_partial_copy_generic) @@ -459,14 +444,11 @@ ENDPROC(csum_partial_copy_generic) ENTRY(csum_partial_copy_generic) CFI_STARTPROC - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ebx CFI_REL_OFFSET ebx, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edi CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %esi CFI_REL_OFFSET esi, 0 movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst @@ -527,14 +509,11 @@ DST( movb %dl, (%edi) ) jmp 7b .previous - popl %esi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %esi CFI_RESTORE esi - popl %edi - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edi CFI_RESTORE edi - popl %ebx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ebx CFI_RESTORE ebx ret CFI_ENDPROC diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S new file mode 100644 index 000000000000..0ecb8433e5a8 --- /dev/null +++ b/arch/x86/lib/memmove_64.S @@ -0,0 +1,197 @@ +/* + * Normally compiler builtins are used, but sometimes the compiler calls out + * of line code. Based on asm-i386/string.h. + * + * This assembly file is re-written from memmove_64.c file. + * - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com> + */ +#define _STRING_C +#include <linux/linkage.h> +#include <asm/dwarf2.h> + +#undef memmove + +/* + * Implement memmove(). This can handle overlap between src and dst. + * + * Input: + * rdi: dest + * rsi: src + * rdx: count + * + * Output: + * rax: dest + */ +ENTRY(memmove) + CFI_STARTPROC + /* Handle more 32bytes in loop */ + mov %rdi, %rax + cmp $0x20, %rdx + jb 1f + + /* Decide forward/backward copy mode */ + cmp %rdi, %rsi + jb 2f + + /* + * movsq instruction have many startup latency + * so we handle small size by general register. + */ + cmp $680, %rdx + jb 3f + /* + * movsq instruction is only good for aligned case. + */ + + cmpb %dil, %sil + je 4f +3: + sub $0x20, %rdx + /* + * We gobble 32byts forward in each loop. + */ +5: + sub $0x20, %rdx + movq 0*8(%rsi), %r11 + movq 1*8(%rsi), %r10 + movq 2*8(%rsi), %r9 + movq 3*8(%rsi), %r8 + leaq 4*8(%rsi), %rsi + + movq %r11, 0*8(%rdi) + movq %r10, 1*8(%rdi) + movq %r9, 2*8(%rdi) + movq %r8, 3*8(%rdi) + leaq 4*8(%rdi), %rdi + jae 5b + addq $0x20, %rdx + jmp 1f + /* + * Handle data forward by movsq. + */ + .p2align 4 +4: + movq %rdx, %rcx + movq -8(%rsi, %rdx), %r11 + lea -8(%rdi, %rdx), %r10 + shrq $3, %rcx + rep movsq + movq %r11, (%r10) + jmp 13f + /* + * Handle data backward by movsq. + */ + .p2align 4 +7: + movq %rdx, %rcx + movq (%rsi), %r11 + movq %rdi, %r10 + leaq -8(%rsi, %rdx), %rsi + leaq -8(%rdi, %rdx), %rdi + shrq $3, %rcx + std + rep movsq + cld + movq %r11, (%r10) + jmp 13f + + /* + * Start to prepare for backward copy. + */ + .p2align 4 +2: + cmp $680, %rdx + jb 6f + cmp %dil, %sil + je 7b +6: + /* + * Calculate copy position to tail. + */ + addq %rdx, %rsi + addq %rdx, %rdi + subq $0x20, %rdx + /* + * We gobble 32byts backward in each loop. + */ +8: + subq $0x20, %rdx + movq -1*8(%rsi), %r11 + movq -2*8(%rsi), %r10 + movq -3*8(%rsi), %r9 + movq -4*8(%rsi), %r8 + leaq -4*8(%rsi), %rsi + + movq %r11, -1*8(%rdi) + movq %r10, -2*8(%rdi) + movq %r9, -3*8(%rdi) + movq %r8, -4*8(%rdi) + leaq -4*8(%rdi), %rdi + jae 8b + /* + * Calculate copy position to head. + */ + addq $0x20, %rdx + subq %rdx, %rsi + subq %rdx, %rdi +1: + cmpq $16, %rdx + jb 9f + /* + * Move data from 16 bytes to 31 bytes. + */ + movq 0*8(%rsi), %r11 + movq 1*8(%rsi), %r10 + movq -2*8(%rsi, %rdx), %r9 + movq -1*8(%rsi, %rdx), %r8 + movq %r11, 0*8(%rdi) + movq %r10, 1*8(%rdi) + movq %r9, -2*8(%rdi, %rdx) + movq %r8, -1*8(%rdi, %rdx) + jmp 13f + .p2align 4 +9: + cmpq $8, %rdx + jb 10f + /* + * Move data from 8 bytes to 15 bytes. + */ + movq 0*8(%rsi), %r11 + movq -1*8(%rsi, %rdx), %r10 + movq %r11, 0*8(%rdi) + movq %r10, -1*8(%rdi, %rdx) + jmp 13f +10: + cmpq $4, %rdx + jb 11f + /* + * Move data from 4 bytes to 7 bytes. + */ + movl (%rsi), %r11d + movl -4(%rsi, %rdx), %r10d + movl %r11d, (%rdi) + movl %r10d, -4(%rdi, %rdx) + jmp 13f +11: + cmp $2, %rdx + jb 12f + /* + * Move data from 2 bytes to 3 bytes. + */ + movw (%rsi), %r11w + movw -2(%rsi, %rdx), %r10w + movw %r11w, (%rdi) + movw %r10w, -2(%rdi, %rdx) + jmp 13f +12: + cmp $1, %rdx + jb 13f + /* + * Move data for 1 byte. + */ + movb (%rsi), %r11b + movb %r11b, (%rdi) +13: + retq + CFI_ENDPROC +ENDPROC(memmove) diff --git a/arch/x86/lib/memmove_64.c b/arch/x86/lib/memmove_64.c deleted file mode 100644 index 6d0f0ec41b34..000000000000 --- a/arch/x86/lib/memmove_64.c +++ /dev/null @@ -1,192 +0,0 @@ -/* Normally compiler builtins are used, but sometimes the compiler calls out - of line code. Based on asm-i386/string.h. - */ -#define _STRING_C -#include <linux/string.h> -#include <linux/module.h> - -#undef memmove -void *memmove(void *dest, const void *src, size_t count) -{ - unsigned long d0,d1,d2,d3,d4,d5,d6,d7; - char *ret; - - __asm__ __volatile__( - /* Handle more 32bytes in loop */ - "mov %2, %3\n\t" - "cmp $0x20, %0\n\t" - "jb 1f\n\t" - - /* Decide forward/backward copy mode */ - "cmp %2, %1\n\t" - "jb 2f\n\t" - - /* - * movsq instruction have many startup latency - * so we handle small size by general register. - */ - "cmp $680, %0\n\t" - "jb 3f\n\t" - /* - * movsq instruction is only good for aligned case. - */ - "cmpb %%dil, %%sil\n\t" - "je 4f\n\t" - "3:\n\t" - "sub $0x20, %0\n\t" - /* - * We gobble 32byts forward in each loop. - */ - "5:\n\t" - "sub $0x20, %0\n\t" - "movq 0*8(%1), %4\n\t" - "movq 1*8(%1), %5\n\t" - "movq 2*8(%1), %6\n\t" - "movq 3*8(%1), %7\n\t" - "leaq 4*8(%1), %1\n\t" - - "movq %4, 0*8(%2)\n\t" - "movq %5, 1*8(%2)\n\t" - "movq %6, 2*8(%2)\n\t" - "movq %7, 3*8(%2)\n\t" - "leaq 4*8(%2), %2\n\t" - "jae 5b\n\t" - "addq $0x20, %0\n\t" - "jmp 1f\n\t" - /* - * Handle data forward by movsq. - */ - ".p2align 4\n\t" - "4:\n\t" - "movq %0, %8\n\t" - "movq -8(%1, %0), %4\n\t" - "lea -8(%2, %0), %5\n\t" - "shrq $3, %8\n\t" - "rep movsq\n\t" - "movq %4, (%5)\n\t" - "jmp 13f\n\t" - /* - * Handle data backward by movsq. - */ - ".p2align 4\n\t" - "7:\n\t" - "movq %0, %8\n\t" - "movq (%1), %4\n\t" - "movq %2, %5\n\t" - "leaq -8(%1, %0), %1\n\t" - "leaq -8(%2, %0), %2\n\t" - "shrq $3, %8\n\t" - "std\n\t" - "rep movsq\n\t" - "cld\n\t" - "movq %4, (%5)\n\t" - "jmp 13f\n\t" - - /* - * Start to prepare for backward copy. - */ - ".p2align 4\n\t" - "2:\n\t" - "cmp $680, %0\n\t" - "jb 6f \n\t" - "cmp %%dil, %%sil\n\t" - "je 7b \n\t" - "6:\n\t" - /* - * Calculate copy position to tail. - */ - "addq %0, %1\n\t" - "addq %0, %2\n\t" - "subq $0x20, %0\n\t" - /* - * We gobble 32byts backward in each loop. - */ - "8:\n\t" - "subq $0x20, %0\n\t" - "movq -1*8(%1), %4\n\t" - "movq -2*8(%1), %5\n\t" - "movq -3*8(%1), %6\n\t" - "movq -4*8(%1), %7\n\t" - "leaq -4*8(%1), %1\n\t" - - "movq %4, -1*8(%2)\n\t" - "movq %5, -2*8(%2)\n\t" - "movq %6, -3*8(%2)\n\t" - "movq %7, -4*8(%2)\n\t" - "leaq -4*8(%2), %2\n\t" - "jae 8b\n\t" - /* - * Calculate copy position to head. - */ - "addq $0x20, %0\n\t" - "subq %0, %1\n\t" - "subq %0, %2\n\t" - "1:\n\t" - "cmpq $16, %0\n\t" - "jb 9f\n\t" - /* - * Move data from 16 bytes to 31 bytes. - */ - "movq 0*8(%1), %4\n\t" - "movq 1*8(%1), %5\n\t" - "movq -2*8(%1, %0), %6\n\t" - "movq -1*8(%1, %0), %7\n\t" - "movq %4, 0*8(%2)\n\t" - "movq %5, 1*8(%2)\n\t" - "movq %6, -2*8(%2, %0)\n\t" - "movq %7, -1*8(%2, %0)\n\t" - "jmp 13f\n\t" - ".p2align 4\n\t" - "9:\n\t" - "cmpq $8, %0\n\t" - "jb 10f\n\t" - /* - * Move data from 8 bytes to 15 bytes. - */ - "movq 0*8(%1), %4\n\t" - "movq -1*8(%1, %0), %5\n\t" - "movq %4, 0*8(%2)\n\t" - "movq %5, -1*8(%2, %0)\n\t" - "jmp 13f\n\t" - "10:\n\t" - "cmpq $4, %0\n\t" - "jb 11f\n\t" - /* - * Move data from 4 bytes to 7 bytes. - */ - "movl (%1), %4d\n\t" - "movl -4(%1, %0), %5d\n\t" - "movl %4d, (%2)\n\t" - "movl %5d, -4(%2, %0)\n\t" - "jmp 13f\n\t" - "11:\n\t" - "cmp $2, %0\n\t" - "jb 12f\n\t" - /* - * Move data from 2 bytes to 3 bytes. - */ - "movw (%1), %4w\n\t" - "movw -2(%1, %0), %5w\n\t" - "movw %4w, (%2)\n\t" - "movw %5w, -2(%2, %0)\n\t" - "jmp 13f\n\t" - "12:\n\t" - "cmp $1, %0\n\t" - "jb 13f\n\t" - /* - * Move data for 1 byte. - */ - "movb (%1), %4b\n\t" - "movb %4b, (%2)\n\t" - "13:\n\t" - : "=&d" (d0), "=&S" (d1), "=&D" (d2), "=&a" (ret) , - "=r"(d3), "=r"(d4), "=r"(d5), "=r"(d6), "=&c" (d7) - :"0" (count), - "1" (src), - "2" (dest) - :"memory"); - - return ret; - -} -EXPORT_SYMBOL(memmove); diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S index 41fcf00e49df..67743977398b 100644 --- a/arch/x86/lib/rwsem_64.S +++ b/arch/x86/lib/rwsem_64.S @@ -23,43 +23,50 @@ #include <asm/dwarf2.h> #define save_common_regs \ - pushq %rdi; \ - pushq %rsi; \ - pushq %rcx; \ - pushq %r8; \ - pushq %r9; \ - pushq %r10; \ - pushq %r11 + pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ + pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ + pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ + pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ + pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ + pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ + pushq_cfi %r11; CFI_REL_OFFSET r11, 0 #define restore_common_regs \ - popq %r11; \ - popq %r10; \ - popq %r9; \ - popq %r8; \ - popq %rcx; \ - popq %rsi; \ - popq %rdi + popq_cfi %r11; CFI_RESTORE r11; \ + popq_cfi %r10; CFI_RESTORE r10; \ + popq_cfi %r9; CFI_RESTORE r9; \ + popq_cfi %r8; CFI_RESTORE r8; \ + popq_cfi %rcx; CFI_RESTORE rcx; \ + popq_cfi %rsi; CFI_RESTORE rsi; \ + popq_cfi %rdi; CFI_RESTORE rdi /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC save_common_regs - pushq %rdx + pushq_cfi %rdx + CFI_REL_OFFSET rdx, 0 movq %rax,%rdi call rwsem_down_read_failed - popq %rdx + popq_cfi %rdx + CFI_RESTORE rdx restore_common_regs ret - ENDPROC(call_rwsem_down_read_failed) + CFI_ENDPROC +ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC save_common_regs movq %rax,%rdi call rwsem_down_write_failed restore_common_regs ret - ENDPROC(call_rwsem_down_write_failed) + CFI_ENDPROC +ENDPROC(call_rwsem_down_write_failed) ENTRY(call_rwsem_wake) + CFI_STARTPROC decl %edx /* do nothing if still outstanding active readers */ jnz 1f save_common_regs @@ -67,15 +74,20 @@ ENTRY(call_rwsem_wake) call rwsem_wake restore_common_regs 1: ret - ENDPROC(call_rwsem_wake) + CFI_ENDPROC +ENDPROC(call_rwsem_wake) /* Fix up special calling conventions */ ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC save_common_regs - pushq %rdx + pushq_cfi %rdx + CFI_REL_OFFSET rdx, 0 movq %rax,%rdi call rwsem_downgrade_wake - popq %rdx + popq_cfi %rdx + CFI_RESTORE rdx restore_common_regs ret - ENDPROC(call_rwsem_downgrade_wake) + CFI_ENDPROC +ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 648fe4741782..06691daa4108 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -36,7 +36,7 @@ */ #ifdef CONFIG_SMP ENTRY(__write_lock_failed) - CFI_STARTPROC simple + CFI_STARTPROC FRAME 2: LOCK_PREFIX addl $ RW_LOCK_BIAS,(%eax) @@ -74,29 +74,23 @@ ENTRY(__read_lock_failed) /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edx CFI_REL_OFFSET edx,0 call rwsem_down_read_failed - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edx + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_down_read_failed) ENTRY(call_rwsem_down_write_failed) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 calll rwsem_down_write_failed - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_down_write_failed) @@ -105,12 +99,10 @@ ENTRY(call_rwsem_wake) CFI_STARTPROC decw %dx /* do nothing if still outstanding active readers */ jnz 1f - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 call rwsem_wake - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %ecx 1: ret CFI_ENDPROC ENDPROC(call_rwsem_wake) @@ -118,17 +110,13 @@ ENTRY(call_rwsem_wake) /* Fix up special calling conventions */ ENTRY(call_rwsem_downgrade_wake) CFI_STARTPROC - push %ecx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %ecx CFI_REL_OFFSET ecx,0 - push %edx - CFI_ADJUST_CFA_OFFSET 4 + pushl_cfi %edx CFI_REL_OFFSET edx,0 call rwsem_downgrade_wake - pop %edx - CFI_ADJUST_CFA_OFFSET -4 - pop %ecx - CFI_ADJUST_CFA_OFFSET -4 + popl_cfi %edx + popl_cfi %ecx ret CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S index 650b11e00ecc..2930ae05d773 100644 --- a/arch/x86/lib/thunk_32.S +++ b/arch/x86/lib/thunk_32.S @@ -7,24 +7,6 @@ #include <linux/linkage.h> -#define ARCH_TRACE_IRQS_ON \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_on; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - -#define ARCH_TRACE_IRQS_OFF \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call trace_hardirqs_off; \ - popl %edx; \ - popl %ecx; \ - popl %eax; - #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in eax (arg1) */ .macro thunk_ra name,func diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index bf9a7d5a5428..782b082c9ff7 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -22,26 +22,6 @@ CFI_ENDPROC .endm - /* rdi: arg1 ... normal C conventions. rax is passed from C. */ - .macro thunk_retrax name,func - .globl \name -\name: - CFI_STARTPROC - SAVE_ARGS - call \func - jmp restore_norax - CFI_ENDPROC - .endm - - - .section .sched.text, "ax" -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM - thunk rwsem_down_read_failed_thunk,rwsem_down_read_failed - thunk rwsem_down_write_failed_thunk,rwsem_down_write_failed - thunk rwsem_wake_thunk,rwsem_wake - thunk rwsem_downgrade_thunk,rwsem_downgrade_wake -#endif - #ifdef CONFIG_TRACE_IRQFLAGS /* put return address in rdi (arg1) */ .macro thunk_ra name,func @@ -72,10 +52,3 @@ restore: RESTORE_ARGS ret CFI_ENDPROC - - CFI_STARTPROC - SAVE_ARGS -restore_norax: - RESTORE_ARGS 1 - ret - CFI_ENDPROC diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 25cd4a07d09f..8c4085a95ef1 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -20,7 +20,8 @@ #include <asm/xen/pci.h> #ifdef CONFIG_ACPI -static int xen_hvm_register_pirq(u32 gsi, int triggering) +static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, + int trigger, int polarity) { int rc, irq; struct physdev_map_pirq map_irq; @@ -41,7 +42,7 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return -1; } - if (triggering == ACPI_EDGE_SENSITIVE) { + if (trigger == ACPI_EDGE_SENSITIVE) { shareable = 0; name = "ioapic-edge"; } else { @@ -55,12 +56,6 @@ static int xen_hvm_register_pirq(u32 gsi, int triggering) return irq; } - -static int acpi_register_gsi_xen_hvm(struct device *dev, u32 gsi, - int trigger, int polarity) -{ - return xen_hvm_register_pirq(gsi, trigger); -} #endif #if defined(CONFIG_PCI_MSI) @@ -91,7 +86,7 @@ static void xen_msi_compose_msg(struct pci_dev *pdev, unsigned int pirq, static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, pirq, ret = 0; + int irq, pirq; struct msi_desc *msidesc; struct msi_msg msg; @@ -99,39 +94,32 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) __read_msi_msg(msidesc, &msg); pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (xen_irq_from_pirq(pirq) >= 0 && msg.data == XEN_PIRQ_MSI_DATA) { - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, XEN_ALLOC_IRQ); - if (irq < 0) + if (msg.data != XEN_PIRQ_MSI_DATA || + xen_irq_from_pirq(pirq) < 0) { + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) goto error; - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - printk(KERN_DEBUG "xen: msi already setup: msi --> irq=%d" - " pirq=%d\n", irq, pirq); - return 0; + xen_msi_compose_msg(dev, pirq, &msg); + __write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); + } else { + dev_dbg(&dev->dev, + "xen: msi already bound to pirq=%d\n", pirq); } - xen_allocate_pirq_msi((type == PCI_CAP_ID_MSIX) ? - "msi-x" : "msi", &irq, &pirq, (XEN_ALLOC_IRQ | XEN_ALLOC_PIRQ)); - if (irq < 0 || pirq < 0) + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (irq < 0) goto error; - printk(KERN_DEBUG "xen: msi --> irq=%d, pirq=%d\n", irq, pirq); - xen_msi_compose_msg(dev, pirq, &msg); - ret = set_irq_msi(irq, msidesc); - if (ret < 0) - goto error_while; - write_msi_msg(irq, &msg); + dev_dbg(&dev->dev, + "xen: msi --> pirq=%d --> irq=%d\n", pirq, irq); } return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); - - return ret; + dev_err(&dev->dev, + "Xen PCI frontend has not registered MSI/MSI-X support!\n"); + return -ENODEV; } /* @@ -150,35 +138,26 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return -ENOMEM; if (type == PCI_CAP_ID_MSIX) - ret = xen_pci_frontend_enable_msix(dev, &v, nvec); + ret = xen_pci_frontend_enable_msix(dev, v, nvec); else - ret = xen_pci_frontend_enable_msi(dev, &v); + ret = xen_pci_frontend_enable_msi(dev, v); if (ret) goto error; i = 0; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_allocate_pirq(v[i], 0, /* not sharable */ - (type == PCI_CAP_ID_MSIX) ? - "pcifront-msi-x" : "pcifront-msi"); - if (irq < 0) { - ret = -1; + irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0, + (type == PCI_CAP_ID_MSIX) ? + "pcifront-msi-x" : + "pcifront-msi"); + if (irq < 0) goto free; - } - - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error_while; i++; } kfree(v); return 0; -error_while: - unbind_from_irqhandler(irq, NULL); error: - if (ret == -ENODEV) - dev_err(&dev->dev, "Xen PCI frontend has not registered" \ - " MSI/MSI-X support!\n"); + dev_err(&dev->dev, "Xen PCI frontend has not registered MSI/MSI-X support!\n"); free: kfree(v); return ret; @@ -193,6 +172,9 @@ static void xen_teardown_msi_irqs(struct pci_dev *dev) xen_pci_frontend_disable_msix(dev); else xen_pci_frontend_disable_msi(dev); + + /* Free the IRQ's and the msidesc using the generic code. */ + default_teardown_msi_irqs(dev); } static void xen_teardown_msi_irq(unsigned int irq) @@ -200,47 +182,82 @@ static void xen_teardown_msi_irq(unsigned int irq) xen_destroy_irq(irq); } +#ifdef CONFIG_XEN_DOM0 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { - int irq, ret; + int ret = 0; struct msi_desc *msidesc; list_for_each_entry(msidesc, &dev->msi_list, list) { - irq = xen_create_msi_irq(dev, msidesc, type); - if (irq < 0) - return -1; + struct physdev_map_pirq map_irq; - ret = set_irq_msi(irq, msidesc); - if (ret) - goto error; - } - return 0; + memset(&map_irq, 0, sizeof(map_irq)); + map_irq.domid = DOMID_SELF; + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + map_irq.devfn = dev->devfn; -error: - xen_destroy_irq(irq); + if (type == PCI_CAP_ID_MSIX) { + int pos; + u32 table_offset, bir; + + pos = pci_find_capability(dev, PCI_CAP_ID_MSIX); + + pci_read_config_dword(dev, pos + PCI_MSIX_TABLE, + &table_offset); + bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK); + + map_irq.table_base = pci_resource_start(dev, bir); + map_irq.entry_nr = msidesc->msi_attrib.entry_nr; + } + + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + if (ret) { + dev_warn(&dev->dev, "xen map irq failed %d\n", ret); + goto out; + } + + ret = xen_bind_pirq_msi_to_irq(dev, msidesc, + map_irq.pirq, map_irq.index, + (type == PCI_CAP_ID_MSIX) ? + "msi-x" : "msi"); + if (ret < 0) + goto out; + } + ret = 0; +out: return ret; } #endif +#endif static int xen_pcifront_enable_irq(struct pci_dev *dev) { int rc; int share = 1; + u8 gsi; - dev_info(&dev->dev, "Xen PCI enabling IRQ: %d\n", dev->irq); - - if (dev->irq < 0) - return -EINVAL; + rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &gsi); + if (rc < 0) { + dev_warn(&dev->dev, "Xen PCI: failed to read interrupt line: %d\n", + rc); + return rc; + } - if (dev->irq < NR_IRQS_LEGACY) + if (gsi < NR_IRQS_LEGACY) share = 0; - rc = xen_allocate_pirq(dev->irq, share, "pcifront"); + rc = xen_allocate_pirq(gsi, share, "pcifront"); if (rc < 0) { - dev_warn(&dev->dev, "Xen PCI IRQ: %d, failed to register:%d\n", - dev->irq, rc); + dev_warn(&dev->dev, "Xen PCI: failed to register GSI%d: %d\n", + gsi, rc); return rc; } + + dev->irq = rc; + dev_info(&dev->dev, "Xen PCI mapped GSI%d to IRQ%d\n", gsi, dev->irq); return 0; } diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index 7b24460917d5..374a05d8ad22 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -131,7 +131,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, unsigned long mmr_offset, int limit) { const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_cfg *cfg = get_irq_chip_data(irq); + struct irq_cfg *cfg = irq_get_chip_data(irq); unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; int mmr_pnode, err; @@ -148,7 +148,7 @@ arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, else irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, + irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); mmr_value = 0; diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c index 632037671746..fe4cf8294878 100644 --- a/arch/x86/platform/visws/visws_quirks.c +++ b/arch/x86/platform/visws/visws_quirks.c @@ -569,11 +569,13 @@ out_unlock: static struct irqaction master_action = { .handler = piix4_master_intr, .name = "PIIX4-8259", + .flags = IRQF_NO_THREAD, }; static struct irqaction cascade_action = { .handler = no_action, .name = "cascade", + .flags = IRQF_NO_THREAD, }; static inline void set_piix4_virtual_irq_type(void) @@ -606,7 +608,7 @@ static void __init visws_pre_intr_init(void) chip = &cobalt_irq_type; if (chip) - set_irq_chip(i, chip); + irq_set_chip(i, chip); } setup_irq(CO_IRQ_8259, &master_action); diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5b54892e4bc3..e4343fe488ed 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -48,3 +48,11 @@ config XEN_DEBUG_FS help Enable statistics output and various tuning options in debugfs. Enabling this option may incur a significant performance overhead. + +config XEN_DEBUG + bool "Enable Xen debug checks" + depends on XEN + default n + help + Enable various WARN_ON checks in the Xen MMU code. + Enabling this option WILL incur a significant performance overhead. diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 50542efe45fb..49dbd78ec3cb 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1284,15 +1284,14 @@ static int init_hvm_pv_info(int *major, int *minor) xen_setup_features(); - pv_info = xen_info; - pv_info.kernel_rpl = 0; + pv_info.name = "Xen HVM"; xen_domain_type = XEN_HVM_DOMAIN; return 0; } -void xen_hvm_init_shared_info(void) +void __ref xen_hvm_init_shared_info(void) { int cpu; struct xen_add_to_physmap xatp; @@ -1331,6 +1330,8 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; + if (xen_have_vector_callback) + xen_init_lock_cpu(cpu); break; default: break; @@ -1355,6 +1356,7 @@ static void __init xen_hvm_guest_init(void) if (xen_feature(XENFEAT_hvm_callback_vector)) xen_have_vector_callback = 1; + xen_hvm_smp_init(); register_cpu_notifier(&xen_hvm_cpu_notifier); xen_unplug_emulated_devices(); have_vcpu_info_placement = 0; diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 77f3228cfc8d..3f6f3347aa17 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -46,6 +46,7 @@ #include <linux/module.h> #include <linux/gfp.h> #include <linux/memblock.h> +#include <linux/seq_file.h> #include <asm/pgtable.h> #include <asm/tlbflush.h> @@ -416,8 +417,12 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (val & _PAGE_PRESENT) { unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; pteval_t flags = val & PTE_FLAGS_MASK; - unsigned long mfn = pfn_to_mfn(pfn); + unsigned long mfn; + if (!xen_feature(XENFEAT_auto_translated_physmap)) + mfn = get_phys_to_machine(pfn); + else + mfn = pfn; /* * If there's no mfn for the pfn, then just create an * empty non-present pte. Unfortunately this loses @@ -427,8 +432,18 @@ static pteval_t pte_pfn_to_mfn(pteval_t val) if (unlikely(mfn == INVALID_P2M_ENTRY)) { mfn = 0; flags = 0; + } else { + /* + * Paramount to do this test _after_ the + * INVALID_P2M_ENTRY as INVALID_P2M_ENTRY & + * IDENTITY_FRAME_BIT resolves to true. + */ + mfn &= ~FOREIGN_FRAME_BIT; + if (mfn & IDENTITY_FRAME_BIT) { + mfn &= ~IDENTITY_FRAME_BIT; + flags |= _PAGE_IOMAP; + } } - val = ((pteval_t)mfn << PAGE_SHIFT) | flags; } @@ -532,6 +547,41 @@ pte_t xen_make_pte(pteval_t pte) } PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); +#ifdef CONFIG_XEN_DEBUG +pte_t xen_make_pte_debug(pteval_t pte) +{ + phys_addr_t addr = (pte & PTE_PFN_MASK); + phys_addr_t other_addr; + bool io_page = false; + pte_t _pte; + + if (pte & _PAGE_IOMAP) + io_page = true; + + _pte = xen_make_pte(pte); + + if (!addr) + return _pte; + + if (io_page && + (xen_initial_domain() || addr >= ISA_END_ADDRESS)) { + other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT; + WARN(addr != other_addr, + "0x%lx is using VM_IO, but it is 0x%lx!\n", + (unsigned long)addr, (unsigned long)other_addr); + } else { + pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP; + other_addr = (_pte.pte & PTE_PFN_MASK); + WARN((addr == other_addr) && (!io_page) && (!iomap_set), + "0x%lx is missing VM_IO (and wasn't fixed)!\n", + (unsigned long)addr); + } + + return _pte; +} +PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug); +#endif + pgd_t xen_make_pgd(pgdval_t pgd) { pgd = pte_pfn_to_mfn(pgd); @@ -1940,6 +1990,9 @@ __init void xen_ident_map_ISA(void) static __init void xen_post_allocator_init(void) { +#ifdef CONFIG_XEN_DEBUG + pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug); +#endif pv_mmu_ops.set_pte = xen_set_pte; pv_mmu_ops.set_pmd = xen_set_pmd; pv_mmu_ops.set_pud = xen_set_pud; @@ -2072,7 +2125,7 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, in_frames[i] = virt_to_mfn(vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); if (out_frames) out_frames[i] = virt_to_pfn(vaddr); @@ -2351,6 +2404,18 @@ EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range); #ifdef CONFIG_XEN_DEBUG_FS +static int p2m_dump_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, p2m_dump_show, NULL); +} + +static const struct file_operations p2m_dump_fops = { + .open = p2m_dump_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static struct dentry *d_mmu_debug; static int __init xen_mmu_debugfs(void) @@ -2406,6 +2471,7 @@ static int __init xen_mmu_debugfs(void) debugfs_create_u32("prot_commit_batched", 0444, d_mmu_debug, &mmu_stats.prot_commit_batched); + debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops); return 0; } fs_initcall(xen_mmu_debugfs); diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index fd12d7ce7ff9..215a3ce61068 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -23,6 +23,129 @@ * P2M_PER_PAGE depends on the architecture, as a mfn is always * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to * 512 and 1024 entries respectively. + * + * In short, these structures contain the Machine Frame Number (MFN) of the PFN. + * + * However not all entries are filled with MFNs. Specifically for all other + * leaf entries, or for the top root, or middle one, for which there is a void + * entry, we assume it is "missing". So (for example) + * pfn_to_mfn(0x90909090)=INVALID_P2M_ENTRY. + * + * We also have the possibility of setting 1-1 mappings on certain regions, so + * that: + * pfn_to_mfn(0xc0000)=0xc0000 + * + * The benefit of this is, that we can assume for non-RAM regions (think + * PCI BARs, or ACPI spaces), we can create mappings easily b/c we + * get the PFN value to match the MFN. + * + * For this to work efficiently we have one new page p2m_identity and + * allocate (via reserved_brk) any other pages we need to cover the sides + * (1GB or 4MB boundary violations). All entries in p2m_identity are set to + * INVALID_P2M_ENTRY type (Xen toolstack only recognizes that and MFNs, + * no other fancy value). + * + * On lookup we spot that the entry points to p2m_identity and return the + * identity value instead of dereferencing and returning INVALID_P2M_ENTRY. + * If the entry points to an allocated page, we just proceed as before and + * return the PFN. If the PFN has IDENTITY_FRAME_BIT set we unmask that in + * appropriate functions (pfn_to_mfn). + * + * The reason for having the IDENTITY_FRAME_BIT instead of just returning the + * PFN is that we could find ourselves where pfn_to_mfn(pfn)==pfn for a + * non-identity pfn. To protect ourselves against we elect to set (and get) the + * IDENTITY_FRAME_BIT on all identity mapped PFNs. + * + * This simplistic diagram is used to explain the more subtle piece of code. + * There is also a digram of the P2M at the end that can help. + * Imagine your E820 looking as so: + * + * 1GB 2GB + * /-------------------+---------\/----\ /----------\ /---+-----\ + * | System RAM | Sys RAM ||ACPI| | reserved | | Sys RAM | + * \-------------------+---------/\----/ \----------/ \---+-----/ + * ^- 1029MB ^- 2001MB + * + * [1029MB = 263424 (0x40500), 2001MB = 512256 (0x7D100), + * 2048MB = 524288 (0x80000)] + * + * And dom0_mem=max:3GB,1GB is passed in to the guest, meaning memory past 1GB + * is actually not present (would have to kick the balloon driver to put it in). + * + * When we are told to set the PFNs for identity mapping (see patch: "xen/setup: + * Set identity mapping for non-RAM E820 and E820 gaps.") we pass in the start + * of the PFN and the end PFN (263424 and 512256 respectively). The first step + * is to reserve_brk a top leaf page if the p2m[1] is missing. The top leaf page + * covers 512^2 of page estate (1GB) and in case the start or end PFN is not + * aligned on 512^2*PAGE_SIZE (1GB) we loop on aligned 1GB PFNs from start pfn + * to end pfn. We reserve_brk top leaf pages if they are missing (means they + * point to p2m_mid_missing). + * + * With the E820 example above, 263424 is not 1GB aligned so we allocate a + * reserve_brk page which will cover the PFNs estate from 0x40000 to 0x80000. + * Each entry in the allocate page is "missing" (points to p2m_missing). + * + * Next stage is to determine if we need to do a more granular boundary check + * on the 4MB (or 2MB depending on architecture) off the start and end pfn's. + * We check if the start pfn and end pfn violate that boundary check, and if + * so reserve_brk a middle (p2m[x][y]) leaf page. This way we have a much finer + * granularity of setting which PFNs are missing and which ones are identity. + * In our example 263424 and 512256 both fail the check so we reserve_brk two + * pages. Populate them with INVALID_P2M_ENTRY (so they both have "missing" + * values) and assign them to p2m[1][2] and p2m[1][488] respectively. + * + * At this point we would at minimum reserve_brk one page, but could be up to + * three. Each call to set_phys_range_identity has at maximum a three page + * cost. If we were to query the P2M at this stage, all those entries from + * start PFN through end PFN (so 1029MB -> 2001MB) would return + * INVALID_P2M_ENTRY ("missing"). + * + * The next step is to walk from the start pfn to the end pfn setting + * the IDENTITY_FRAME_BIT on each PFN. This is done in set_phys_range_identity. + * If we find that the middle leaf is pointing to p2m_missing we can swap it + * over to p2m_identity - this way covering 4MB (or 2MB) PFN space. At this + * point we do not need to worry about boundary aligment (so no need to + * reserve_brk a middle page, figure out which PFNs are "missing" and which + * ones are identity), as that has been done earlier. If we find that the + * middle leaf is not occupied by p2m_identity or p2m_missing, we dereference + * that page (which covers 512 PFNs) and set the appropriate PFN with + * IDENTITY_FRAME_BIT. In our example 263424 and 512256 end up there, and we + * set from p2m[1][2][256->511] and p2m[1][488][0->256] with + * IDENTITY_FRAME_BIT set. + * + * All other regions that are void (or not filled) either point to p2m_missing + * (considered missing) or have the default value of INVALID_P2M_ENTRY (also + * considered missing). In our case, p2m[1][2][0->255] and p2m[1][488][257->511] + * contain the INVALID_P2M_ENTRY value and are considered "missing." + * + * This is what the p2m ends up looking (for the E820 above) with this + * fabulous drawing: + * + * p2m /--------------\ + * /-----\ | &mfn_list[0],| /-----------------\ + * | 0 |------>| &mfn_list[1],| /---------------\ | ~0, ~0, .. | + * |-----| | ..., ~0, ~0 | | ~0, ~0, [x]---+----->| IDENTITY [@256] | + * | 1 |---\ \--------------/ | [p2m_identity]+\ | IDENTITY [@257] | + * |-----| \ | [p2m_identity]+\\ | .... | + * | 2 |--\ \-------------------->| ... | \\ \----------------/ + * |-----| \ \---------------/ \\ + * | 3 |\ \ \\ p2m_identity + * |-----| \ \-------------------->/---------------\ /-----------------\ + * | .. +->+ | [p2m_identity]+-->| ~0, ~0, ~0, ... | + * \-----/ / | [p2m_identity]+-->| ..., ~0 | + * / /---------------\ | .... | \-----------------/ + * / | IDENTITY[@0] | /-+-[x], ~0, ~0.. | + * / | IDENTITY[@256]|<----/ \---------------/ + * / | ~0, ~0, .... | + * | \---------------/ + * | + * p2m_missing p2m_missing + * /------------------\ /------------\ + * | [p2m_mid_missing]+---->| ~0, ~0, ~0 | + * | [p2m_mid_missing]+---->| ..., ~0 | + * \------------------/ \------------/ + * + * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT) */ #include <linux/init.h> @@ -30,6 +153,7 @@ #include <linux/list.h> #include <linux/hash.h> #include <linux/sched.h> +#include <linux/seq_file.h> #include <asm/cache.h> #include <asm/setup.h> @@ -59,9 +183,15 @@ static RESERVE_BRK_ARRAY(unsigned long **, p2m_top, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long, p2m_top_mfn, P2M_TOP_PER_PAGE); static RESERVE_BRK_ARRAY(unsigned long *, p2m_top_mfn_p, P2M_TOP_PER_PAGE); +static RESERVE_BRK_ARRAY(unsigned long, p2m_identity, P2M_PER_PAGE); + RESERVE_BRK(p2m_mid, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); RESERVE_BRK(p2m_mid_mfn, PAGE_SIZE * (MAX_DOMAIN_PAGES / (P2M_PER_PAGE * P2M_MID_PER_PAGE))); +/* We might hit two boundary violations at the start and end, at max each + * boundary violation will require three middle nodes. */ +RESERVE_BRK(p2m_mid_identity, PAGE_SIZE * 2 * 3); + static inline unsigned p2m_top_index(unsigned long pfn) { BUG_ON(pfn >= MAX_P2M_PFN); @@ -136,7 +266,7 @@ static void p2m_init(unsigned long *p2m) * - After resume we're called from within stop_machine, but the mfn * tree should alreay be completely allocated. */ -void xen_build_mfn_list_list(void) +void __ref xen_build_mfn_list_list(void) { unsigned long pfn; @@ -221,6 +351,9 @@ void __init xen_build_dynamic_phys_to_machine(void) p2m_top = extend_brk(PAGE_SIZE, PAGE_SIZE); p2m_top_init(p2m_top); + p2m_identity = extend_brk(PAGE_SIZE, PAGE_SIZE); + p2m_init(p2m_identity); + /* * The domain builder gives us a pre-constructed p2m array in * mfn_list for all the pages initially given to us, so we just @@ -266,6 +399,14 @@ unsigned long get_phys_to_machine(unsigned long pfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* + * The INVALID_P2M_ENTRY is filled in both p2m_*identity + * and in p2m_*missing, so returning the INVALID_P2M_ENTRY + * would be wrong. + */ + if (p2m_top[topidx][mididx] == p2m_identity) + return IDENTITY_FRAME(pfn); + return p2m_top[topidx][mididx][idx]; } EXPORT_SYMBOL_GPL(get_phys_to_machine); @@ -335,9 +476,11 @@ static bool alloc_p2m(unsigned long pfn) p2m_top_mfn_p[topidx] = mid_mfn; } - if (p2m_top[topidx][mididx] == p2m_missing) { + if (p2m_top[topidx][mididx] == p2m_identity || + p2m_top[topidx][mididx] == p2m_missing) { /* p2m leaf page is missing */ unsigned long *p2m; + unsigned long *p2m_orig = p2m_top[topidx][mididx]; p2m = alloc_p2m_page(); if (!p2m) @@ -345,7 +488,7 @@ static bool alloc_p2m(unsigned long pfn) p2m_init(p2m); - if (cmpxchg(&mid[mididx], p2m_missing, p2m) != p2m_missing) + if (cmpxchg(&mid[mididx], p2m_orig, p2m) != p2m_orig) free_p2m_page(p2m); else mid_mfn[mididx] = virt_to_mfn(p2m); @@ -354,11 +497,91 @@ static bool alloc_p2m(unsigned long pfn) return true; } +bool __early_alloc_p2m(unsigned long pfn) +{ + unsigned topidx, mididx, idx; + + topidx = p2m_top_index(pfn); + mididx = p2m_mid_index(pfn); + idx = p2m_index(pfn); + + /* Pfff.. No boundary cross-over, lets get out. */ + if (!idx) + return false; + + WARN(p2m_top[topidx][mididx] == p2m_identity, + "P2M[%d][%d] == IDENTITY, should be MISSING (or alloced)!\n", + topidx, mididx); + + /* + * Could be done by xen_build_dynamic_phys_to_machine.. + */ + if (p2m_top[topidx][mididx] != p2m_missing) + return false; + + /* Boundary cross-over for the edges: */ + if (idx) { + unsigned long *p2m = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_init(p2m); + + p2m_top[topidx][mididx] = p2m; + + } + return idx != 0; +} +unsigned long set_phys_range_identity(unsigned long pfn_s, + unsigned long pfn_e) +{ + unsigned long pfn; + + if (unlikely(pfn_s >= MAX_P2M_PFN || pfn_e >= MAX_P2M_PFN)) + return 0; + + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) + return pfn_e - pfn_s; + + if (pfn_s > pfn_e) + return 0; + + for (pfn = (pfn_s & ~(P2M_MID_PER_PAGE * P2M_PER_PAGE - 1)); + pfn < ALIGN(pfn_e, (P2M_MID_PER_PAGE * P2M_PER_PAGE)); + pfn += P2M_MID_PER_PAGE * P2M_PER_PAGE) + { + unsigned topidx = p2m_top_index(pfn); + if (p2m_top[topidx] == p2m_mid_missing) { + unsigned long **mid = extend_brk(PAGE_SIZE, PAGE_SIZE); + + p2m_mid_init(mid); + + p2m_top[topidx] = mid; + } + } + + __early_alloc_p2m(pfn_s); + __early_alloc_p2m(pfn_e); + + for (pfn = pfn_s; pfn < pfn_e; pfn++) + if (!__set_phys_to_machine(pfn, IDENTITY_FRAME(pfn))) + break; + + if (!WARN((pfn - pfn_s) != (pfn_e - pfn_s), + "Identity mapping failed. We are %ld short of 1-1 mappings!\n", + (pfn_e - pfn_s) - (pfn - pfn_s))) + printk(KERN_DEBUG "1-1 mapping on %lx->%lx\n", pfn_s, pfn); + + return pfn - pfn_s; +} + /* Try to install p2m mapping; fail if intermediate bits missing */ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) { unsigned topidx, mididx, idx; + if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { + BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); + return true; + } if (unlikely(pfn >= MAX_P2M_PFN)) { BUG_ON(mfn != INVALID_P2M_ENTRY); return true; @@ -368,6 +591,21 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) mididx = p2m_mid_index(pfn); idx = p2m_index(pfn); + /* For sparse holes were the p2m leaf has real PFN along with + * PCI holes, stick in the PFN as the MFN value. + */ + if (mfn != INVALID_P2M_ENTRY && (mfn & IDENTITY_FRAME_BIT)) { + if (p2m_top[topidx][mididx] == p2m_identity) + return true; + + /* Swap over from MISSING to IDENTITY if needed. */ + if (p2m_top[topidx][mididx] == p2m_missing) { + WARN_ON(cmpxchg(&p2m_top[topidx][mididx], p2m_missing, + p2m_identity) != p2m_missing); + return true; + } + } + if (p2m_top[topidx][mididx] == p2m_missing) return mfn == INVALID_P2M_ENTRY; @@ -378,11 +616,6 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) bool set_phys_to_machine(unsigned long pfn, unsigned long mfn) { - if (unlikely(xen_feature(XENFEAT_auto_translated_physmap))) { - BUG_ON(pfn != mfn && mfn != INVALID_P2M_ENTRY); - return true; - } - if (unlikely(!__set_phys_to_machine(pfn, mfn))) { if (!alloc_p2m(pfn)) return false; @@ -421,7 +654,7 @@ int m2p_add_override(unsigned long mfn, struct page *page) { unsigned long flags; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -455,7 +688,7 @@ int m2p_remove_override(struct page *page) unsigned long flags; unsigned long mfn; unsigned long pfn; - unsigned long address; + unsigned long uninitialized_var(address); unsigned level; pte_t *ptep = NULL; @@ -520,3 +753,80 @@ unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn) return ret; } EXPORT_SYMBOL_GPL(m2p_find_override_pfn); + +#ifdef CONFIG_XEN_DEBUG_FS + +int p2m_dump_show(struct seq_file *m, void *v) +{ + static const char * const level_name[] = { "top", "middle", + "entry", "abnormal" }; + static const char * const type_name[] = { "identity", "missing", + "pfn", "abnormal"}; +#define TYPE_IDENTITY 0 +#define TYPE_MISSING 1 +#define TYPE_PFN 2 +#define TYPE_UNKNOWN 3 + unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0; + unsigned int uninitialized_var(prev_level); + unsigned int uninitialized_var(prev_type); + + if (!p2m_top) + return 0; + + for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn++) { + unsigned topidx = p2m_top_index(pfn); + unsigned mididx = p2m_mid_index(pfn); + unsigned idx = p2m_index(pfn); + unsigned lvl, type; + + lvl = 4; + type = TYPE_UNKNOWN; + if (p2m_top[topidx] == p2m_mid_missing) { + lvl = 0; type = TYPE_MISSING; + } else if (p2m_top[topidx] == NULL) { + lvl = 0; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == NULL) { + lvl = 1; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx] == p2m_identity) { + lvl = 1; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx] == p2m_missing) { + lvl = 1; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == 0) { + lvl = 2; type = TYPE_UNKNOWN; + } else if (p2m_top[topidx][mididx][idx] == IDENTITY_FRAME(pfn)) { + lvl = 2; type = TYPE_IDENTITY; + } else if (p2m_top[topidx][mididx][idx] == INVALID_P2M_ENTRY) { + lvl = 2; type = TYPE_MISSING; + } else if (p2m_top[topidx][mididx][idx] == pfn) { + lvl = 2; type = TYPE_PFN; + } else if (p2m_top[topidx][mididx][idx] != pfn) { + lvl = 2; type = TYPE_PFN; + } + if (pfn == 0) { + prev_level = lvl; + prev_type = type; + } + if (pfn == MAX_DOMAIN_PAGES-1) { + lvl = 3; + type = TYPE_UNKNOWN; + } + if (prev_type != type) { + seq_printf(m, " [0x%lx->0x%lx] %s\n", + prev_pfn_type, pfn, type_name[prev_type]); + prev_pfn_type = pfn; + prev_type = type; + } + if (prev_level != lvl) { + seq_printf(m, " [0x%lx->0x%lx] level %s\n", + prev_pfn_level, pfn, level_name[prev_level]); + prev_pfn_level = pfn; + prev_level = lvl; + } + } + return 0; +#undef TYPE_IDENTITY +#undef TYPE_MISSING +#undef TYPE_PFN +#undef TYPE_UNKNOWN +} +#endif diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index a8a66a50d446..fa0269a99377 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -52,6 +52,8 @@ phys_addr_t xen_extra_mem_start, xen_extra_mem_size; static __init void xen_add_extra_mem(unsigned long pages) { + unsigned long pfn; + u64 size = (u64)pages * PAGE_SIZE; u64 extra_start = xen_extra_mem_start + xen_extra_mem_size; @@ -66,6 +68,9 @@ static __init void xen_add_extra_mem(unsigned long pages) xen_extra_mem_size += size; xen_max_p2m_pfn = PFN_DOWN(extra_start + size); + + for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++) + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } static unsigned long __init xen_release_chunk(phys_addr_t start_addr, @@ -104,7 +109,7 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr, WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n", start, end, ret); if (ret == 1) { - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); len++; } } @@ -138,12 +143,55 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn, return released; } +static unsigned long __init xen_set_identity(const struct e820entry *list, + ssize_t map_size) +{ + phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS; + phys_addr_t start_pci = last; + const struct e820entry *entry; + unsigned long identity = 0; + int i; + + for (i = 0, entry = list; i < map_size; i++, entry++) { + phys_addr_t start = entry->addr; + phys_addr_t end = start + entry->size; + + if (start < last) + start = last; + + if (end <= start) + continue; + + /* Skip over the 1MB region. */ + if (last > end) + continue; + + if (entry->type == E820_RAM) { + if (start > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(start)); + + /* Without saving 'last' we would gooble RAM too + * at the end of the loop. */ + last = end; + start_pci = end; + continue; + } + start_pci = min(start, start_pci); + last = end; + } + if (last > start_pci) + identity += set_phys_range_identity( + PFN_UP(start_pci), PFN_DOWN(last)); + return identity; +} /** * machine_specific_memory_setup - Hook for machine specific memory setup. **/ char * __init xen_memory_setup(void) { static struct e820entry map[E820MAX] __initdata; + static struct e820entry map_raw[E820MAX] __initdata; unsigned long max_pfn = xen_start_info->nr_pages; unsigned long long mem_end; @@ -151,6 +199,7 @@ char * __init xen_memory_setup(void) struct xen_memory_map memmap; unsigned long extra_pages = 0; unsigned long extra_limit; + unsigned long identity_pages = 0; int i; int op; @@ -176,6 +225,7 @@ char * __init xen_memory_setup(void) } BUG_ON(rc); + memcpy(map_raw, map, sizeof(map)); e820.nr_map = 0; xen_extra_mem_start = mem_end; for (i = 0; i < memmap.nr_entries; i++) { @@ -194,6 +244,15 @@ char * __init xen_memory_setup(void) end -= delta; extra_pages += PFN_DOWN(delta); + /* + * Set RAM below 4GB that is not for us to be unusable. + * This prevents "System RAM" address space from being + * used as potential resource for I/O address (happens + * when 'allocate_resource' is called). + */ + if (delta && + (xen_initial_domain() && end < 0x100000000ULL)) + e820_add_region(end, delta, E820_UNUSABLE); } if (map[i].size > 0 && end > xen_extra_mem_start) @@ -251,6 +310,13 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(extra_pages); + /* + * Set P2M for all non-RAM pages and E820 gaps to be identity + * type PFNs. We supply it with the non-sanitized version + * of the E820. + */ + identity_pages = xen_set_identity(map_raw, memmap.nr_entries); + printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages); return "Xen"; } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 72a4c7959045..30612441ed99 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -509,3 +509,41 @@ void __init xen_smp_init(void) xen_fill_possible_map(); xen_init_spinlocks(); } + +static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus) +{ + native_smp_prepare_cpus(max_cpus); + WARN_ON(xen_smp_intr_init(0)); + + if (!xen_have_vector_callback) + return; + xen_init_lock_cpu(0); + xen_init_spinlocks(); +} + +static int __cpuinit xen_hvm_cpu_up(unsigned int cpu) +{ + int rc; + rc = native_cpu_up(cpu); + WARN_ON (xen_smp_intr_init(cpu)); + return rc; +} + +static void xen_hvm_cpu_die(unsigned int cpu) +{ + unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL); + unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL); + native_cpu_die(cpu); +} + +void __init xen_hvm_smp_init(void) +{ + smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus; + smp_ops.smp_send_reschedule = xen_smp_send_reschedule; + smp_ops.cpu_up = xen_hvm_cpu_up; + smp_ops.cpu_die = xen_hvm_cpu_die; + smp_ops.send_call_func_ipi = xen_smp_send_call_function_ipi; + smp_ops.send_call_func_single_ipi = xen_smp_send_call_function_single_ipi; +} diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 9bbd63a129b5..45329c8c226e 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -12,7 +12,7 @@ #include "xen-ops.h" #include "mmu.h" -void xen_pre_suspend(void) +void xen_arch_pre_suspend(void) { xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn); xen_start_info->console.domU.mfn = @@ -26,8 +26,9 @@ void xen_pre_suspend(void) BUG(); } -void xen_hvm_post_suspend(int suspend_cancelled) +void xen_arch_hvm_post_suspend(int suspend_cancelled) { +#ifdef CONFIG_XEN_PVHVM int cpu; xen_hvm_init_shared_info(); xen_callback_vector(); @@ -37,9 +38,10 @@ void xen_hvm_post_suspend(int suspend_cancelled) xen_setup_runstate_info(cpu); } } +#endif } -void xen_post_suspend(int suspend_cancelled) +void xen_arch_post_suspend(int suspend_cancelled) { xen_build_mfn_list_list(); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 067759e3d6a5..2e2d370a47b1 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -397,7 +397,9 @@ void xen_setup_timer(int cpu) name = "<timer kasprintf failed>"; irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, - IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER, + IRQF_DISABLED|IRQF_PERCPU| + IRQF_NOBALANCING|IRQF_TIMER| + IRQF_FORCE_RESUME, name, NULL); evt = &per_cpu(xen_clock_events, cpu); diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 1a5ff24e29c0..aaa7291c9259 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -28,9 +28,9 @@ ENTRY(startup_xen) __FINIT .pushsection .text - .align PAGE_SIZE_asm + .align PAGE_SIZE ENTRY(hypercall_page) - .skip PAGE_SIZE_asm + .skip PAGE_SIZE .popsection ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9d41bf985757..3112f55638c4 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -64,10 +64,12 @@ void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); +void __init xen_hvm_smp_init(void); extern cpumask_var_t xen_cpu_initialized_map; #else static inline void xen_smp_init(void) {} +static inline void xen_hvm_smp_init(void) {} #endif #ifdef CONFIG_PARAVIRT_SPINLOCKS |