diff options
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/arch_timer.c | 2 | ||||
-rw-r--r-- | arch/arm/kernel/armksyms.c | 4 | ||||
-rw-r--r-- | arch/arm/kernel/entry-armv.S | 1 | ||||
-rw-r--r-- | arch/arm/kernel/entry-common.S | 20 | ||||
-rw-r--r-- | arch/arm/kernel/head.S | 59 | ||||
-rw-r--r-- | arch/arm/kernel/kprobes-test-arm.c | 4 | ||||
-rw-r--r-- | arch/arm/kernel/kprobes-thumb.c | 2 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 17 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v6.c | 2 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 5 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_xscale.c | 2 | ||||
-rw-r--r-- | arch/arm/kernel/ptrace.c | 37 | ||||
-rw-r--r-- | arch/arm/kernel/signal.c | 46 | ||||
-rw-r--r-- | arch/arm/kernel/signal.h | 2 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 2 | ||||
-rw-r--r-- | arch/arm/kernel/topology.c | 239 | ||||
-rw-r--r-- | arch/arm/kernel/traps.c | 80 | ||||
-rw-r--r-- | arch/arm/kernel/vmlinux.lds.S | 2 |
18 files changed, 384 insertions, 142 deletions
diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c index 675cee09c014..cf258807160d 100644 --- a/arch/arm/kernel/arch_timer.c +++ b/arch/arm/kernel/arch_timer.c @@ -139,7 +139,7 @@ static int __cpuinit arch_timer_setup(struct clock_event_device *clk) /* Be safe... */ arch_timer_disable(); - clk->features = CLOCK_EVT_FEAT_ONESHOT; + clk->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; clk->name = "arch_sys_timer"; clk->rating = 450; clk->set_mode = arch_timer_set_mode; diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 71962284d288..60d3b738d420 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -86,10 +86,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 437f0c426517..0d1851ca6eb9 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -495,6 +495,7 @@ ENDPROC(__und_usr) * The out of line fixup for the ldrt above. */ .pushsection .fixup, "ax" + .align 2 4: mov pc, r9 .popsection .pushsection __ex_table,"a" diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 4afed88d250a..49d9f9305247 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -95,13 +95,7 @@ ENDPROC(ret_to_user) ENTRY(ret_from_fork) bl schedule_tail get_thread_info tsk - ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing mov why, #1 - tst r1, #_TIF_SYSCALL_WORK @ are we tracing syscalls? - beq ret_slow_syscall - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace b ret_slow_syscall ENDPROC(ret_from_fork) @@ -448,10 +442,9 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r2, scno - add r1, sp, #S_OFF - mov r0, #0 @ trace entry [IP = 0] - bl syscall_trace + mov r1, scno + add r0, sp, #S_OFF + bl syscall_trace_enter adr lr, BSYM(__sys_trace_return) @ return address mov scno, r0 @ syscall number (possibly new) @@ -463,10 +456,9 @@ __sys_trace: __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 - mov r2, scno - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace + mov r1, scno + mov r0, sp + bl syscall_trace_exit b ret_slow_syscall .align 5 diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 835898e7d704..3db960e20cb8 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -55,14 +55,6 @@ add \rd, \phys, #TEXT_OFFSET - PG_DIR_SIZE .endm -#ifdef CONFIG_XIP_KERNEL -#define KERNEL_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) -#define KERNEL_END _edata_loc -#else -#define KERNEL_START KERNEL_RAM_VADDR -#define KERNEL_END _end -#endif - /* * Kernel startup entry point. * --------------------------- @@ -218,51 +210,46 @@ __create_page_tables: blo 1b /* - * Now setup the pagetables for our kernel direct - * mapped region. + * Map our RAM from the start to the end of the kernel .bss section. */ - mov r3, pc - mov r3, r3, lsr #SECTION_SHIFT - orr r3, r7, r3, lsl #SECTION_SHIFT - add r0, r4, #(KERNEL_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #((KERNEL_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! - ldr r6, =(KERNEL_END - 1) - add r0, r0, #1 << PMD_ORDER + add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) + ldr r6, =(_end - 1) + orr r3, r8, r7 add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) -1: cmp r0, r6 +1: str r3, [r0], #1 << PMD_ORDER add r3, r3, #1 << SECTION_SHIFT - strls r3, [r0], #1 << PMD_ORDER + cmp r0, r6 bls 1b #ifdef CONFIG_XIP_KERNEL /* - * Map some ram to cover our .data and .bss areas. + * Map the kernel image separately as it is not located in RAM. */ - add r3, r8, #TEXT_OFFSET - orr r3, r3, r7 - add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) - str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> (SECTION_SHIFT - PMD_ORDER)]! - ldr r6, =(_end - 1) - add r0, r0, #4 +#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + mov r3, pc + mov r3, r3, lsr #SECTION_SHIFT + orr r3, r7, r3, lsl #SECTION_SHIFT + add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) + str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + ldr r6, =(_edata_loc - 1) + add r0, r0, #1 << PMD_ORDER add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) 1: cmp r0, r6 - add r3, r3, #1 << 20 - strls r3, [r0], #4 + add r3, r3, #1 << SECTION_SHIFT + strls r3, [r0], #1 << PMD_ORDER bls 1b #endif /* - * Then map boot params address in r2 or the first 1MB (2MB with LPAE) - * of ram if boot params address is not specified. + * Then map boot params address in r2 if specified. */ mov r0, r2, lsr #SECTION_SHIFT movs r0, r0, lsl #SECTION_SHIFT - moveq r0, r8 - sub r3, r0, r8 - add r3, r3, #PAGE_OFFSET - add r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) - orr r6, r7, r0 - str r6, [r3] + subne r3, r0, r8 + addne r3, r3, #PAGE_OFFSET + addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + orrne r6, r7, r0 + strne r6, [r3] #ifdef CONFIG_DEBUG_LL #if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c index ba32b393b3f0..38c1a3b103a0 100644 --- a/arch/arm/kernel/kprobes-test-arm.c +++ b/arch/arm/kernel/kprobes-test-arm.c @@ -187,8 +187,8 @@ void kprobe_arm_test_cases(void) TEST_BF_R ("mov pc, r",0,2f,"") TEST_BF_RR("mov pc, r",0,2f,", asl r",1,0,"") TEST_BB( "sub pc, pc, #1b-2b+8") -#if __LINUX_ARM_ARCH__ >= 6 - TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before ARMv6 */ +#if __LINUX_ARM_ARCH__ == 6 && !defined(CONFIG_CPU_V7) + TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before and after ARMv6 */ #endif TEST_BB_R( "sub pc, pc, r",14, 1f-2f+8,"") TEST_BB_R( "rsb pc, r",14,1f-2f+8,", pc") diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c index 8f96ec778e8d..6123daf397a7 100644 --- a/arch/arm/kernel/kprobes-thumb.c +++ b/arch/arm/kernel/kprobes-thumb.c @@ -660,7 +660,7 @@ static const union decode_item t32_table_1111_100x[] = { /* LDRSB (literal) 1111 1001 x001 1111 xxxx xxxx xxxx xxxx */ /* LDRH (literal) 1111 1000 x011 1111 xxxx xxxx xxxx xxxx */ /* LDRSH (literal) 1111 1001 x011 1111 xxxx xxxx xxxx xxxx */ - DECODE_EMULATEX (0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal, + DECODE_SIMULATEX(0xfe5f0000, 0xf81f0000, t32_simulate_ldr_literal, REGS(PC, NOSPPCX, 0, 0, 0)), /* STRB (immediate) 1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */ diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 186c8cb982c5..ab243b87118d 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -47,17 +47,14 @@ static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); /* Set at runtime when we know what CPU type we are. */ static struct arm_pmu *cpu_pmu; -enum arm_perf_pmu_ids -armpmu_get_pmu_id(void) +const char *perf_pmu_name(void) { - int id = -ENODEV; - - if (cpu_pmu != NULL) - id = cpu_pmu->id; + if (!cpu_pmu) + return NULL; - return id; + return cpu_pmu->pmu.name; } -EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); +EXPORT_SYMBOL_GPL(perf_pmu_name); int perf_num_counters(void) { @@ -503,7 +500,7 @@ __hw_perf_event_init(struct perf_event *event) event_requires_mode_exclusion(&event->attr)) { pr_debug("ARM performance counters do not support " "mode exclusion\n"); - return -EPERM; + return -EOPNOTSUPP; } /* @@ -760,7 +757,7 @@ init_hw_perf_events(void) cpu_pmu->name, cpu_pmu->num_events); cpu_pmu_init(cpu_pmu); register_cpu_notifier(&pmu_cpu_notifier); - armpmu_register(cpu_pmu, "cpu", PERF_TYPE_RAW); + armpmu_register(cpu_pmu, cpu_pmu->name, PERF_TYPE_RAW); } else { pr_info("no hardware support available\n"); } diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index ab627a740fa3..c90fcb2b6967 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -650,7 +650,6 @@ static int armv6_map_event(struct perf_event *event) } static struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, @@ -685,7 +684,6 @@ static int armv6mpcore_map_event(struct perf_event *event) } static struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index d3c536068162..f04070bd2183 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1258,7 +1258,6 @@ static u32 __init armv7_read_num_pmnc_events(void) static struct arm_pmu *__init armv7_a8_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA8; armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.map_event = armv7_a8_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1267,7 +1266,6 @@ static struct arm_pmu *__init armv7_a8_pmu_init(void) static struct arm_pmu *__init armv7_a9_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA9; armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.map_event = armv7_a9_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1276,7 +1274,6 @@ static struct arm_pmu *__init armv7_a9_pmu_init(void) static struct arm_pmu *__init armv7_a5_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA5; armv7pmu.name = "ARMv7 Cortex-A5"; armv7pmu.map_event = armv7_a5_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1285,7 +1282,6 @@ static struct arm_pmu *__init armv7_a5_pmu_init(void) static struct arm_pmu *__init armv7_a15_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA15; armv7pmu.name = "ARMv7 Cortex-A15"; armv7pmu.map_event = armv7_a15_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); @@ -1295,7 +1291,6 @@ static struct arm_pmu *__init armv7_a15_pmu_init(void) static struct arm_pmu *__init armv7_a7_pmu_init(void) { - armv7pmu.id = ARM_PERF_PMU_ID_CA7; armv7pmu.name = "ARMv7 Cortex-A7"; armv7pmu.map_event = armv7_a7_map_event; armv7pmu.num_events = armv7_read_num_pmnc_events(); diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index e34e7254e652..f759fe0bab63 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -435,7 +435,6 @@ static int xscale_map_event(struct perf_event *event) } static struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, @@ -803,7 +802,6 @@ xscale2pmu_write_counter(int counter, u32 val) } static struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 5700a7ae7f0b..dab711e6e1ca 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -25,7 +25,6 @@ #include <linux/regset.h> #include <linux/audit.h> #include <linux/tracehook.h> -#include <linux/unistd.h> #include <asm/pgtable.h> #include <asm/traps.h> @@ -908,18 +907,16 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static int ptrace_syscall_trace(struct pt_regs *regs, int scno, + enum ptrace_syscall_dir dir) { unsigned long ip; - if (why) - audit_syscall_exit(regs); - else - audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, - regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); - - if (why == 0 && test_and_clear_thread_flag(TIF_SYSCALL_RESTARTSYS)) - scno = __NR_restart_syscall - __NR_SYSCALL_BASE; if (!test_thread_flag(TIF_SYSCALL_TRACE)) return scno; @@ -930,14 +927,28 @@ asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) * IP = 0 -> entry, =1 -> exit */ ip = regs->ARM_ip; - regs->ARM_ip = why; + regs->ARM_ip = dir; - if (why) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) current_thread_info()->syscall = -1; regs->ARM_ip = ip; - return current_thread_info()->syscall; } + +asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_ENTER); + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1, + regs->ARM_r2, regs->ARM_r3); + return ret; +} + +asmlinkage int syscall_trace_exit(struct pt_regs *regs, int scno) +{ + int ret = ptrace_syscall_trace(regs, scno, PTRACE_SYSCALL_EXIT); + audit_syscall_exit(regs); + return ret; +} diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index fd2392a17ac1..536c5d6b340b 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -27,6 +27,7 @@ */ #define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) #define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) +#define SWI_SYS_RESTART (0xef000000|__NR_restart_syscall|__NR_OABI_SYSCALL_BASE) /* * With EABI, the syscall number has to be loaded into r7. @@ -47,6 +48,18 @@ const unsigned long sigreturn_codes[7] = { }; /* + * Either we support OABI only, or we have EABI with the OABI + * compat layer enabled. In the later case we don't know if + * user space is EABI or not, and if not we must not clobber r7. + * Always using the OABI syscall solves that issue and works for + * all those cases. + */ +const unsigned long syscall_restart_code[2] = { + SWI_SYS_RESTART, /* swi __NR_restart_syscall */ + 0xe49df004, /* ldr pc, [sp], #4 */ +}; + +/* * atomically swap in the new signal mask, and wait for a signal. */ asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask) @@ -592,10 +605,12 @@ static void do_signal(struct pt_regs *regs, int syscall) case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: - case -ERESTART_RESTARTBLOCK: regs->ARM_r0 = regs->ARM_ORIG_r0; regs->ARM_pc = restart_addr; break; + case -ERESTART_RESTARTBLOCK: + regs->ARM_r0 = -EINTR; + break; } } @@ -611,14 +626,12 @@ static void do_signal(struct pt_regs *regs, int syscall) * debugger has chosen to restart at a different PC. */ if (regs->ARM_pc == restart_addr) { - if (retval == -ERESTARTNOHAND || - retval == -ERESTART_RESTARTBLOCK + if (retval == -ERESTARTNOHAND || (retval == -ERESTARTSYS && !(ka.sa.sa_flags & SA_RESTART))) { regs->ARM_r0 = -EINTR; regs->ARM_pc = continue_addr; } - clear_thread_flag(TIF_SYSCALL_RESTARTSYS); } handle_signal(signr, &ka, &info, regs); @@ -632,8 +645,29 @@ static void do_signal(struct pt_regs *regs, int syscall) * ignore the restart. */ if (retval == -ERESTART_RESTARTBLOCK - && regs->ARM_pc == restart_addr) - set_thread_flag(TIF_SYSCALL_RESTARTSYS); + && regs->ARM_pc == continue_addr) { + if (thumb_mode(regs)) { + regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; + regs->ARM_pc -= 2; + } else { +#if defined(CONFIG_AEABI) && !defined(CONFIG_OABI_COMPAT) + regs->ARM_r7 = __NR_restart_syscall; + regs->ARM_pc -= 4; +#else + u32 __user *usp; + + regs->ARM_sp -= 4; + usp = (u32 __user *)regs->ARM_sp; + + if (put_user(regs->ARM_pc, usp) == 0) { + regs->ARM_pc = KERN_RESTART_CODE; + } else { + regs->ARM_sp += 4; + force_sigsegv(0, current); + } +#endif + } + } } restore_saved_sigmask(); diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h index 5ff067b7c752..6fcfe8398aa4 100644 --- a/arch/arm/kernel/signal.h +++ b/arch/arm/kernel/signal.h @@ -8,5 +8,7 @@ * published by the Free Software Foundation. */ #define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) +#define KERN_RESTART_CODE (KERN_SIGRETURN_CODE + sizeof(sigreturn_codes)) extern const unsigned long sigreturn_codes[7]; +extern const unsigned long syscall_restart_code[2]; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2c7217d971db..aea74f5bc34a 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -179,7 +179,7 @@ void __ref cpu_die(void) mb(); /* Tell __cpu_die() that this CPU is now safe to dispose of */ - complete(&cpu_died); + RCU_NONIDLE(complete(&cpu_died)); /* * actual CPU shutdown procedure is at least platform (if not diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index 8200deaa14f6..198b08456e90 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -17,11 +17,190 @@ #include <linux/percpu.h> #include <linux/node.h> #include <linux/nodemask.h> +#include <linux/of.h> #include <linux/sched.h> +#include <linux/slab.h> #include <asm/cputype.h> #include <asm/topology.h> +/* + * cpu power scale management + */ + +/* + * cpu power table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_power field so the scheduler can + * take this difference into account during load balance. A per cpu structure + * is preferred because each CPU updates its own cpu_power field during the + * load balance except for idle cores. One idle core is selected to run the + * rebalance_domains for all idle cores and the cpu_power can be updated + * during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_power_scale(unsigned int cpu, unsigned long power) +{ + per_cpu(cpu_scale, cpu) = power; +} + +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_POWER_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_POWER_SCALE value for cpu_scale. + */ +struct cpu_efficiency table_efficiency[] = { + {"arm,cortex-a15", 3891}, + {"arm,cortex-a7", 2048}, + {NULL, }, +}; + +struct cpu_capacity { + unsigned long hwid; + unsigned long capacity; +}; + +struct cpu_capacity *cpu_capacity; + +unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_power field such that an + * 'average' CPU is of middle power. Also see the comments near + * table_efficiency[] and update_cpu_power(). + */ +static void __init parse_dt_topology(void) +{ + struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = (unsigned long)(-1); + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int alloc_size, cpu = 0; + + alloc_size = nr_cpu_ids * sizeof(struct cpu_capacity); + cpu_capacity = (struct cpu_capacity *)kzalloc(alloc_size, GFP_NOWAIT); + + while ((cn = of_find_node_by_type(cn, "cpu"))) { + const u32 *rate, *reg; + int len; + + if (cpu >= num_possible_cpus()) + break; + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s missing clock-frequency property\n", + cn->full_name); + continue; + } + + reg = of_get_property(cn, "reg", &len); + if (!reg || len != 4) { + pr_err("%s missing reg property\n", cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity[cpu].capacity = capacity; + cpu_capacity[cpu++].hwid = be32_to_cpup(reg); + } + + if (cpu < num_possible_cpus()) + cpu_capacity[cpu].hwid = (unsigned long)(-1); + + /* If min and max capacities are equals, we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_POWER_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (min_capacity == max_capacity) + cpu_capacity[0].hwid = (unsigned long)(-1); + else if (4*max_capacity < (3*(max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_POWER_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_POWER_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_capacity table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +void update_cpu_power(unsigned int cpu, unsigned long hwid) +{ + unsigned int idx = 0; + + /* look for the cpu's hwid in the cpu capacity table */ + for (idx = 0; idx < num_possible_cpus(); idx++) { + if (cpu_capacity[idx].hwid == hwid) + break; + + if (cpu_capacity[idx].hwid == -1) + return; + } + + if (idx == num_possible_cpus()) + return; + + set_power_scale(cpu, cpu_capacity[idx].capacity / middle_capacity); + + printk(KERN_INFO "CPU%u: update cpu_power %lu\n", + cpu, arch_scale_freq_power(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_power(unsigned int cpuid, unsigned int mpidr) {} +#endif + + +/* + * cpu topology management + */ + #define MPIDR_SMP_BITMASK (0x3 << 30) #define MPIDR_SMP_VALUE (0x2 << 30) @@ -31,6 +210,7 @@ * These masks reflect the current use of the affinity levels. * The affinity level can be up to 16 bits according to ARM ARM */ +#define MPIDR_HWID_BITMASK 0xFFFFFF #define MPIDR_LEVEL0_MASK 0x3 #define MPIDR_LEVEL0_SHIFT 0 @@ -41,6 +221,9 @@ #define MPIDR_LEVEL2_MASK 0xFF #define MPIDR_LEVEL2_SHIFT 16 +/* + * cpu topology table + */ struct cputopo_arm cpu_topology[NR_CPUS]; const struct cpumask *cpu_coregroup_mask(int cpu) @@ -48,6 +231,32 @@ const struct cpumask *cpu_coregroup_mask(int cpu) return &cpu_topology[cpu].core_sibling; } +void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + /* * store_cpu_topology is called at boot when only one cpu is running * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, @@ -57,7 +266,6 @@ void store_cpu_topology(unsigned int cpuid) { struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; unsigned int mpidr; - unsigned int cpu; /* If the cpu topology has been already set, just return */ if (cpuid_topo->core_id != -1) @@ -99,26 +307,9 @@ void store_cpu_topology(unsigned int cpuid) cpuid_topo->socket_id = -1; } - /* update core and thread sibling masks */ - for_each_possible_cpu(cpu) { - struct cputopo_arm *cpu_topo = &cpu_topology[cpu]; - - if (cpuid_topo->socket_id == cpu_topo->socket_id) { - cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->core_sibling); - - if (cpuid_topo->core_id == cpu_topo->core_id) { - cpumask_set_cpu(cpuid, - &cpu_topo->thread_sibling); - if (cpu != cpuid) - cpumask_set_cpu(cpu, - &cpuid_topo->thread_sibling); - } - } - } - smp_wmb(); + update_siblings_masks(cpuid); + + update_cpu_power(cpuid, mpidr & MPIDR_HWID_BITMASK); printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", cpuid, cpu_topology[cpuid].thread_id, @@ -134,7 +325,7 @@ void init_cpu_topology(void) { unsigned int cpu; - /* init core mask */ + /* init core mask and power*/ for_each_possible_cpu(cpu) { struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); @@ -143,6 +334,10 @@ void init_cpu_topology(void) cpu_topo->socket_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); + + set_power_scale(cpu, SCHED_POWER_SCALE); } smp_wmb(); + + parse_dt_topology(); } diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4928d89758f4..8b97d739b17b 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -233,9 +233,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #define S_ISA " ARM" #endif -static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; @@ -245,12 +245,12 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); if (ret == NOTIFY_STOP) - return ret; + return 1; print_modules(); __show_regs(regs); printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, @@ -259,45 +259,77 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt dump_instr(KERN_EMERG, regs); } - return ret; + return 0; } -static DEFINE_RAW_SPINLOCK(die_lock); +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; -/* - * This function is protected against re-entrancy. - */ -void die(const char *str, struct pt_regs *regs, int err) +static unsigned long oops_begin(void) { - struct thread_info *thread = current_thread_info(); - int ret; - enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + int cpu; + unsigned long flags; oops_enter(); - raw_spin_lock_irq(&die_lock); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; console_verbose(); bust_spinlocks(1); - if (!user_mode(regs)) - bug_type = report_bug(regs->ARM_pc, regs); - if (bug_type != BUG_TRAP_TYPE_NONE) - str = "Oops - BUG"; - ret = __die(str, err, thread, regs); + return flags; +} - if (regs && kexec_should_crash(thread->task)) +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); + die_owner = -1; add_taint(TAINT_DIE); - raw_spin_unlock_irq(&die_lock); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + if (signr) + do_exit(signr); +} + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode(regs)) + bug_type = report_bug(regs->ARM_pc, regs); + if (bug_type != BUG_TRAP_TYPE_NONE) + str = "Oops - BUG"; + + if (__die(str, err, regs)) + sig = 0; + + oops_end(flags, regs, sig); } void arm_notify_die(const char *str, struct pt_regs *regs, @@ -820,6 +852,8 @@ void __init early_trap_init(void *vectors_base) */ memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE), sigreturn_codes, sizeof(sigreturn_codes)); + memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE), + syscall_restart_code, sizeof(syscall_restart_code)); flush_icache_range(vectors, vectors + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 43a31fb06318..36ff15bbfdd4 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -183,7 +183,9 @@ SECTIONS } #endif +#ifdef CONFIG_SMP PERCPU_SECTION(L1_CACHE_BYTES) +#endif #ifdef CONFIG_XIP_KERNEL __data_loc = ALIGN(4); /* location in binary */ |