diff options
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r-- | arch/powerpc/kernel/Makefile | 7 | ||||
-rw-r--r-- | arch/powerpc/kernel/cacheinfo.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/cputable.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/e500-pmu.c | 129 | ||||
-rw-r--r-- | arch/powerpc/kernel/head_64.S | 17 | ||||
-rw-r--r-- | arch/powerpc/kernel/legacy_serial.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kernel/paca.c | 93 | ||||
-rw-r--r-- | arch/powerpc/kernel/pci-common.c | 15 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_event.c | 8 | ||||
-rw-r--r-- | arch/powerpc/kernel/perf_event_fsl_emb.c | 654 | ||||
-rw-r--r-- | arch/powerpc/kernel/prom.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/ptrace.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup-common.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kernel/setup_64.c | 12 | ||||
-rw-r--r-- | arch/powerpc/kernel/syscalls.c | 164 |
15 files changed, 899 insertions, 224 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index c002b0410219..877326320e74 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -98,11 +98,16 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o perf_callchain.o +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o + +obj-$(CONFIG_PPC_PERF_CTRS) += perf_event.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT) += perf_event_fsl_emb.o +obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o ifneq ($(CONFIG_PPC_INDIRECT_IO),y) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index bb37b1d19a58..01fe9ce28379 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -642,7 +642,7 @@ static struct kobj_attribute *cache_index_opt_attrs[] = { &cache_assoc_attr, }; -static struct sysfs_ops cache_index_ops = { +static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 2fc82bac3bbc..8af4949434b2 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1808,7 +1808,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 64, .dcache_bsize = 64, .num_pmcs = 4, - .oprofile_cpu_type = "ppc/e500", /* xxx - galak, e500mc? */ + .oprofile_cpu_type = "ppc/e500mc", .oprofile_type = PPC_OPROFILE_FSL_EMB, .cpu_setup = __setup_cpu_e500mc, .machine_check = machine_check_e500, diff --git a/arch/powerpc/kernel/e500-pmu.c b/arch/powerpc/kernel/e500-pmu.c new file mode 100644 index 000000000000..7c07de0d8943 --- /dev/null +++ b/arch/powerpc/kernel/e500-pmu.c @@ -0,0 +1,129 @@ +/* + * Performance counter support for e500 family processors. + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/string.h> +#include <linux/perf_event.h> +#include <asm/reg.h> +#include <asm/cputable.h> + +/* + * Map of generic hardware event types to hardware events + * Zero if unsupported + */ +static int e500_generic_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 1, + [PERF_COUNT_HW_INSTRUCTIONS] = 2, + [PERF_COUNT_HW_CACHE_MISSES] = 41, /* Data L1 cache reloads */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 12, + [PERF_COUNT_HW_BRANCH_MISSES] = 15, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +/* + * Table of generalized cache-related events. + * 0 means not supported, -1 means nonsensical, other values + * are event codes. + */ +static int e500_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { + /* + * D-cache misses are not split into read/write/prefetch; + * use raw event 41. + */ + [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 27, 0 }, + [C(OP_WRITE)] = { 28, 0 }, + [C(OP_PREFETCH)] = { 29, 0 }, + }, + [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 2, 60 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * Assuming LL means L2, it's not a good match for this model. + * It allocates only on L1 castout or explicit prefetch, and + * does not have separate read/write events (but it does have + * separate instruction/data events). + */ + [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 0, 0 }, + [C(OP_WRITE)] = { 0, 0 }, + [C(OP_PREFETCH)] = { 0, 0 }, + }, + /* + * There are data/instruction MMU misses, but that's a miss on + * the chip's internal level-one TLB which is probably not + * what the user wants. Instead, unified level-two TLB misses + * are reported here. + */ + [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 26, 66 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, + [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ + [C(OP_READ)] = { 12, 15 }, + [C(OP_WRITE)] = { -1, -1 }, + [C(OP_PREFETCH)] = { -1, -1 }, + }, +}; + +static int num_events = 128; + +/* Upper half of event id is PMLCb, for threshold events */ +static u64 e500_xlate_event(u64 event_id) +{ + u32 event_low = (u32)event_id; + u64 ret; + + if (event_low >= num_events) + return 0; + + ret = FSL_EMB_EVENT_VALID; + + if (event_low >= 76 && event_low <= 81) { + ret |= FSL_EMB_EVENT_RESTRICTED; + ret |= event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH); + } else if (event_id & + (FSL_EMB_EVENT_THRESHMUL | FSL_EMB_EVENT_THRESH)) { + /* Threshold requested on non-threshold event */ + return 0; + } + + return ret; +} + +static struct fsl_emb_pmu e500_pmu = { + .name = "e500 family", + .n_counter = 4, + .n_restricted = 2, + .xlate_event = e500_xlate_event, + .n_generic = ARRAY_SIZE(e500_generic_events), + .generic_events = e500_generic_events, + .cache_events = &e500_cache_events, +}; + +static int init_e500_pmu(void) +{ + if (!cur_cpu_spec->oprofile_cpu_type) + return -ENODEV; + + if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc")) + num_events = 256; + else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500")) + return -ENODEV; + + return register_fsl_emb_pmu(&e500_pmu); +} + +arch_initcall(init_e500_pmu); diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 925807488022..bed9a29ee383 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -219,7 +219,8 @@ generic_secondary_common_init: * physical cpu id in r24, we need to search the pacas to find * which logical id maps to our physical one. */ - LOAD_REG_ADDR(r13, paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r13, paca) /* Load paca pointer */ + ld r13,0(r13) /* Get base vaddr of paca array */ li r5,0 /* logical cpu id */ 1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ cmpw r6,r24 /* Compare to our id */ @@ -536,7 +537,8 @@ _GLOBAL(pmac_secondary_start) mtmsrd r3 /* RI on */ /* Set up a paca value for this processor. */ - LOAD_REG_ADDR(r4,paca) /* Get base vaddr of paca array */ + LOAD_REG_ADDR(r4,paca) /* Load paca pointer */ + ld r4,0(r4) /* Get base vaddr of paca array */ mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ add r13,r13,r4 /* for this processor. */ mtspr SPRN_SPRG_PACA,r13 /* Save vaddr of paca in an SPRG*/ @@ -615,6 +617,17 @@ _GLOBAL(start_secondary_prolog) std r3,0(r1) /* Zero the stack frame pointer */ bl .start_secondary b . +/* + * Reset stack pointer and call start_secondary + * to continue with online operation when woken up + * from cede in cpu offline. + */ +_GLOBAL(start_secondary_resume) + ld r1,PACAKSAVE(r13) /* Reload kernel stack pointer */ + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary + b . #endif /* diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 9ddfaef1a184..035ada5443ee 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -469,7 +469,7 @@ static int __init serial_dev_init(void) return -ENODEV; /* - * Before we register the platfrom serial devices, we need + * Before we register the platform serial devices, we need * to fixup their interrupts and their IO ports. */ DBG("Fixing serial ports interrupts and IO ports ...\n"); diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index d16b1ea55d44..0c40c6f476fe 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -9,11 +9,15 @@ #include <linux/threads.h> #include <linux/module.h> +#include <linux/lmb.h> +#include <asm/firmware.h> #include <asm/lppaca.h> #include <asm/paca.h> #include <asm/sections.h> #include <asm/pgtable.h> +#include <asm/iseries/lpar_map.h> +#include <asm/iseries/hv_types.h> /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -70,37 +74,82 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -struct paca_struct paca[NR_CPUS]; +struct paca_struct *paca; EXPORT_SYMBOL(paca); -void __init initialise_pacas(void) -{ - int cpu; +struct paca_struct boot_paca; - /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB - * of the TOC can be addressed using a single machine instruction. - */ +void __init initialise_paca(struct paca_struct *new_paca, int cpu) +{ + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; - /* Can't use for_each_*_cpu, as they aren't functional yet */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - struct paca_struct *new_paca = &paca[cpu]; - #ifdef CONFIG_PPC_BOOK3S - new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lppaca_ptr = &lppaca[cpu]; #else - new_paca->kernel_pgd = swapper_pg_dir; + new_paca->kernel_pgd = swapper_pg_dir; #endif - new_paca->lock_token = 0x8000; - new_paca->paca_index = cpu; - new_paca->kernel_toc = kernel_toc; - new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; - new_paca->hw_cpu_id = 0xffff; - new_paca->__current = &init_task; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->kernelbase = (unsigned long) _stext; + new_paca->kernel_msr = MSR_KERNEL; + new_paca->hw_cpu_id = 0xffff; + new_paca->__current = &init_task; #ifdef CONFIG_PPC_STD_MMU_64 - new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; #endif /* CONFIG_PPC_STD_MMU_64 */ +} + +static int __initdata paca_size; + +void __init allocate_pacas(void) +{ + int nr_cpus, cpu, limit; + + /* + * We can't take SLB misses on the paca, and we want to access them + * in real mode, so allocate them within the RMA and also within + * the first segment. On iSeries they must be within the area mapped + * by the HV, which is HvPagesToMap * HVPAGESIZE bytes. + */ + limit = min(0x10000000ULL, lmb.rmo_size); + if (firmware_has_feature(FW_FEATURE_ISERIES)) + limit = min(limit, HvPagesToMap * HVPAGESIZE); + + nr_cpus = NR_CPUS; + /* On iSeries we know we can never have more than 64 cpus */ + if (firmware_has_feature(FW_FEATURE_ISERIES)) + nr_cpus = min(64, nr_cpus); + + paca_size = PAGE_ALIGN(sizeof(struct paca_struct) * nr_cpus); + + paca = __va(lmb_alloc_base(paca_size, PAGE_SIZE, limit)); + memset(paca, 0, paca_size); + + printk(KERN_DEBUG "Allocated %u bytes for %d pacas at %p\n", + paca_size, nr_cpus, paca); + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < nr_cpus; cpu++) + initialise_paca(&paca[cpu], cpu); +} + +void __init free_unused_pacas(void) +{ + int new_size; + + new_size = PAGE_ALIGN(sizeof(struct paca_struct) * num_possible_cpus()); + + if (new_size >= paca_size) + return; + + lmb_free(__pa(paca) + new_size, paca_size - new_size); + + printk(KERN_DEBUG "Freed %u bytes for unused pacas\n", + paca_size - new_size); - } + paca_size = new_size; } diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2597f9545d8a..f3c42ce516e7 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -63,21 +63,6 @@ struct dma_map_ops *get_pci_dma_ops(void) } EXPORT_SYMBOL(get_pci_dma_ops); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask) -{ - return dma_set_mask(&dev->dev, mask); -} - -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask) -{ - int rc; - - rc = dma_set_mask(&dev->dev, mask); - dev->dev.coherent_dma_mask = dev->dma_mask; - - return rc; -} - struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index b6cf8f1f4d35..5120bd44f69a 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -1164,10 +1164,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * Finally record data if requested. */ if (record) { - struct perf_sample_data data = { - .addr = ~0ULL, - .period = event->hw.last_period, - }; + struct perf_sample_data data; + + perf_sample_data_init(&data, ~0ULL); + data.period = event->hw.last_period; if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c new file mode 100644 index 000000000000..369872f6cf78 --- /dev/null +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -0,0 +1,654 @@ +/* + * Performance event support - Freescale Embedded Performance Monitor + * + * Copyright 2008-2009 Paul Mackerras, IBM Corporation. + * Copyright 2010 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/hardirq.h> +#include <asm/reg_fsl_emb.h> +#include <asm/pmc.h> +#include <asm/machdep.h> +#include <asm/firmware.h> +#include <asm/ptrace.h> + +struct cpu_hw_events { + int n_events; + int disabled; + u8 pmcs_enabled; + struct perf_event *event[MAX_HWEVENTS]; +}; +static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +static struct fsl_emb_pmu *ppmu; + +/* Number of perf_events counting hardware events */ +static atomic_t num_events; +/* Used to avoid races in calling reserve/release_pmc_hardware */ +static DEFINE_MUTEX(pmc_reserve_mutex); + +/* + * If interrupts were soft-disabled when a PMU interrupt occurs, treat + * it as an NMI. + */ +static inline int perf_intr_is_nmi(struct pt_regs *regs) +{ +#ifdef __powerpc64__ + return !regs->softe; +#else + return 0; +#endif +} + +static void perf_event_interrupt(struct pt_regs *regs); + +/* + * Read one performance monitor counter (PMC). + */ +static unsigned long read_pmc(int idx) +{ + unsigned long val; + + switch (idx) { + case 0: + val = mfpmr(PMRN_PMC0); + break; + case 1: + val = mfpmr(PMRN_PMC1); + break; + case 2: + val = mfpmr(PMRN_PMC2); + break; + case 3: + val = mfpmr(PMRN_PMC3); + break; + default: + printk(KERN_ERR "oops trying to read PMC%d\n", idx); + val = 0; + } + return val; +} + +/* + * Write one PMC. + */ +static void write_pmc(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMC%d\n", idx); + } + + isync(); +} + +/* + * Write one local control A register + */ +static void write_pmlca(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCA0, val); + break; + case 1: + mtpmr(PMRN_PMLCA1, val); + break; + case 2: + mtpmr(PMRN_PMLCA2, val); + break; + case 3: + mtpmr(PMRN_PMLCA3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCA%d\n", idx); + } + + isync(); +} + +/* + * Write one local control B register + */ +static void write_pmlcb(int idx, unsigned long val) +{ + switch (idx) { + case 0: + mtpmr(PMRN_PMLCB0, val); + break; + case 1: + mtpmr(PMRN_PMLCB1, val); + break; + case 2: + mtpmr(PMRN_PMLCB2, val); + break; + case 3: + mtpmr(PMRN_PMLCB3, val); + break; + default: + printk(KERN_ERR "oops trying to write PMLCB%d\n", idx); + } + + isync(); +} + +static void fsl_emb_pmu_read(struct perf_event *event) +{ + s64 val, delta, prev; + + /* + * Performance monitor interrupts come even when interrupts + * are soft-disabled, as long as interrupts are hard-enabled. + * Therefore we treat them like NMIs. + */ + do { + prev = atomic64_read(&event->hw.prev_count); + barrier(); + val = read_pmc(event->hw.idx); + } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev); + + /* The counters are only 32 bits wide */ + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + atomic64_sub(delta, &event->hw.period_left); +} + +/* + * Disable all events to prevent PMU interrupts and to allow + * events to be added or removed. + */ +void hw_perf_disable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + + if (!cpuhw->disabled) { + cpuhw->disabled = 1; + + /* + * Check if we ever enabled the PMU on this cpu. + */ + if (!cpuhw->pmcs_enabled) { + ppc_enable_pmcs(); + cpuhw->pmcs_enabled = 1; + } + + if (atomic_read(&num_events)) { + /* + * Set the 'freeze all counters' bit, and disable + * interrupts. The barrier is to make sure the + * mtpmr has been executed and the PMU has frozen + * the events before we return. + */ + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + } + local_irq_restore(flags); +} + +/* + * Re-enable all events if disable == 0. + * If we were previously disabled and events were added, then + * put the new config on the PMU. + */ +void hw_perf_enable(void) +{ + struct cpu_hw_events *cpuhw; + unsigned long flags; + + local_irq_save(flags); + cpuhw = &__get_cpu_var(cpu_hw_events); + if (!cpuhw->disabled) + goto out; + + cpuhw->disabled = 0; + ppc_set_pmu_inuse(cpuhw->n_events != 0); + + if (cpuhw->n_events > 0) { + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + } + + out: + local_irq_restore(flags); +} + +static int collect_events(struct perf_event *group, int max_count, + struct perf_event *ctrs[]) +{ + int n = 0; + struct perf_event *event; + + if (!is_software_event(group)) { + if (n >= max_count) + return -1; + ctrs[n] = group; + n++; + } + list_for_each_entry(event, &group->sibling_list, group_entry) { + if (!is_software_event(event) && + event->state != PERF_EVENT_STATE_OFF) { + if (n >= max_count) + return -1; + ctrs[n] = event; + n++; + } + } + return n; +} + +/* perf must be disabled, context locked on entry */ +static int fsl_emb_pmu_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int ret = -EAGAIN; + int num_counters = ppmu->n_counter; + u64 val; + int i; + + cpuhw = &get_cpu_var(cpu_hw_events); + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_counters = ppmu->n_restricted; + + /* + * Allocate counters from top-down, so that restricted-capable + * counters are kept free as long as possible. + */ + for (i = num_counters - 1; i >= 0; i--) { + if (cpuhw->event[i]) + continue; + + break; + } + + if (i < 0) + goto out; + + event->hw.idx = i; + cpuhw->event[i] = event; + ++cpuhw->n_events; + + val = 0; + if (event->hw.sample_period) { + s64 left = atomic64_read(&event->hw.period_left); + if (left < 0x80000000L) + val = 0x80000000L - left; + } + atomic64_set(&event->hw.prev_count, val); + write_pmc(i, val); + perf_event_update_userpage(event); + + write_pmlcb(i, event->hw.config >> 32); + write_pmlca(i, event->hw.config_base); + + ret = 0; + out: + put_cpu_var(cpu_hw_events); + return ret; +} + +/* perf must be disabled, context locked on entry */ +static void fsl_emb_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuhw; + int i = event->hw.idx; + + if (i < 0) + goto out; + + fsl_emb_pmu_read(event); + + cpuhw = &get_cpu_var(cpu_hw_events); + + WARN_ON(event != cpuhw->event[event->hw.idx]); + + write_pmlca(i, 0); + write_pmlcb(i, 0); + write_pmc(i, 0); + + cpuhw->event[i] = NULL; + event->hw.idx = -1; + + /* + * TODO: if at least one restricted event exists, and we + * just freed up a non-restricted-capable counter, and + * there is a restricted-capable counter occupied by + * a non-restricted event, migrate that event to the + * vacated counter. + */ + + cpuhw->n_events--; + + out: + put_cpu_var(cpu_hw_events); +} + +/* + * Re-enable interrupts on a event after they were throttled + * because they were coming too fast. + * + * Context is locked on entry, but perf is not disabled. + */ +static void fsl_emb_pmu_unthrottle(struct perf_event *event) +{ + s64 val, left; + unsigned long flags; + + if (event->hw.idx < 0 || !event->hw.sample_period) + return; + local_irq_save(flags); + perf_disable(); + fsl_emb_pmu_read(event); + left = event->hw.sample_period; + event->hw.last_period = left; + val = 0; + if (left < 0x80000000L) + val = 0x80000000L - left; + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); + perf_enable(); + local_irq_restore(flags); +} + +static struct pmu fsl_emb_pmu = { + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + +/* + * Release the PMU if this is the last perf_event. + */ +static void hw_perf_event_destroy(struct perf_event *event) +{ + if (!atomic_add_unless(&num_events, -1, 1)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + release_pmc_hardware(); + mutex_unlock(&pmc_reserve_mutex); + } +} + +/* + * Translate a generic cache event_id config to a raw event_id code. + */ +static int hw_perf_cache_event(u64 config, u64 *eventp) +{ + unsigned long type, op, result; + int ev; + + if (!ppmu->cache_events) + return -EINVAL; + + /* unpack config */ + type = config & 0xff; + op = (config >> 8) & 0xff; + result = (config >> 16) & 0xff; + + if (type >= PERF_COUNT_HW_CACHE_MAX || + op >= PERF_COUNT_HW_CACHE_OP_MAX || + result >= PERF_COUNT_HW_CACHE_RESULT_MAX) + return -EINVAL; + + ev = (*ppmu->cache_events)[type][op][result]; + if (ev == 0) + return -EOPNOTSUPP; + if (ev == -1) + return -EINVAL; + *eventp = ev; + return 0; +} + +const struct pmu *hw_perf_event_init(struct perf_event *event) +{ + u64 ev; + struct perf_event *events[MAX_HWEVENTS]; + int n; + int err; + int num_restricted; + int i; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + ev = event->attr.config; + if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) + return ERR_PTR(-EOPNOTSUPP); + ev = ppmu->generic_events[ev]; + break; + + case PERF_TYPE_HW_CACHE: + err = hw_perf_cache_event(event->attr.config, &ev); + if (err) + return ERR_PTR(err); + break; + + case PERF_TYPE_RAW: + ev = event->attr.config; + break; + + default: + return ERR_PTR(-EINVAL); + } + + event->hw.config = ppmu->xlate_event(ev); + if (!(event->hw.config & FSL_EMB_EVENT_VALID)) + return ERR_PTR(-EINVAL); + + /* + * If this is in a group, check if it can go on with all the + * other hardware events in the group. We assume the event + * hasn't been linked into its leader's sibling list at this point. + */ + n = 0; + if (event->group_leader != event) { + n = collect_events(event->group_leader, + ppmu->n_counter - 1, events); + if (n < 0) + return ERR_PTR(-EINVAL); + } + + if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { + num_restricted = 0; + for (i = 0; i < n; i++) { + if (events[i]->hw.config & FSL_EMB_EVENT_RESTRICTED) + num_restricted++; + } + + if (num_restricted >= ppmu->n_restricted) + return ERR_PTR(-EINVAL); + } + + event->hw.idx = -1; + + event->hw.config_base = PMLCA_CE | PMLCA_FCM1 | + (u32)((ev << 16) & PMLCA_EVENT_MASK); + + if (event->attr.exclude_user) + event->hw.config_base |= PMLCA_FCU; + if (event->attr.exclude_kernel) + event->hw.config_base |= PMLCA_FCS; + if (event->attr.exclude_idle) + return ERR_PTR(-ENOTSUPP); + + event->hw.last_period = event->hw.sample_period; + atomic64_set(&event->hw.period_left, event->hw.last_period); + + /* + * See if we need to reserve the PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * reserve_pmc_hardware or release_pmc_hardware. + */ + err = 0; + if (!atomic_inc_not_zero(&num_events)) { + mutex_lock(&pmc_reserve_mutex); + if (atomic_read(&num_events) == 0 && + reserve_pmc_hardware(perf_event_interrupt)) + err = -EBUSY; + else + atomic_inc(&num_events); + mutex_unlock(&pmc_reserve_mutex); + + mtpmr(PMRN_PMGC0, PMGC0_FAC); + isync(); + } + event->destroy = hw_perf_event_destroy; + + if (err) + return ERR_PTR(err); + return &fsl_emb_pmu; +} + +/* + * A counter has overflowed; update its count and record + * things if requested. Note that interrupts are hard-disabled + * here so there is no possibility of being interrupted. + */ +static void record_and_restart(struct perf_event *event, unsigned long val, + struct pt_regs *regs, int nmi) +{ + u64 period = event->hw.sample_period; + s64 prev, delta, left; + int record = 0; + + /* we don't have to worry about interrupts here */ + prev = atomic64_read(&event->hw.prev_count); + delta = (val - prev) & 0xfffffffful; + atomic64_add(delta, &event->count); + + /* + * See if the total period for this event has expired, + * and update for the next period. + */ + val = 0; + left = atomic64_read(&event->hw.period_left) - delta; + if (period) { + if (left <= 0) { + left += period; + if (left <= 0) + left = period; + record = 1; + } + if (left < 0x80000000LL) + val = 0x80000000LL - left; + } + + /* + * Finally record data if requested. + */ + if (record) { + struct perf_sample_data data = { + .period = event->hw.last_period, + }; + + if (perf_event_overflow(event, nmi, &data, regs)) { + /* + * Interrupts are coming too fast - throttle them + * by setting the event to 0, so it will be + * at least 2^30 cycles until the next interrupt + * (assuming each event counts at most 2 counts + * per cycle). + */ + val = 0; + left = ~0ULL >> 1; + } + } + + write_pmc(event->hw.idx, val); + atomic64_set(&event->hw.prev_count, val); + atomic64_set(&event->hw.period_left, left); + perf_event_update_userpage(event); +} + +static void perf_event_interrupt(struct pt_regs *regs) +{ + int i; + struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + unsigned long val; + int found = 0; + int nmi; + + nmi = perf_intr_is_nmi(regs); + if (nmi) + nmi_enter(); + else + irq_enter(); + + for (i = 0; i < ppmu->n_counter; ++i) { + event = cpuhw->event[i]; + + val = read_pmc(i); + if ((int)val < 0) { + if (event) { + /* event has overflowed */ + found = 1; + record_and_restart(event, val, regs, nmi); + } else { + /* + * Disabled counter is negative, + * reset it just in case. + */ + write_pmc(i, 0); + } + } + } + + /* PMM will keep counters frozen until we return from the interrupt. */ + mtmsr(mfmsr() | MSR_PMM); + mtpmr(PMRN_PMGC0, PMGC0_PMIE | PMGC0_FCECE); + isync(); + + if (nmi) + nmi_exit(); + else + irq_exit(); +} + +void hw_perf_event_setup(int cpu) +{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); + + memset(cpuhw, 0, sizeof(*cpuhw)); +} + +int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) +{ + if (ppmu) + return -EBUSY; /* something's already registered */ + + ppmu = pmu; + pr_info("%s performance monitor hardware support registered\n", + pmu->name); + + return 0; +} diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 43238b2054b6..05131d634e73 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -43,6 +43,7 @@ #include <asm/smp.h> #include <asm/system.h> #include <asm/mmu.h> +#include <asm/paca.h> #include <asm/pgtable.h> #include <asm/pci.h> #include <asm/iommu.h> @@ -721,6 +722,8 @@ void __init early_init_devtree(void *params) * FIXME .. and the initrd too? */ move_device_tree(); + allocate_pacas(); + DBG("Scanning CPUs ...\n"); /* Retreive CPU related informations from the flat tree diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d9b05866615f..ed2cfe17d25e 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -940,7 +940,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) { switch (slot) { case 1: - if (child->thread.iac1 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC1) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) { @@ -952,7 +952,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) child->thread.dbcr0 &= ~DBCR0_IAC1; break; case 2: - if (child->thread.iac2 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC2) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC12MODE) @@ -963,7 +963,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) break; #if CONFIG_PPC_ADV_DEBUG_IACS > 2 case 3: - if (child->thread.iac3 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC3) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) { @@ -975,7 +975,7 @@ static int del_instruction_bp(struct task_struct *child, int slot) child->thread.dbcr0 &= ~DBCR0_IAC3; break; case 4: - if (child->thread.iac4 == 0) + if ((child->thread.dbcr0 & DBCR0_IAC4) == 0) return -ENOENT; if (dbcr_iac_range(child) & DBCR_IAC34MODE) @@ -1054,7 +1054,7 @@ static int set_dac(struct task_struct *child, struct ppc_hw_breakpoint *bp_info) static int del_dac(struct task_struct *child, int slot) { if (slot == 1) { - if (child->thread.dac1 == 0) + if ((dbcr_dac(child) & (DBCR_DAC1R | DBCR_DAC1W)) == 0) return -ENOENT; child->thread.dac1 = 0; @@ -1070,7 +1070,7 @@ static int del_dac(struct task_struct *child, int slot) child->thread.dvc1 = 0; #endif } else if (slot == 2) { - if (child->thread.dac1 == 0) + if ((dbcr_dac(child) & (DBCR_DAC2R | DBCR_DAC2W)) == 0) return -ENOENT; #ifdef CONFIG_PPC_ADV_DEBUG_DAC_RANGE diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 03dd6a248198..48f0a008b20b 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -36,6 +36,7 @@ #include <linux/lmb.h> #include <linux/of_platform.h> #include <asm/io.h> +#include <asm/paca.h> #include <asm/prom.h> #include <asm/processor.h> #include <asm/vdso_datapage.h> @@ -493,6 +494,8 @@ void __init smp_setup_cpu_maps(void) * here will have to be reworked */ cpu_init_thread_core_maps(nthreads); + + free_unused_pacas(); } #endif /* CONFIG_SMP */ diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 6568406b2a30..63547394048c 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -144,9 +144,9 @@ early_param("smt-enabled", early_smt_enabled); #endif /* CONFIG_SMP */ /* Put the paca pointer into r13 and SPRG_PACA */ -void __init setup_paca(int cpu) +static void __init setup_paca(struct paca_struct *new_paca) { - local_paca = &paca[cpu]; + local_paca = new_paca; mtspr(SPRN_SPRG_PACA, local_paca); #ifdef CONFIG_PPC_BOOK3E mtspr(SPRN_SPRG_TLB_EXFRAME, local_paca->extlb); @@ -176,14 +176,12 @@ void __init early_setup(unsigned long dt_ptr) { /* -------- printk is _NOT_ safe to use here ! ------- */ - /* Fill in any unititialised pacas */ - initialise_pacas(); - /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ - setup_paca(0); + initialise_paca(&boot_paca, 0); + setup_paca(&boot_paca); /* Initialize lockdep early or else spinlocks will blow */ lockdep_init(); @@ -203,7 +201,7 @@ void __init early_setup(unsigned long dt_ptr) early_init_devtree(__va(dt_ptr)); /* Now we know the logical id of our boot cpu, setup the paca. */ - setup_paca(boot_cpuid); + setup_paca(&paca[boot_cpuid]); /* Fix up paca fields required for the boot cpu */ get_paca()->cpu_start = 1; diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c index 3370e62e43d4..f2496f2faecc 100644 --- a/arch/powerpc/kernel/syscalls.c +++ b/arch/powerpc/kernel/syscalls.c @@ -42,100 +42,6 @@ #include <asm/time.h> #include <asm/unistd.h> -/* - * sys_ipc() is the de-multiplexer for the SysV IPC calls.. - * - * This is really horribly ugly. - */ -int sys_ipc(uint call, int first, unsigned long second, long third, - void __user *ptr, long fifth) -{ - int version, ret; - - version = call >> 16; /* hack for backward compatibility */ - call &= 0xffff; - - ret = -ENOSYS; - switch (call) { - case SEMOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, NULL); - break; - case SEMTIMEDOP: - ret = sys_semtimedop(first, (struct sembuf __user *)ptr, - (unsigned)second, - (const struct timespec __user *) fifth); - break; - case SEMGET: - ret = sys_semget (first, (int)second, third); - break; - case SEMCTL: { - union semun fourth; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = get_user(fourth.__pad, (void __user * __user *)ptr))) - break; - ret = sys_semctl(first, (int)second, third, fourth); - break; - } - case MSGSND: - ret = sys_msgsnd(first, (struct msgbuf __user *)ptr, - (size_t)second, third); - break; - case MSGRCV: - switch (version) { - case 0: { - struct ipc_kludge tmp; - - ret = -EINVAL; - if (!ptr) - break; - if ((ret = copy_from_user(&tmp, - (struct ipc_kludge __user *) ptr, - sizeof (tmp)) ? -EFAULT : 0)) - break; - ret = sys_msgrcv(first, tmp.msgp, (size_t) second, - tmp.msgtyp, third); - break; - } - default: - ret = sys_msgrcv (first, (struct msgbuf __user *) ptr, - (size_t)second, fifth, third); - break; - } - break; - case MSGGET: - ret = sys_msgget((key_t)first, (int)second); - break; - case MSGCTL: - ret = sys_msgctl(first, (int)second, - (struct msqid_ds __user *)ptr); - break; - case SHMAT: { - ulong raddr; - ret = do_shmat(first, (char __user *)ptr, (int)second, &raddr); - if (ret) - break; - ret = put_user(raddr, (ulong __user *) third); - break; - } - case SHMDT: - ret = sys_shmdt((char __user *)ptr); - break; - case SHMGET: - ret = sys_shmget(first, (size_t)second, third); - break; - case SHMCTL: - ret = sys_shmctl(first, (int)second, - (struct shmid_ds __user *)ptr); - break; - } - - return ret; -} - static inline unsigned long do_mmap2(unsigned long addr, size_t len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long off, int shift) @@ -210,76 +116,6 @@ long ppc64_personality(unsigned long personality) } #endif -#ifdef CONFIG_PPC64 -#define OVERRIDE_MACHINE (personality(current->personality) == PER_LINUX32) -#else -#define OVERRIDE_MACHINE 0 -#endif - -static inline int override_machine(char __user *mach) -{ - if (OVERRIDE_MACHINE) { - /* change ppc64 to ppc */ - if (__put_user(0, mach+3) || __put_user(0, mach+4)) - return -EFAULT; - } - return 0; -} - -long ppc_newuname(struct new_utsname __user * name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_uname(struct old_utsname __user *name) -{ - int err = 0; - - down_read(&uts_sem); - if (copy_to_user(name, utsname(), sizeof(*name))) - err = -EFAULT; - up_read(&uts_sem); - if (!err) - err = override_machine(name->machine); - return err; -} - -int sys_olduname(struct oldold_utsname __user *name) -{ - int error; - - if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname))) - return -EFAULT; - - down_read(&uts_sem); - error = __copy_to_user(&name->sysname, &utsname()->sysname, - __OLD_UTS_LEN); - error |= __put_user(0, name->sysname + __OLD_UTS_LEN); - error |= __copy_to_user(&name->nodename, &utsname()->nodename, - __OLD_UTS_LEN); - error |= __put_user(0, name->nodename + __OLD_UTS_LEN); - error |= __copy_to_user(&name->release, &utsname()->release, - __OLD_UTS_LEN); - error |= __put_user(0, name->release + __OLD_UTS_LEN); - error |= __copy_to_user(&name->version, &utsname()->version, - __OLD_UTS_LEN); - error |= __put_user(0, name->version + __OLD_UTS_LEN); - error |= __copy_to_user(&name->machine, &utsname()->machine, - __OLD_UTS_LEN); - error |= override_machine(name->machine); - up_read(&uts_sem); - - return error? -EFAULT: 0; -} - long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low, u32 len_high, u32 len_low) { |