diff options
Diffstat (limited to 'arch/powerpc/platforms/cell')
28 files changed, 3916 insertions, 1077 deletions
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig index 3e430b489bb7..06a85b704331 100644 --- a/arch/powerpc/platforms/cell/Kconfig +++ b/arch/powerpc/platforms/cell/Kconfig @@ -20,4 +20,18 @@ config CBE_RAS bool "RAS features for bare metal Cell BE" default y +config CBE_THERM + tristate "CBE thermal support" + default m + depends on CBE_RAS + +config CBE_CPUFREQ + tristate "CBE frequency scaling" + depends on CBE_RAS && CPU_FREQ + default m + help + This adds the cpufreq driver for Cell BE processors. + For details, take a look at <file:Documentation/cpu-freq/>. + If you don't have such processor, say N + endmenu diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile index c89cdd67383b..f90e8337796c 100644 --- a/arch/powerpc/platforms/cell/Makefile +++ b/arch/powerpc/platforms/cell/Makefile @@ -1,7 +1,11 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ - cbe_regs.o spider-pic.o pervasive.o + cbe_regs.o spider-pic.o \ + pervasive.o pmu.o io-workarounds.o obj-$(CONFIG_CBE_RAS) += ras.o +obj-$(CONFIG_CBE_THERM) += cbe_thermal.o +obj-$(CONFIG_CBE_CPUFREQ) += cbe_cpufreq.o + ifeq ($(CONFIG_SMP),y) obj-$(CONFIG_PPC_CELL_NATIVE) += smp.o endif @@ -11,5 +15,6 @@ spufs-modular-$(CONFIG_SPU_FS) += spu_syscalls.o spu-priv1-$(CONFIG_PPC_CELL_NATIVE) += spu_priv1_mmio.o obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ + spu_coredump.o \ $(spufs-modular-m) \ $(spu-priv1-y) spufs/ diff --git a/arch/powerpc/platforms/cell/cbe_cpufreq.c b/arch/powerpc/platforms/cell/cbe_cpufreq.c new file mode 100644 index 000000000000..a3850fd1e94c --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_cpufreq.c @@ -0,0 +1,248 @@ +/* + * cpufreq driver for the cell processor + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/cpufreq.h> +#include <linux/timer.h> + +#include <asm/hw_irq.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/prom.h> +#include <asm/time.h> + +#include "cbe_regs.h" + +static DEFINE_MUTEX(cbe_switch_mutex); + + +/* the CBE supports an 8 step frequency scaling */ +static struct cpufreq_frequency_table cbe_freqs[] = { + {1, 0}, + {2, 0}, + {3, 0}, + {4, 0}, + {5, 0}, + {6, 0}, + {8, 0}, + {10, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +/* to write to MIC register */ +static u64 MIC_Slow_Fast_Timer_table[] = { + [0 ... 7] = 0x007fc00000000000ull, +}; + +/* more values for the MIC */ +static u64 MIC_Slow_Next_Timer_table[] = { + 0x0000240000000000ull, + 0x0000268000000000ull, + 0x000029C000000000ull, + 0x00002D0000000000ull, + 0x0000300000000000ull, + 0x0000334000000000ull, + 0x000039C000000000ull, + 0x00003FC000000000ull, +}; + +/* + * hardware specific functions + */ + +static int get_pmode(int cpu) +{ + int ret; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + ret = in_be64(&pmd_regs->pmsr) & 0x07; + + return ret; +} + +static int set_pmode(int cpu, unsigned int pmode) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + u64 flags; + u64 value; + + local_irq_save(flags); + + mic_tm_regs = cbe_get_cpu_mic_tm_regs(cpu); + pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + pr_debug("pm register is mapped at %p\n", &pmd_regs->pmcr); + pr_debug("mic register is mapped at %p\n", &mic_tm_regs->slow_fast_timer_0); + + out_be64(&mic_tm_regs->slow_fast_timer_0, MIC_Slow_Fast_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_fast_timer_1, MIC_Slow_Fast_Timer_table[pmode]); + + out_be64(&mic_tm_regs->slow_next_timer_0, MIC_Slow_Next_Timer_table[pmode]); + out_be64(&mic_tm_regs->slow_next_timer_1, MIC_Slow_Next_Timer_table[pmode]); + + value = in_be64(&pmd_regs->pmcr); + /* set bits to zero */ + value &= 0xFFFFFFFFFFFFFFF8ull; + /* set bits to next pmode */ + value |= pmode; + + out_be64(&pmd_regs->pmcr, value); + + /* wait until new pmode appears in status register */ + value = in_be64(&pmd_regs->pmsr) & 0x07; + while(value != pmode) { + cpu_relax(); + value = in_be64(&pmd_regs->pmsr) & 0x07; + } + + local_irq_restore(flags); + + return 0; +} + +/* + * cpufreq functions + */ + +static int cbe_cpufreq_cpu_init (struct cpufreq_policy *policy) +{ + u32 *max_freq; + int i, cur_pmode; + struct device_node *cpu; + + cpu = of_get_cpu_node(policy->cpu, NULL); + + if(!cpu) + return -ENODEV; + + pr_debug("init cpufreq on CPU %d\n", policy->cpu); + + max_freq = (u32*) get_property(cpu, "clock-frequency", NULL); + + if(!max_freq) + return -EINVAL; + + // we need the freq in kHz + *max_freq /= 1000; + + pr_debug("max clock-frequency is at %u kHz\n", *max_freq); + pr_debug("initializing frequency table\n"); + + // initialize frequency table + for (i=0; cbe_freqs[i].frequency!=CPUFREQ_TABLE_END; i++) { + cbe_freqs[i].frequency = *max_freq / cbe_freqs[i].index; + pr_debug("%d: %d\n", i, cbe_freqs[i].frequency); + } + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + /* if DEBUG is enabled set_pmode() measures the correct latency of a transition */ + policy->cpuinfo.transition_latency = 25000; + + cur_pmode = get_pmode(policy->cpu); + pr_debug("current pmode is at %d\n",cur_pmode); + + policy->cur = cbe_freqs[cur_pmode].frequency; + +#ifdef CONFIG_SMP + policy->cpus = cpu_sibling_map[policy->cpu]; +#endif + + cpufreq_frequency_table_get_attr (cbe_freqs, policy->cpu); + + /* this ensures that policy->cpuinfo_min and policy->cpuinfo_max are set correctly */ + return cpufreq_frequency_table_cpuinfo (policy, cbe_freqs); +} + +static int cbe_cpufreq_cpu_exit(struct cpufreq_policy *policy) +{ + cpufreq_frequency_table_put_attr(policy->cpu); + return 0; +} + +static int cbe_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, cbe_freqs); +} + + +static int cbe_cpufreq_target(struct cpufreq_policy *policy, unsigned int target_freq, + unsigned int relation) +{ + int rc; + struct cpufreq_freqs freqs; + int cbe_pmode_new; + + cpufreq_frequency_table_target(policy, + cbe_freqs, + target_freq, + relation, + &cbe_pmode_new); + + freqs.old = policy->cur; + freqs.new = cbe_freqs[cbe_pmode_new].frequency; + freqs.cpu = policy->cpu; + + mutex_lock (&cbe_switch_mutex); + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + + pr_debug("setting frequency for cpu %d to %d kHz, 1/%d of max frequency\n", + policy->cpu, + cbe_freqs[cbe_pmode_new].frequency, + cbe_freqs[cbe_pmode_new].index); + + rc = set_pmode(policy->cpu, cbe_pmode_new); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + mutex_unlock(&cbe_switch_mutex); + + return rc; +} + +static struct cpufreq_driver cbe_cpufreq_driver = { + .verify = cbe_cpufreq_verify, + .target = cbe_cpufreq_target, + .init = cbe_cpufreq_cpu_init, + .exit = cbe_cpufreq_cpu_exit, + .name = "cbe-cpufreq", + .owner = THIS_MODULE, + .flags = CPUFREQ_CONST_LOOPS, +}; + +/* + * module init and destoy + */ + +static int __init cbe_cpufreq_init(void) +{ + return cpufreq_register_driver(&cbe_cpufreq_driver); +} + +static void __exit cbe_cpufreq_exit(void) +{ + cpufreq_unregister_driver(&cbe_cpufreq_driver); +} + +module_init(cbe_cpufreq_init); +module_exit(cbe_cpufreq_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index 2f194ba29899..9a0ee62691d5 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -8,6 +8,7 @@ #include <linux/percpu.h> #include <linux/types.h> +#include <linux/module.h> #include <asm/io.h> #include <asm/pgtable.h> @@ -16,8 +17,6 @@ #include "cbe_regs.h" -#define MAX_CBE 2 - /* * Current implementation uses "cpu" nodes. We build our own mapping * array of cpu numbers to cpu nodes locally for now to allow interrupt @@ -30,6 +29,8 @@ static struct cbe_regs_map struct device_node *cpu_node; struct cbe_pmd_regs __iomem *pmd_regs; struct cbe_iic_regs __iomem *iic_regs; + struct cbe_mic_tm_regs __iomem *mic_tm_regs; + struct cbe_pmd_shadow_regs pmd_shadow_regs; } cbe_regs_maps[MAX_CBE]; static int cbe_regs_map_count; @@ -42,6 +43,19 @@ static struct cbe_thread_map static struct cbe_regs_map *cbe_find_map(struct device_node *np) { int i; + struct device_node *tmp_np; + + if (strcasecmp(np->type, "spe") == 0) { + if (np->data == NULL) { + /* walk up path until cpu node was found */ + tmp_np = np->parent; + while (tmp_np != NULL && strcasecmp(tmp_np->type, "cpu") != 0) + tmp_np = tmp_np->parent; + + np->data = cbe_find_map(tmp_np); + } + return np->data; + } for (i = 0; i < cbe_regs_map_count; i++) if (cbe_regs_maps[i].cpu_node == np) @@ -56,6 +70,7 @@ struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np) return NULL; return map->pmd_regs; } +EXPORT_SYMBOL_GPL(cbe_get_pmd_regs); struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu) { @@ -64,7 +79,23 @@ struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu) return NULL; return map->pmd_regs; } +EXPORT_SYMBOL_GPL(cbe_get_cpu_pmd_regs); +struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} + +struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return &map->pmd_shadow_regs; +} struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np) { @@ -73,6 +104,7 @@ struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np) return NULL; return map->iic_regs; } + struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu) { struct cbe_regs_map *map = cbe_thread_map[cpu].regs; @@ -81,6 +113,36 @@ struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu) return map->iic_regs; } +struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np) +{ + struct cbe_regs_map *map = cbe_find_map(np); + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} + +struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu) +{ + struct cbe_regs_map *map = cbe_thread_map[cpu].regs; + if (map == NULL) + return NULL; + return map->mic_tm_regs; +} +EXPORT_SYMBOL_GPL(cbe_get_cpu_mic_tm_regs); + +/* FIXME + * This is little more than a stub at the moment. It should be + * fleshed out so that it works for both SMT and non-SMT, no + * matter if the passed cpu is odd or even. + * For SMT enabled, returns 0 for even-numbered cpu; otherwise 1. + * For SMT disabled, returns 0 for all cpus. + */ +u32 cbe_get_hw_thread_id(int cpu) +{ + return (cpu & 1); +} +EXPORT_SYMBOL_GPL(cbe_get_hw_thread_id); + void __init cbe_regs_init(void) { int i; @@ -119,6 +181,11 @@ void __init cbe_regs_init(void) prop = get_property(cpu, "iic", NULL); if (prop != NULL) map->iic_regs = ioremap(prop->address, prop->len); + + prop = (struct address_prop *)get_property(cpu, "mic-tm", + NULL); + if (prop != NULL) + map->mic_tm_regs = ioremap(prop->address, prop->len); } } diff --git a/arch/powerpc/platforms/cell/cbe_regs.h b/arch/powerpc/platforms/cell/cbe_regs.h index e76e4a6af5bc..440a7ecc66ea 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.h +++ b/arch/powerpc/platforms/cell/cbe_regs.h @@ -4,12 +4,19 @@ * This file is intended to hold the various register definitions for CBE * on-chip system devices (memory controller, IO controller, etc...) * + * (C) Copyright IBM Corporation 2001,2006 + * + * Authors: Maximino Aguilar (maguilar@us.ibm.com) + * David J. Erb (djerb@us.ibm.com) + * * (c) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org>, IBM Corp. */ #ifndef CBE_REGS_H #define CBE_REGS_H +#include <asm/cell-pmu.h> + /* * * Some HID register definitions @@ -22,6 +29,7 @@ #define HID0_CBE_THERM_INT_EN 0x0000000400000000ul #define HID0_CBE_SYSERR_INT_EN 0x0000000200000000ul +#define MAX_CBE 2 /* * @@ -29,51 +37,124 @@ * */ +union spe_reg { + u64 val; + u8 spe[8]; +}; + +union ppe_spe_reg { + u64 val; + struct { + u32 ppe; + u32 spe; + }; +}; + + struct cbe_pmd_regs { - u8 pad_0x0000_0x0800[0x0800 - 0x0000]; /* 0x0000 */ + /* Debug Bus Control */ + u64 pad_0x0000; /* 0x0000 */ + + u64 group_control; /* 0x0008 */ + + u8 pad_0x0010_0x00a8 [0x00a8 - 0x0010]; /* 0x0010 */ + + u64 debug_bus_control; /* 0x00a8 */ + + u8 pad_0x00b0_0x0100 [0x0100 - 0x00b0]; /* 0x00b0 */ + + u64 trace_aux_data; /* 0x0100 */ + u64 trace_buffer_0_63; /* 0x0108 */ + u64 trace_buffer_64_127; /* 0x0110 */ + u64 trace_address; /* 0x0118 */ + u64 ext_tr_timer; /* 0x0120 */ + + u8 pad_0x0128_0x0400 [0x0400 - 0x0128]; /* 0x0128 */ + + /* Performance Monitor */ + u64 pm_status; /* 0x0400 */ + u64 pm_control; /* 0x0408 */ + u64 pm_interval; /* 0x0410 */ + u64 pm_ctr[4]; /* 0x0418 */ + u64 pm_start_stop; /* 0x0438 */ + u64 pm07_control[8]; /* 0x0440 */ + + u8 pad_0x0480_0x0800 [0x0800 - 0x0480]; /* 0x0480 */ /* Thermal Sensor Registers */ - u64 ts_ctsr1; /* 0x0800 */ - u64 ts_ctsr2; /* 0x0808 */ - u64 ts_mtsr1; /* 0x0810 */ - u64 ts_mtsr2; /* 0x0818 */ - u64 ts_itr1; /* 0x0820 */ - u64 ts_itr2; /* 0x0828 */ - u64 ts_gitr; /* 0x0830 */ - u64 ts_isr; /* 0x0838 */ - u64 ts_imr; /* 0x0840 */ - u64 tm_cr1; /* 0x0848 */ - u64 tm_cr2; /* 0x0850 */ - u64 tm_simr; /* 0x0858 */ - u64 tm_tpr; /* 0x0860 */ - u64 tm_str1; /* 0x0868 */ - u64 tm_str2; /* 0x0870 */ - u64 tm_tsr; /* 0x0878 */ + union spe_reg ts_ctsr1; /* 0x0800 */ + u64 ts_ctsr2; /* 0x0808 */ + union spe_reg ts_mtsr1; /* 0x0810 */ + u64 ts_mtsr2; /* 0x0818 */ + union spe_reg ts_itr1; /* 0x0820 */ + u64 ts_itr2; /* 0x0828 */ + u64 ts_gitr; /* 0x0830 */ + u64 ts_isr; /* 0x0838 */ + u64 ts_imr; /* 0x0840 */ + union spe_reg tm_cr1; /* 0x0848 */ + u64 tm_cr2; /* 0x0850 */ + u64 tm_simr; /* 0x0858 */ + union ppe_spe_reg tm_tpr; /* 0x0860 */ + union spe_reg tm_str1; /* 0x0868 */ + u64 tm_str2; /* 0x0870 */ + union ppe_spe_reg tm_tsr; /* 0x0878 */ /* Power Management */ - u64 pm_control; /* 0x0880 */ -#define CBE_PMD_PAUSE_ZERO_CONTROL 0x10000 - u64 pm_status; /* 0x0888 */ + u64 pmcr; /* 0x0880 */ +#define CBE_PMD_PAUSE_ZERO_CONTROL 0x10000 + u64 pmsr; /* 0x0888 */ /* Time Base Register */ - u64 tbr; /* 0x0890 */ + u64 tbr; /* 0x0890 */ - u8 pad_0x0898_0x0c00 [0x0c00 - 0x0898]; /* 0x0898 */ + u8 pad_0x0898_0x0c00 [0x0c00 - 0x0898]; /* 0x0898 */ /* Fault Isolation Registers */ - u64 checkstop_fir; /* 0x0c00 */ - u64 recoverable_fir; - u64 spec_att_mchk_fir; - u64 fir_mode_reg; - u64 fir_enable_mask; + u64 checkstop_fir; /* 0x0c00 */ + u64 recoverable_fir; /* 0x0c08 */ + u64 spec_att_mchk_fir; /* 0x0c10 */ + u64 fir_mode_reg; /* 0x0c18 */ + u64 fir_enable_mask; /* 0x0c20 */ - u8 pad_0x0c28_0x1000 [0x1000 - 0x0c28]; /* 0x0c28 */ + u8 pad_0x0c28_0x1000 [0x1000 - 0x0c28]; /* 0x0c28 */ }; extern struct cbe_pmd_regs __iomem *cbe_get_pmd_regs(struct device_node *np); extern struct cbe_pmd_regs __iomem *cbe_get_cpu_pmd_regs(int cpu); /* + * PMU shadow registers + * + * Many of the registers in the performance monitoring unit are write-only, + * so we need to save a copy of what we write to those registers. + * + * The actual data counters are read/write. However, writing to the counters + * only takes effect if the PMU is enabled. Otherwise the value is stored in + * a hardware latch until the next time the PMU is enabled. So we save a copy + * of the counter values if we need to read them back while the PMU is + * disabled. The counter_value_in_latch field is a bitmap indicating which + * counters currently have a value waiting to be written. + */ + +struct cbe_pmd_shadow_regs { + u32 group_control; + u32 debug_bus_control; + u32 trace_address; + u32 ext_tr_timer; + u32 pm_status; + u32 pm_control; + u32 pm_interval; + u32 pm_start_stop; + u32 pm07_control[NR_CTRS]; + + u32 pm_ctr[NR_PHYS_CTRS]; + u32 counter_value_in_latch; +}; + +extern struct cbe_pmd_shadow_regs *cbe_get_pmd_shadow_regs(struct device_node *np); +extern struct cbe_pmd_shadow_regs *cbe_get_cpu_pmd_shadow_regs(int cpu); + +/* * * IIC unit register definitions * @@ -102,18 +183,28 @@ struct cbe_iic_regs { /* IIC interrupt registers */ struct cbe_iic_thread_regs thread[2]; /* 0x0400 */ - u64 iic_ir; /* 0x0440 */ - u64 iic_is; /* 0x0448 */ + + u64 iic_ir; /* 0x0440 */ +#define CBE_IIC_IR_PRIO(x) (((x) & 0xf) << 12) +#define CBE_IIC_IR_DEST_NODE(x) (((x) & 0xf) << 4) +#define CBE_IIC_IR_DEST_UNIT(x) ((x) & 0xf) +#define CBE_IIC_IR_IOC_0 0x0 +#define CBE_IIC_IR_IOC_1S 0xb +#define CBE_IIC_IR_PT_0 0xe +#define CBE_IIC_IR_PT_1 0xf + + u64 iic_is; /* 0x0448 */ +#define CBE_IIC_IS_PMI 0x2 u8 pad_0x0450_0x0500[0x0500 - 0x0450]; /* 0x0450 */ /* IOC FIR */ u64 ioc_fir_reset; /* 0x0500 */ - u64 ioc_fir_set; - u64 ioc_checkstop_enable; - u64 ioc_fir_error_mask; - u64 ioc_syserr_enable; - u64 ioc_fir; + u64 ioc_fir_set; /* 0x0508 */ + u64 ioc_checkstop_enable; /* 0x0510 */ + u64 ioc_fir_error_mask; /* 0x0518 */ + u64 ioc_syserr_enable; /* 0x0520 */ + u64 ioc_fir; /* 0x0528 */ u8 pad_0x0530_0x1000[0x1000 - 0x0530]; /* 0x0530 */ }; @@ -122,6 +213,48 @@ extern struct cbe_iic_regs __iomem *cbe_get_iic_regs(struct device_node *np); extern struct cbe_iic_regs __iomem *cbe_get_cpu_iic_regs(int cpu); +struct cbe_mic_tm_regs { + u8 pad_0x0000_0x0040[0x0040 - 0x0000]; /* 0x0000 */ + + u64 mic_ctl_cnfg2; /* 0x0040 */ +#define CBE_MIC_ENABLE_AUX_TRC 0x8000000000000000LL +#define CBE_MIC_DISABLE_PWR_SAV_2 0x0200000000000000LL +#define CBE_MIC_DISABLE_AUX_TRC_WRAP 0x0100000000000000LL +#define CBE_MIC_ENABLE_AUX_TRC_INT 0x0080000000000000LL + + u64 pad_0x0048; /* 0x0048 */ + + u64 mic_aux_trc_base; /* 0x0050 */ + u64 mic_aux_trc_max_addr; /* 0x0058 */ + u64 mic_aux_trc_cur_addr; /* 0x0060 */ + u64 mic_aux_trc_grf_addr; /* 0x0068 */ + u64 mic_aux_trc_grf_data; /* 0x0070 */ + + u64 pad_0x0078; /* 0x0078 */ + + u64 mic_ctl_cnfg_0; /* 0x0080 */ +#define CBE_MIC_DISABLE_PWR_SAV_0 0x8000000000000000LL + + u64 pad_0x0088; /* 0x0088 */ + + u64 slow_fast_timer_0; /* 0x0090 */ + u64 slow_next_timer_0; /* 0x0098 */ + + u8 pad_0x00a0_0x01c0[0x01c0 - 0x0a0]; /* 0x00a0 */ + + u64 mic_ctl_cnfg_1; /* 0x01c0 */ +#define CBE_MIC_DISABLE_PWR_SAV_1 0x8000000000000000LL + u64 pad_0x01c8; /* 0x01c8 */ + + u64 slow_fast_timer_1; /* 0x01d0 */ + u64 slow_next_timer_1; /* 0x01d8 */ + + u8 pad_0x01e0_0x1000[0x1000 - 0x01e0]; /* 0x01e0 */ +}; + +extern struct cbe_mic_tm_regs __iomem *cbe_get_mic_tm_regs(struct device_node *np); +extern struct cbe_mic_tm_regs __iomem *cbe_get_cpu_mic_tm_regs(int cpu); + /* Init this module early */ extern void cbe_regs_init(void); diff --git a/arch/powerpc/platforms/cell/cbe_thermal.c b/arch/powerpc/platforms/cell/cbe_thermal.c new file mode 100644 index 000000000000..616a0a3fd0e2 --- /dev/null +++ b/arch/powerpc/platforms/cell/cbe_thermal.c @@ -0,0 +1,226 @@ +/* + * thermal support for the cell processor + * + * (C) Copyright IBM Deutschland Entwicklung GmbH 2005 + * + * Author: Christian Krafft <krafft@de.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/module.h> +#include <linux/sysdev.h> +#include <linux/kernel.h> +#include <linux/cpu.h> +#include <asm/spu.h> +#include <asm/io.h> +#include <asm/prom.h> + +#include "cbe_regs.h" +#include "spu_priv1_mmio.h" + +static struct cbe_pmd_regs __iomem *get_pmd_regs(struct sys_device *sysdev) +{ + struct spu *spu; + + spu = container_of(sysdev, struct spu, sysdev); + + return cbe_get_pmd_regs(spu_devnode(spu)); +} + +/* returns the value for a given spu in a given register */ +static u8 spu_read_register_value(struct sys_device *sysdev, union spe_reg __iomem *reg) +{ + unsigned int *id; + union spe_reg value; + struct spu *spu; + + /* getting the id from the reg attribute will not work on future device-tree layouts + * in future we should store the id to the spu struct and use it here */ + spu = container_of(sysdev, struct spu, sysdev); + id = (unsigned int *)get_property(spu_devnode(spu), "reg", NULL); + value.val = in_be64(®->val); + + return value.spe[*id]; +} + +static ssize_t spu_show_temp(struct sys_device *sysdev, char *buf) +{ + int value; + struct cbe_pmd_regs __iomem *pmd_regs; + + pmd_regs = get_pmd_regs(sysdev); + + value = spu_read_register_value(sysdev, &pmd_regs->ts_ctsr1); + /* clear all other bits */ + value &= 0x3F; + /* temp is stored in steps of 2 degrees */ + value *= 2; + /* base temp is 65 degrees */ + value += 65; + + return sprintf(buf, "%d\n", (int) value); +} + +static ssize_t ppe_show_temp(struct sys_device *sysdev, char *buf, int pos) +{ + struct cbe_pmd_regs __iomem *pmd_regs; + u64 value; + + pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); + value = in_be64(&pmd_regs->ts_ctsr2); + + /* access the corresponding byte */ + value >>= pos; + /* clear all other bits */ + value &= 0x3F; + /* temp is stored in steps of 2 degrees */ + value *= 2; + /* base temp is 65 degrees */ + value += 65; + + return sprintf(buf, "%d\n", (int) value); +} + + +/* shows the temperature of the DTS on the PPE, + * located near the linear thermal sensor */ +static ssize_t ppe_show_temp0(struct sys_device *sysdev, char *buf) +{ + return ppe_show_temp(sysdev, buf, 32); +} + +/* shows the temperature of the second DTS on the PPE */ +static ssize_t ppe_show_temp1(struct sys_device *sysdev, char *buf) +{ + return ppe_show_temp(sysdev, buf, 0); +} + +static struct sysdev_attribute attr_spu_temperature = { + .attr = {.name = "temperature", .mode = 0400 }, + .show = spu_show_temp, +}; + +static struct attribute *spu_attributes[] = { + &attr_spu_temperature.attr, +}; + +static struct attribute_group spu_attribute_group = { + .name = "thermal", + .attrs = spu_attributes, +}; + +static struct sysdev_attribute attr_ppe_temperature0 = { + .attr = {.name = "temperature0", .mode = 0400 }, + .show = ppe_show_temp0, +}; + +static struct sysdev_attribute attr_ppe_temperature1 = { + .attr = {.name = "temperature1", .mode = 0400 }, + .show = ppe_show_temp1, +}; + +static struct attribute *ppe_attributes[] = { + &attr_ppe_temperature0.attr, + &attr_ppe_temperature1.attr, +}; + +static struct attribute_group ppe_attribute_group = { + .name = "thermal", + .attrs = ppe_attributes, +}; + +/* + * initialize throttling with default values + */ +static void __init init_default_values(void) +{ + int cpu; + struct cbe_pmd_regs __iomem *pmd_regs; + struct sys_device *sysdev; + union ppe_spe_reg tpr; + union spe_reg str1; + u64 str2; + union spe_reg cr1; + u64 cr2; + + /* TPR defaults */ + /* ppe + * 1F - no full stop + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees */ + tpr.ppe = 0x1F0803; + /* spe + * 10 - full stopped when over 96 degrees + * 08 - dynamic throttling starts if over 80 degrees + * 03 - dynamic throttling ceases if below 70 degrees + */ + tpr.spe = 0x100803; + + /* STR defaults */ + /* str1 + * 10 - stop 16 of 32 cycles + */ + str1.val = 0x1010101010101010ull; + /* str2 + * 10 - stop 16 of 32 cycles + */ + str2 = 0x10; + + /* CR defaults */ + /* cr1 + * 4 - normal operation + */ + cr1.val = 0x0404040404040404ull; + /* cr2 + * 4 - normal operation + */ + cr2 = 0x04; + + for_each_possible_cpu (cpu) { + pr_debug("processing cpu %d\n", cpu); + sysdev = get_cpu_sysdev(cpu); + pmd_regs = cbe_get_cpu_pmd_regs(sysdev->id); + + out_be64(&pmd_regs->tm_str2, str2); + out_be64(&pmd_regs->tm_str1.val, str1.val); + out_be64(&pmd_regs->tm_tpr.val, tpr.val); + out_be64(&pmd_regs->tm_cr1.val, cr1.val); + out_be64(&pmd_regs->tm_cr2, cr2); + } +} + + +static int __init thermal_init(void) +{ + init_default_values(); + + spu_add_sysdev_attr_group(&spu_attribute_group); + cpu_add_sysdev_attr_group(&ppe_attribute_group); + + return 0; +} +module_init(thermal_init); + +static void __exit thermal_exit(void) +{ + spu_remove_sysdev_attr_group(&spu_attribute_group); + cpu_remove_sysdev_attr_group(&ppe_attribute_group); +} +module_exit(thermal_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Christian Krafft <krafft@de.ibm.com>"); + diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c index a914c12b4060..6666d037eb44 100644 --- a/arch/powerpc/platforms/cell/interrupt.c +++ b/arch/powerpc/platforms/cell/interrupt.c @@ -396,3 +396,19 @@ void __init iic_init_IRQ(void) /* Enable on current CPU */ iic_setup_cpu(); } + +void iic_set_interrupt_routing(int cpu, int thread, int priority) +{ + struct cbe_iic_regs __iomem *iic_regs = cbe_get_cpu_iic_regs(cpu); + u64 iic_ir = 0; + int node = cpu >> 1; + + /* Set which node and thread will handle the next interrupt */ + iic_ir |= CBE_IIC_IR_PRIO(priority) | + CBE_IIC_IR_DEST_NODE(node); + if (thread == 0) + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_0); + else + iic_ir |= CBE_IIC_IR_DEST_UNIT(CBE_IIC_IR_PT_1); + out_be64(&iic_regs->iic_ir, iic_ir); +} diff --git a/arch/powerpc/platforms/cell/interrupt.h b/arch/powerpc/platforms/cell/interrupt.h index 9ba1d3c17b4b..942dc39d6045 100644 --- a/arch/powerpc/platforms/cell/interrupt.h +++ b/arch/powerpc/platforms/cell/interrupt.h @@ -83,5 +83,7 @@ extern u8 iic_get_target_id(int cpu); extern void spider_init_IRQ(void); +extern void iic_set_interrupt_routing(int cpu, int thread, int priority); + #endif #endif /* ASM_CELL_PIC_H */ diff --git a/arch/powerpc/platforms/cell/io-workarounds.c b/arch/powerpc/platforms/cell/io-workarounds.c new file mode 100644 index 000000000000..580d42595912 --- /dev/null +++ b/arch/powerpc/platforms/cell/io-workarounds.c @@ -0,0 +1,346 @@ +/* + * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#undef DEBUG + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/pci.h> +#include <asm/io.h> +#include <asm/machdep.h> +#include <asm/pci-bridge.h> +#include <asm/ppc-pci.h> + + +#define SPIDER_PCI_REG_BASE 0xd000 +#define SPIDER_PCI_VCI_CNTL_STAT 0x0110 +#define SPIDER_PCI_DUMMY_READ 0x0810 +#define SPIDER_PCI_DUMMY_READ_BASE 0x0814 + +/* Undefine that to re-enable bogus prefetch + * + * Without that workaround, the chip will do bogus prefetch past + * page boundary from system memory. This setting will disable that, + * though the documentation is unclear as to the consequences of doing + * so, either purely performances, or possible misbehaviour... It's not + * clear wether the chip can handle unaligned accesses at all without + * prefetching enabled. + * + * For now, things appear to be behaving properly with that prefetching + * disabled and IDE, possibly because IDE isn't doing any unaligned + * access. + */ +#define SPIDER_DISABLE_PREFETCH + +#define MAX_SPIDERS 2 + +static struct spider_pci_bus { + void __iomem *regs; + unsigned long mmio_start; + unsigned long mmio_end; + unsigned long pio_vstart; + unsigned long pio_vend; +} spider_pci_busses[MAX_SPIDERS]; +static int spider_pci_count; + +static struct spider_pci_bus *spider_pci_find(unsigned long vaddr, + unsigned long paddr) +{ + int i; + + for (i = 0; i < spider_pci_count; i++) { + struct spider_pci_bus *bus = &spider_pci_busses[i]; + if (paddr && paddr >= bus->mmio_start && paddr < bus->mmio_end) + return bus; + if (vaddr && vaddr >= bus->pio_vstart && vaddr < bus->pio_vend) + return bus; + } + return NULL; +} + +static void spider_io_flush(const volatile void __iomem *addr) +{ + struct spider_pci_bus *bus; + int token; + + /* Get platform token (set by ioremap) from address */ + token = PCI_GET_ADDR_TOKEN(addr); + + /* Fast path if we have a non-0 token, it indicates which bus we + * are on. + * + * If the token is 0, that means either the the ioremap was done + * before we initialized this layer, or it's a PIO operation. We + * fallback to a low path in this case. Hopefully, internal devices + * which are ioremap'ed early should use in_XX/out_XX functions + * instead of the PCI ones and thus not suffer from the slowdown. + * + * Also note that currently, the workaround will not work for areas + * that are not mapped with PTEs (bolted in the hash table). This + * is the case for ioremaps done very early at boot (before + * mem_init_done) and includes the mapping of the ISA IO space. + * + * Fortunately, none of the affected devices is expected to do DMA + * and thus there should be no problem in practice. + * + * In order to improve performances, we only do the PTE search for + * addresses falling in the PHB IO space area. That means it will + * not work for hotplug'ed PHBs but those don't exist with Spider. + */ + if (token && token <= spider_pci_count) + bus = &spider_pci_busses[token - 1]; + else { + unsigned long vaddr, paddr; + pte_t *ptep; + + /* Fixup physical address */ + vaddr = (unsigned long)PCI_FIX_ADDR(addr); + + /* Check if it's in allowed range for PIO */ + if (vaddr < PHBS_IO_BASE || vaddr >= IMALLOC_BASE) + return; + + /* Try to find a PTE. If not, clear the paddr, we'll do + * a vaddr only lookup (PIO only) + */ + ptep = find_linux_pte(init_mm.pgd, vaddr); + if (ptep == NULL) + paddr = 0; + else + paddr = pte_pfn(*ptep) << PAGE_SHIFT; + + bus = spider_pci_find(vaddr, paddr); + if (bus == NULL) + return; + } + + /* Now do the workaround + */ + (void)in_be32(bus->regs + SPIDER_PCI_DUMMY_READ); +} + +static u8 spider_readb(const volatile void __iomem *addr) +{ + u8 val = __do_readb(addr); + spider_io_flush(addr); + return val; +} + +static u16 spider_readw(const volatile void __iomem *addr) +{ + u16 val = __do_readw(addr); + spider_io_flush(addr); + return val; +} + +static u32 spider_readl(const volatile void __iomem *addr) +{ + u32 val = __do_readl(addr); + spider_io_flush(addr); + return val; +} + +static u64 spider_readq(const volatile void __iomem *addr) +{ + u64 val = __do_readq(addr); + spider_io_flush(addr); + return val; +} + +static u16 spider_readw_be(const volatile void __iomem *addr) +{ + u16 val = __do_readw_be(addr); + spider_io_flush(addr); + return val; +} + +static u32 spider_readl_be(const volatile void __iomem *addr) +{ + u32 val = __do_readl_be(addr); + spider_io_flush(addr); + return val; +} + +static u64 spider_readq_be(const volatile void __iomem *addr) +{ + u64 val = __do_readq_be(addr); + spider_io_flush(addr); + return val; +} + +static void spider_readsb(const volatile void __iomem *addr, void *buf, + unsigned long count) +{ + __do_readsb(addr, buf, count); + spider_io_flush(addr); +} + +static void spider_readsw(const volatile void __iomem *addr, void *buf, + unsigned long count) +{ + __do_readsw(addr, buf, count); + spider_io_flush(addr); +} + +static void spider_readsl(const volatile void __iomem *addr, void *buf, + unsigned long count) +{ + __do_readsl(addr, buf, count); + spider_io_flush(addr); +} + +static void spider_memcpy_fromio(void *dest, const volatile void __iomem *src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spider_io_flush(src); +} + + +static void __iomem * spider_ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + struct spider_pci_bus *bus; + void __iomem *res = __ioremap(addr, size, flags); + int busno; + + pr_debug("spider_ioremap(0x%lx, 0x%lx, 0x%lx) -> 0x%p\n", + addr, size, flags, res); + + bus = spider_pci_find(0, addr); + if (bus != NULL) { + busno = bus - spider_pci_busses; + pr_debug(" found bus %d, setting token\n", busno); + PCI_SET_ADDR_TOKEN(res, busno + 1); + } + pr_debug(" result=0x%p\n", res); + + return res; +} + +static void __init spider_pci_setup_chip(struct spider_pci_bus *bus) +{ +#ifdef SPIDER_DISABLE_PREFETCH + u32 val = in_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug(" PVCI_Control_Status was 0x%08x\n", val); + out_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif + + /* Configure the dummy address for the workaround */ + out_be32(bus->regs + SPIDER_PCI_DUMMY_READ_BASE, 0x80000000); +} + +static void __init spider_pci_add_one(struct pci_controller *phb) +{ + struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count]; + struct device_node *np = phb->arch_data; + struct resource rsrc; + void __iomem *regs; + + if (spider_pci_count >= MAX_SPIDERS) { + printk(KERN_ERR "Too many spider bridges, workarounds" + " disabled for %s\n", np->full_name); + return; + } + + /* Get the registers for the beast */ + if (of_address_to_resource(np, 0, &rsrc)) { + printk(KERN_ERR "Failed to get registers for spider %s" + " workarounds disabled\n", np->full_name); + return; + } + + /* Mask out some useless bits in there to get to the base of the + * spider chip + */ + rsrc.start &= ~0xfffffffful; + + /* Map them */ + regs = ioremap(rsrc.start + SPIDER_PCI_REG_BASE, 0x1000); + if (regs == NULL) { + printk(KERN_ERR "Failed to map registers for spider %s" + " workarounds disabled\n", np->full_name); + return; + } + + spider_pci_count++; + + /* We assume spiders only have one MMIO resource */ + bus->mmio_start = phb->mem_resources[0].start; + bus->mmio_end = phb->mem_resources[0].end + 1; + + bus->pio_vstart = (unsigned long)phb->io_base_virt; + bus->pio_vend = bus->pio_vstart + phb->pci_io_size; + + bus->regs = regs; + + printk(KERN_INFO "PCI: Spider MMIO workaround for %s\n",np->full_name); + + pr_debug(" mmio (P) = 0x%016lx..0x%016lx\n", + bus->mmio_start, bus->mmio_end); + pr_debug(" pio (V) = 0x%016lx..0x%016lx\n", + bus->pio_vstart, bus->pio_vend); + pr_debug(" regs (P) = 0x%016lx (V) = 0x%p\n", + rsrc.start + SPIDER_PCI_REG_BASE, bus->regs); + + spider_pci_setup_chip(bus); +} + +static struct ppc_pci_io __initdata spider_pci_io = { + .readb = spider_readb, + .readw = spider_readw, + .readl = spider_readl, + .readq = spider_readq, + .readw_be = spider_readw_be, + .readl_be = spider_readl_be, + .readq_be = spider_readq_be, + .readsb = spider_readsb, + .readsw = spider_readsw, + .readsl = spider_readsl, + .memcpy_fromio = spider_memcpy_fromio, +}; + +static int __init spider_pci_workaround_init(void) +{ + struct pci_controller *phb; + + if (!machine_is(cell)) + return 0; + + /* Find spider bridges. We assume they have been all probed + * in setup_arch(). If that was to change, we would need to + * update this code to cope with dynamically added busses + */ + list_for_each_entry(phb, &hose_list, list_node) { + struct device_node *np = phb->arch_data; + const char *model = get_property(np, "model", NULL); + + /* If no model property or name isn't exactly "pci", skip */ + if (model == NULL || strcmp(np->name, "pci")) + continue; + /* If model is not "Spider", skip */ + if (strcmp(model, "Spider")) + continue; + spider_pci_add_one(phb); + } + + /* No Spider PCI found, exit */ + if (spider_pci_count == 0) + return 0; + + /* Setup IO callbacks. We only setup MMIO reads. PIO reads will + * fallback to MMIO reads (though without a token, thus slower) + */ + ppc_pci_io = spider_pci_io; + + /* Setup ioremap callback */ + ppc_md.ioremap = spider_ioremap; + + return 0; +} +arch_initcall(spider_pci_workaround_init); diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index aca4c3db0dde..b43466ba8096 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -1,514 +1,747 @@ /* * IOMMU implementation for Cell Broadband Processor Architecture - * We just establish a linear mapping at boot by setting all the - * IOPT cache entries in the CPU. - * The mapping functions should be identical to pci_direct_iommu, - * except for the handling of the high order bit that is required - * by the Spider bridge. These should be split into a separate - * file at the point where we get a different bridge chip. * - * Copyright (C) 2005 IBM Deutschland Entwicklung GmbH, - * Arnd Bergmann <arndb@de.ibm.com> + * (C) Copyright IBM Corporation 2006 * - * Based on linear mapping - * Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * Author: Jeremy Kerr <jk@ozlabs.org> * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #undef DEBUG #include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/delay.h> -#include <linux/string.h> #include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/kernel.h> -#include <linux/compiler.h> +#include <linux/interrupt.h> +#include <linux/notifier.h> -#include <asm/sections.h> -#include <asm/iommu.h> -#include <asm/io.h> #include <asm/prom.h> -#include <asm/pci-bridge.h> +#include <asm/iommu.h> #include <asm/machdep.h> -#include <asm/pmac_feature.h> -#include <asm/abs_addr.h> -#include <asm/system.h> -#include <asm/ppc-pci.h> +#include <asm/pci-bridge.h> #include <asm/udbg.h> +#include <asm/of_platform.h> +#include <asm/lmb.h> -#include "iommu.h" +#include "cbe_regs.h" +#include "interrupt.h" -static inline unsigned long -get_iopt_entry(unsigned long real_address, unsigned long ioid, - unsigned long prot) -{ - return (prot & IOPT_PROT_MASK) - | (IOPT_COHERENT) - | (IOPT_ORDER_VC) - | (real_address & IOPT_RPN_MASK) - | (ioid & IOPT_IOID_MASK); -} +/* Define CELL_IOMMU_REAL_UNMAP to actually unmap non-used pages + * instead of leaving them mapped to some dummy page. This can be + * enabled once the appropriate workarounds for spider bugs have + * been enabled + */ +#define CELL_IOMMU_REAL_UNMAP -typedef struct { - unsigned long val; -} ioste; +/* Define CELL_IOMMU_STRICT_PROTECTION to enforce protection of + * IO PTEs based on the transfer direction. That can be enabled + * once spider-net has been fixed to pass the correct direction + * to the DMA mapping functions + */ +#define CELL_IOMMU_STRICT_PROTECTION + + +#define NR_IOMMUS 2 + +/* IOC mmap registers */ +#define IOC_Reg_Size 0x2000 + +#define IOC_IOPT_CacheInvd 0x908 +#define IOC_IOPT_CacheInvd_NE_Mask 0xffe0000000000000ul +#define IOC_IOPT_CacheInvd_IOPTE_Mask 0x000003fffffffff8ul +#define IOC_IOPT_CacheInvd_Busy 0x0000000000000001ul + +#define IOC_IOST_Origin 0x918 +#define IOC_IOST_Origin_E 0x8000000000000000ul +#define IOC_IOST_Origin_HW 0x0000000000000800ul +#define IOC_IOST_Origin_HL 0x0000000000000400ul + +#define IOC_IO_ExcpStat 0x920 +#define IOC_IO_ExcpStat_V 0x8000000000000000ul +#define IOC_IO_ExcpStat_SPF_Mask 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_S 0x6000000000000000ul +#define IOC_IO_ExcpStat_SPF_P 0x4000000000000000ul +#define IOC_IO_ExcpStat_ADDR_Mask 0x00000007fffff000ul +#define IOC_IO_ExcpStat_RW_Mask 0x0000000000000800ul +#define IOC_IO_ExcpStat_IOID_Mask 0x00000000000007fful + +#define IOC_IO_ExcpMask 0x928 +#define IOC_IO_ExcpMask_SFE 0x4000000000000000ul +#define IOC_IO_ExcpMask_PFE 0x2000000000000000ul + +#define IOC_IOCmd_Offset 0x1000 + +#define IOC_IOCmd_Cfg 0xc00 +#define IOC_IOCmd_Cfg_TE 0x0000800000000000ul + + +/* Segment table entries */ +#define IOSTE_V 0x8000000000000000ul /* valid */ +#define IOSTE_H 0x4000000000000000ul /* cache hint */ +#define IOSTE_PT_Base_RPN_Mask 0x3ffffffffffff000ul /* base RPN of IOPT */ +#define IOSTE_NPPT_Mask 0x0000000000000fe0ul /* no. pages in IOPT */ +#define IOSTE_PS_Mask 0x0000000000000007ul /* page size */ +#define IOSTE_PS_4K 0x0000000000000001ul /* - 4kB */ +#define IOSTE_PS_64K 0x0000000000000003ul /* - 64kB */ +#define IOSTE_PS_1M 0x0000000000000005ul /* - 1MB */ +#define IOSTE_PS_16M 0x0000000000000007ul /* - 16MB */ + +/* Page table entries */ +#define IOPTE_PP_W 0x8000000000000000ul /* protection: write */ +#define IOPTE_PP_R 0x4000000000000000ul /* protection: read */ +#define IOPTE_M 0x2000000000000000ul /* coherency required */ +#define IOPTE_SO_R 0x1000000000000000ul /* ordering: writes */ +#define IOPTE_SO_RW 0x1800000000000000ul /* ordering: r & w */ +#define IOPTE_RPN_Mask 0x07fffffffffff000ul /* RPN */ +#define IOPTE_H 0x0000000000000800ul /* cache hint */ +#define IOPTE_IOID_Mask 0x00000000000007fful /* ioid */ + + +/* IOMMU sizing */ +#define IO_SEGMENT_SHIFT 28 +#define IO_PAGENO_BITS (IO_SEGMENT_SHIFT - IOMMU_PAGE_SHIFT) + +/* The high bit needs to be set on every DMA address */ +#define SPIDER_DMA_OFFSET 0x80000000ul + +struct iommu_window { + struct list_head list; + struct cbe_iommu *iommu; + unsigned long offset; + unsigned long size; + unsigned long pte_offset; + unsigned int ioid; + struct iommu_table table; +}; -static inline ioste -mk_ioste(unsigned long val) -{ - ioste ioste = { .val = val, }; - return ioste; -} +#define NAMESIZE 8 +struct cbe_iommu { + int nid; + char name[NAMESIZE]; + void __iomem *xlate_regs; + void __iomem *cmd_regs; + unsigned long *stab; + unsigned long *ptab; + void *pad_page; + struct list_head windows; +}; + +/* Static array of iommus, one per node + * each contains a list of windows, keyed from dma_window property + * - on bus setup, look for a matching window, or create one + * - on dev setup, assign iommu_table ptr + */ +static struct cbe_iommu iommus[NR_IOMMUS]; +static int cbe_nr_iommus; -static inline ioste -get_iost_entry(unsigned long iopt_base, unsigned long io_address, unsigned page_size) +static void invalidate_tce_cache(struct cbe_iommu *iommu, unsigned long *pte, + long n_ptes) { - unsigned long ps; - unsigned long iostep; - unsigned long nnpt; - unsigned long shift; - - switch (page_size) { - case 0x1000000: - ps = IOST_PS_16M; - nnpt = 0; /* one page per segment */ - shift = 5; /* segment has 16 iopt entries */ - break; - - case 0x100000: - ps = IOST_PS_1M; - nnpt = 0; /* one page per segment */ - shift = 1; /* segment has 256 iopt entries */ - break; - - case 0x10000: - ps = IOST_PS_64K; - nnpt = 0x07; /* 8 pages per io page table */ - shift = 0; /* all entries are used */ - break; - - case 0x1000: - ps = IOST_PS_4K; - nnpt = 0x7f; /* 128 pages per io page table */ - shift = 0; /* all entries are used */ - break; - - default: /* not a known compile time constant */ - { - /* BUILD_BUG_ON() is not usable here */ - extern void __get_iost_entry_bad_page_size(void); - __get_iost_entry_bad_page_size(); - } - break; - } + unsigned long *reg, val; + long n; - iostep = iopt_base + - /* need 8 bytes per iopte */ - (((io_address / page_size * 8) - /* align io page tables on 4k page boundaries */ - << shift) - /* nnpt+1 pages go into each iopt */ - & ~(nnpt << 12)); - - nnpt++; /* this seems to work, but the documentation is not clear - about wether we put nnpt or nnpt-1 into the ioste bits. - In theory, this can't work for 4k pages. */ - return mk_ioste(IOST_VALID_MASK - | (iostep & IOST_PT_BASE_MASK) - | ((nnpt << 5) & IOST_NNPT_MASK) - | (ps & IOST_PS_MASK)); -} + reg = iommu->xlate_regs + IOC_IOPT_CacheInvd; -/* compute the address of an io pte */ -static inline unsigned long -get_ioptep(ioste iost_entry, unsigned long io_address) -{ - unsigned long iopt_base; - unsigned long page_size; - unsigned long page_number; - unsigned long iopt_offset; - - iopt_base = iost_entry.val & IOST_PT_BASE_MASK; - page_size = iost_entry.val & IOST_PS_MASK; - - /* decode page size to compute page number */ - page_number = (io_address & 0x0fffffff) >> (10 + 2 * page_size); - /* page number is an offset into the io page table */ - iopt_offset = (page_number << 3) & 0x7fff8ul; - return iopt_base + iopt_offset; -} + while (n_ptes > 0) { + /* we can invalidate up to 1 << 11 PTEs at once */ + n = min(n_ptes, 1l << 11); + val = (((n /*- 1*/) << 53) & IOC_IOPT_CacheInvd_NE_Mask) + | (__pa(pte) & IOC_IOPT_CacheInvd_IOPTE_Mask) + | IOC_IOPT_CacheInvd_Busy; -/* compute the tag field of the iopt cache entry */ -static inline unsigned long -get_ioc_tag(ioste iost_entry, unsigned long io_address) -{ - unsigned long iopte = get_ioptep(iost_entry, io_address); + out_be64(reg, val); + while (in_be64(reg) & IOC_IOPT_CacheInvd_Busy) + ; - return IOPT_VALID_MASK - | ((iopte & 0x00000000000000ff8ul) >> 3) - | ((iopte & 0x0000003fffffc0000ul) >> 9); + n_ptes -= n; + pte += n; + } } -/* compute the hashed 6 bit index for the 4-way associative pte cache */ -static inline unsigned long -get_ioc_hash(ioste iost_entry, unsigned long io_address) +static void tce_build_cell(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction) { - unsigned long iopte = get_ioptep(iost_entry, io_address); - - return ((iopte & 0x000000000000001f8ul) >> 3) - ^ ((iopte & 0x00000000000020000ul) >> 17) - ^ ((iopte & 0x00000000000010000ul) >> 15) - ^ ((iopte & 0x00000000000008000ul) >> 13) - ^ ((iopte & 0x00000000000004000ul) >> 11) - ^ ((iopte & 0x00000000000002000ul) >> 9) - ^ ((iopte & 0x00000000000001000ul) >> 7); + int i; + unsigned long *io_pte, base_pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); + + /* implementing proper protection causes problems with the spidernet + * driver - check mapping directions later, but allow read & write by + * default for now.*/ +#ifdef CELL_IOMMU_STRICT_PROTECTION + /* to avoid referencing a global, we use a trick here to setup the + * protection bit. "prot" is setup to be 3 fields of 4 bits apprended + * together for each of the 3 supported direction values. It is then + * shifted left so that the fields matching the desired direction + * lands on the appropriate bits, and other bits are masked out. + */ + const unsigned long prot = 0xc48; + base_pte = + ((prot << (52 + 4 * direction)) & (IOPTE_PP_W | IOPTE_PP_R)) + | IOPTE_M | IOPTE_SO_RW | (window->ioid & IOPTE_IOID_Mask); +#else + base_pte = IOPTE_PP_W | IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW | + (window->ioid & IOPTE_IOID_Mask); +#endif + + io_pte = (unsigned long *)tbl->it_base + (index - window->pte_offset); + + for (i = 0; i < npages; i++, uaddr += IOMMU_PAGE_SIZE) + io_pte[i] = base_pte | (__pa(uaddr) & IOPTE_RPN_Mask); + + mb(); + + invalidate_tce_cache(window->iommu, io_pte, npages); + + pr_debug("tce_build_cell(index=%lx,n=%lx,dir=%d,base_pte=%lx)\n", + index, npages, direction, base_pte); } -/* same as above, but pretend that we have a simpler 1-way associative - pte cache with an 8 bit index */ -static inline unsigned long -get_ioc_hash_1way(ioste iost_entry, unsigned long io_address) +static void tce_free_cell(struct iommu_table *tbl, long index, long npages) { - unsigned long iopte = get_ioptep(iost_entry, io_address); - - return ((iopte & 0x000000000000001f8ul) >> 3) - ^ ((iopte & 0x00000000000020000ul) >> 17) - ^ ((iopte & 0x00000000000010000ul) >> 15) - ^ ((iopte & 0x00000000000008000ul) >> 13) - ^ ((iopte & 0x00000000000004000ul) >> 11) - ^ ((iopte & 0x00000000000002000ul) >> 9) - ^ ((iopte & 0x00000000000001000ul) >> 7) - ^ ((iopte & 0x0000000000000c000ul) >> 8); -} -static inline ioste -get_iost_cache(void __iomem *base, unsigned long index) -{ - unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); - return mk_ioste(in_be64(&p[index])); -} + int i; + unsigned long *io_pte, pte; + struct iommu_window *window = + container_of(tbl, struct iommu_window, table); -static inline void -set_iost_cache(void __iomem *base, unsigned long index, ioste ste) -{ - unsigned long __iomem *p = (base + IOC_ST_CACHE_DIR); - pr_debug("ioste %02lx was %016lx, store %016lx", index, - get_iost_cache(base, index).val, ste.val); - out_be64(&p[index], ste.val); - pr_debug(" now %016lx\n", get_iost_cache(base, index).val); -} + pr_debug("tce_free_cell(index=%lx,n=%lx)\n", index, npages); -static inline unsigned long -get_iopt_cache(void __iomem *base, unsigned long index, unsigned long *tag) -{ - unsigned long __iomem *tags = (void *)(base + IOC_PT_CACHE_DIR); - unsigned long __iomem *p = (void *)(base + IOC_PT_CACHE_REG); +#ifdef CELL_IOMMU_REAL_UNMAP + pte = 0; +#else + /* spider bridge does PCI reads after freeing - insert a mapping + * to a scratch page instead of an invalid entry */ + pte = IOPTE_PP_R | IOPTE_M | IOPTE_SO_RW | __pa(window->iommu->pad_page) + | (window->ioid & IOPTE_IOID_Mask); +#endif - *tag = tags[index]; - rmb(); - return *p; -} + io_pte = (unsigned long *)tbl->it_base + (index - window->pte_offset); -static inline void -set_iopt_cache(void __iomem *base, unsigned long index, - unsigned long tag, unsigned long val) -{ - unsigned long __iomem *tags = base + IOC_PT_CACHE_DIR; - unsigned long __iomem *p = base + IOC_PT_CACHE_REG; + for (i = 0; i < npages; i++) + io_pte[i] = pte; + + mb(); - out_be64(p, val); - out_be64(&tags[index], tag); + invalidate_tce_cache(window->iommu, io_pte, npages); } -static inline void -set_iost_origin(void __iomem *base) +static irqreturn_t ioc_interrupt(int irq, void *data) { - unsigned long __iomem *p = base + IOC_ST_ORIGIN; - unsigned long origin = IOSTO_ENABLE | IOSTO_SW; - - pr_debug("iost_origin %016lx, now %016lx\n", in_be64(p), origin); - out_be64(p, origin); + unsigned long stat; + struct cbe_iommu *iommu = data; + + stat = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + + /* Might want to rate limit it */ + printk(KERN_ERR "iommu: DMA exception 0x%016lx\n", stat); + printk(KERN_ERR " V=%d, SPF=[%c%c], RW=%s, IOID=0x%04x\n", + !!(stat & IOC_IO_ExcpStat_V), + (stat & IOC_IO_ExcpStat_SPF_S) ? 'S' : ' ', + (stat & IOC_IO_ExcpStat_SPF_P) ? 'P' : ' ', + (stat & IOC_IO_ExcpStat_RW_Mask) ? "Read" : "Write", + (unsigned int)(stat & IOC_IO_ExcpStat_IOID_Mask)); + printk(KERN_ERR " page=0x%016lx\n", + stat & IOC_IO_ExcpStat_ADDR_Mask); + + /* clear interrupt */ + stat &= ~IOC_IO_ExcpStat_V; + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, stat); + + return IRQ_HANDLED; } -static inline void -set_iocmd_config(void __iomem *base) +static int cell_iommu_find_ioc(int nid, unsigned long *base) { - unsigned long __iomem *p = base + 0xc00; - unsigned long conf; + struct device_node *np; + struct resource r; + + *base = 0; + + /* First look for new style /be nodes */ + for_each_node_by_name(np, "ioc") { + if (of_node_to_nid(np) != nid) + continue; + if (of_address_to_resource(np, 0, &r)) { + printk(KERN_ERR "iommu: can't get address for %s\n", + np->full_name); + continue; + } + *base = r.start; + of_node_put(np); + return 0; + } - conf = in_be64(p); - pr_debug("iost_conf %016lx, now %016lx\n", conf, conf | IOCMD_CONF_TE); - out_be64(p, conf | IOCMD_CONF_TE); + /* Ok, let's try the old way */ + for_each_node_by_type(np, "cpu") { + const unsigned int *nidp; + const unsigned long *tmp; + + nidp = get_property(np, "node-id", NULL); + if (nidp && *nidp == nid) { + tmp = get_property(np, "ioc-translation", NULL); + if (tmp) { + *base = *tmp; + of_node_put(np); + return 0; + } + } + } + + return -ENODEV; } -static void enable_mapping(void __iomem *base, void __iomem *mmio_base) +static void cell_iommu_setup_hardware(struct cbe_iommu *iommu, unsigned long size) { - set_iocmd_config(base); - set_iost_origin(mmio_base); -} + struct page *page; + int ret, i; + unsigned long reg, segments, pages_per_segment, ptab_size, n_pte_pages; + unsigned long xlate_base; + unsigned int virq; + + if (cell_iommu_find_ioc(iommu->nid, &xlate_base)) + panic("%s: missing IOC register mappings for node %d\n", + __FUNCTION__, iommu->nid); + + iommu->xlate_regs = ioremap(xlate_base, IOC_Reg_Size); + iommu->cmd_regs = iommu->xlate_regs + IOC_IOCmd_Offset; + + segments = size >> IO_SEGMENT_SHIFT; + pages_per_segment = 1ull << IO_PAGENO_BITS; + + pr_debug("%s: iommu[%d]: segments: %lu, pages per segment: %lu\n", + __FUNCTION__, iommu->nid, segments, pages_per_segment); + + /* set up the segment table */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->stab = page_address(page); + clear_page(iommu->stab); + + /* ... and the page tables. Since these are contiguous, we can treat + * the page tables as one array of ptes, like pSeries does. + */ + ptab_size = segments * pages_per_segment * sizeof(unsigned long); + pr_debug("%s: iommu[%d]: ptab_size: %lu, order: %d\n", __FUNCTION__, + iommu->nid, ptab_size, get_order(ptab_size)); + page = alloc_pages_node(iommu->nid, GFP_KERNEL, get_order(ptab_size)); + BUG_ON(!page); + + iommu->ptab = page_address(page); + memset(iommu->ptab, 0, ptab_size); + + /* allocate a bogus page for the end of each mapping */ + page = alloc_pages_node(iommu->nid, GFP_KERNEL, 0); + BUG_ON(!page); + iommu->pad_page = page_address(page); + clear_page(iommu->pad_page); + + /* number of pages needed for a page table */ + n_pte_pages = (pages_per_segment * + sizeof(unsigned long)) >> IOMMU_PAGE_SHIFT; + + pr_debug("%s: iommu[%d]: stab at %p, ptab at %p, n_pte_pages: %lu\n", + __FUNCTION__, iommu->nid, iommu->stab, iommu->ptab, + n_pte_pages); + + /* initialise the STEs */ + reg = IOSTE_V | ((n_pte_pages - 1) << 5); + + if (IOMMU_PAGE_SIZE == 0x1000) + reg |= IOSTE_PS_4K; + else if (IOMMU_PAGE_SIZE == 0x10000) + reg |= IOSTE_PS_64K; + else { + extern void __unknown_page_size_error(void); + __unknown_page_size_error(); + } + + pr_debug("Setting up IOMMU stab:\n"); + for (i = 0; i * (1ul << IO_SEGMENT_SHIFT) < size; i++) { + iommu->stab[i] = reg | + (__pa(iommu->ptab) + n_pte_pages * IOMMU_PAGE_SIZE * i); + pr_debug("\t[%d] 0x%016lx\n", i, iommu->stab[i]); + } -static void iommu_dev_setup_null(struct pci_dev *d) { } -static void iommu_bus_setup_null(struct pci_bus *b) { } + /* ensure that the STEs have updated */ + mb(); -struct cell_iommu { - unsigned long base; - unsigned long mmio_base; - void __iomem *mapped_base; - void __iomem *mapped_mmio_base; -}; + /* setup interrupts for the iommu. */ + reg = in_be64(iommu->xlate_regs + IOC_IO_ExcpStat); + out_be64(iommu->xlate_regs + IOC_IO_ExcpStat, + reg & ~IOC_IO_ExcpStat_V); + out_be64(iommu->xlate_regs + IOC_IO_ExcpMask, + IOC_IO_ExcpMask_PFE | IOC_IO_ExcpMask_SFE); -static struct cell_iommu cell_iommus[NR_CPUS]; + virq = irq_create_mapping(NULL, + IIC_IRQ_IOEX_ATI | (iommu->nid << IIC_IRQ_NODE_SHIFT)); + BUG_ON(virq == NO_IRQ); -/* initialize the iommu to support a simple linear mapping - * for each DMA window used by any device. For now, we - * happen to know that there is only one DMA window in use, - * starting at iopt_phys_offset. */ -static void cell_do_map_iommu(struct cell_iommu *iommu, - unsigned int ioid, - unsigned long map_start, - unsigned long map_size) -{ - unsigned long io_address, real_address; - void __iomem *ioc_base, *ioc_mmio_base; - ioste ioste; - unsigned long index; + ret = request_irq(virq, ioc_interrupt, IRQF_DISABLED, + iommu->name, iommu); + BUG_ON(ret); - /* we pretend the io page table was at a very high address */ - const unsigned long fake_iopt = 0x10000000000ul; - const unsigned long io_page_size = 0x1000000; /* use 16M pages */ - const unsigned long io_segment_size = 0x10000000; /* 256M */ - - ioc_base = iommu->mapped_base; - ioc_mmio_base = iommu->mapped_mmio_base; - - for (real_address = 0, io_address = map_start; - io_address <= map_start + map_size; - real_address += io_page_size, io_address += io_page_size) { - ioste = get_iost_entry(fake_iopt, io_address, io_page_size); - if ((real_address % io_segment_size) == 0) /* segment start */ - set_iost_cache(ioc_mmio_base, - io_address >> 28, ioste); - index = get_ioc_hash_1way(ioste, io_address); - pr_debug("addr %08lx, index %02lx, ioste %016lx\n", - io_address, index, ioste.val); - set_iopt_cache(ioc_mmio_base, - get_ioc_hash_1way(ioste, io_address), - get_ioc_tag(ioste, io_address), - get_iopt_entry(real_address, ioid, IOPT_PROT_RW)); - } + /* set the IOC segment table origin register (and turn on the iommu) */ + reg = IOC_IOST_Origin_E | __pa(iommu->stab) | IOC_IOST_Origin_HW; + out_be64(iommu->xlate_regs + IOC_IOST_Origin, reg); + in_be64(iommu->xlate_regs + IOC_IOST_Origin); + + /* turn on IO translation */ + reg = in_be64(iommu->cmd_regs + IOC_IOCmd_Cfg) | IOC_IOCmd_Cfg_TE; + out_be64(iommu->cmd_regs + IOC_IOCmd_Cfg, reg); } -static void iommu_devnode_setup(struct device_node *d) +#if 0/* Unused for now */ +static struct iommu_window *find_window(struct cbe_iommu *iommu, + unsigned long offset, unsigned long size) { - const unsigned int *ioid; - unsigned long map_start, map_size, token; - const unsigned long *dma_window; - struct cell_iommu *iommu; + struct iommu_window *window; - ioid = get_property(d, "ioid", NULL); - if (!ioid) - pr_debug("No ioid entry found !\n"); + /* todo: check for overlapping (but not equal) windows) */ - dma_window = get_property(d, "ibm,dma-window", NULL); - if (!dma_window) - pr_debug("No ibm,dma-window entry found !\n"); + list_for_each_entry(window, &(iommu->windows), list) { + if (window->offset == offset && window->size == size) + return window; + } - map_start = dma_window[1]; - map_size = dma_window[2]; - token = dma_window[0] >> 32; + return NULL; +} +#endif - iommu = &cell_iommus[token]; +static struct iommu_window * __init +cell_iommu_setup_window(struct cbe_iommu *iommu, struct device_node *np, + unsigned long offset, unsigned long size, + unsigned long pte_offset) +{ + struct iommu_window *window; + const unsigned int *ioid; - cell_do_map_iommu(iommu, *ioid, map_start, map_size); + ioid = get_property(np, "ioid", NULL); + if (ioid == NULL) + printk(KERN_WARNING "iommu: missing ioid for %s using 0\n", + np->full_name); + + window = kmalloc_node(sizeof(*window), GFP_KERNEL, iommu->nid); + BUG_ON(window == NULL); + + window->offset = offset; + window->size = size; + window->ioid = ioid ? *ioid : 0; + window->iommu = iommu; + window->pte_offset = pte_offset; + + window->table.it_blocksize = 16; + window->table.it_base = (unsigned long)iommu->ptab; + window->table.it_index = iommu->nid; + window->table.it_offset = (offset >> IOMMU_PAGE_SHIFT) + + window->pte_offset; + window->table.it_size = size >> IOMMU_PAGE_SHIFT; + + iommu_init_table(&window->table, iommu->nid); + + pr_debug("\tioid %d\n", window->ioid); + pr_debug("\tblocksize %ld\n", window->table.it_blocksize); + pr_debug("\tbase 0x%016lx\n", window->table.it_base); + pr_debug("\toffset 0x%lx\n", window->table.it_offset); + pr_debug("\tsize %ld\n", window->table.it_size); + + list_add(&window->list, &iommu->windows); + + if (offset != 0) + return window; + + /* We need to map and reserve the first IOMMU page since it's used + * by the spider workaround. In theory, we only need to do that when + * running on spider but it doesn't really matter. + * + * This code also assumes that we have a window that starts at 0, + * which is the case on all spider based blades. + */ + __set_bit(0, window->table.it_map); + tce_build_cell(&window->table, window->table.it_offset, 1, + (unsigned long)iommu->pad_page, DMA_TO_DEVICE); + window->table.it_hint = window->table.it_blocksize; + + return window; } -static void iommu_bus_setup(struct pci_bus *b) +static struct cbe_iommu *cell_iommu_for_node(int nid) { - struct device_node *d = (struct device_node *)b->sysdata; - iommu_devnode_setup(d); -} + int i; + for (i = 0; i < cbe_nr_iommus; i++) + if (iommus[i].nid == nid) + return &iommus[i]; + return NULL; +} -static int cell_map_iommu_hardcoded(int num_nodes) +static void cell_dma_dev_setup(struct device *dev) { - struct cell_iommu *iommu = NULL; - - pr_debug("%s(%d): Using hardcoded defaults\n", __FUNCTION__, __LINE__); + struct iommu_window *window; + struct cbe_iommu *iommu; + struct dev_archdata *archdata = &dev->archdata; + + /* If we run without iommu, no need to do anything */ + if (pci_dma_ops == &dma_direct_ops) + return; + + /* Current implementation uses the first window available in that + * node's iommu. We -might- do something smarter later though it may + * never be necessary + */ + iommu = cell_iommu_for_node(archdata->numa_node); + if (iommu == NULL || list_empty(&iommu->windows)) { + printk(KERN_ERR "iommu: missing iommu for %s (node %d)\n", + archdata->of_node ? archdata->of_node->full_name : "?", + archdata->numa_node); + return; + } + window = list_entry(iommu->windows.next, struct iommu_window, list); - /* node 0 */ - iommu = &cell_iommus[0]; - iommu->mapped_base = ioremap(0x20000511000ul, 0x1000); - iommu->mapped_mmio_base = ioremap(0x20000510000ul, 0x1000); + archdata->dma_data = &window->table; +} - enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); +static void cell_pci_dma_dev_setup(struct pci_dev *dev) +{ + cell_dma_dev_setup(&dev->dev); +} - cell_do_map_iommu(iommu, 0x048a, - 0x20000000ul,0x20000000ul); +static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, + void *data) +{ + struct device *dev = data; - if (num_nodes < 2) + /* We are only intereted in device addition */ + if (action != BUS_NOTIFY_ADD_DEVICE) return 0; - /* node 1 */ - iommu = &cell_iommus[1]; - iommu->mapped_base = ioremap(0x30000511000ul, 0x1000); - iommu->mapped_mmio_base = ioremap(0x30000510000ul, 0x1000); - - enable_mapping(iommu->mapped_base, iommu->mapped_mmio_base); + /* We use the PCI DMA ops */ + dev->archdata.dma_ops = pci_dma_ops; - cell_do_map_iommu(iommu, 0x048a, - 0x20000000,0x20000000ul); + cell_dma_dev_setup(dev); return 0; } +static struct notifier_block cell_of_bus_notifier = { + .notifier_call = cell_of_bus_notify +}; -static int cell_map_iommu(void) +static int __init cell_iommu_get_window(struct device_node *np, + unsigned long *base, + unsigned long *size) { - unsigned int num_nodes = 0; - const unsigned int *node_id; - const unsigned long *base, *mmio_base; - struct device_node *dn; - struct cell_iommu *iommu = NULL; - - /* determine number of nodes (=iommus) */ - pr_debug("%s(%d): determining number of nodes...", __FUNCTION__, __LINE__); - for(dn = of_find_node_by_type(NULL, "cpu"); - dn; - dn = of_find_node_by_type(dn, "cpu")) { - node_id = get_property(dn, "node-id", NULL); - - if (num_nodes < *node_id) - num_nodes = *node_id; - } - - num_nodes++; - pr_debug("%i found.\n", num_nodes); + const void *dma_window; + unsigned long index; - /* map the iommu registers for each node */ - pr_debug("%s(%d): Looping through nodes\n", __FUNCTION__, __LINE__); - for(dn = of_find_node_by_type(NULL, "cpu"); - dn; - dn = of_find_node_by_type(dn, "cpu")) { + /* Use ibm,dma-window if available, else, hard code ! */ + dma_window = get_property(np, "ibm,dma-window", NULL); + if (dma_window == NULL) { + *base = 0; + *size = 0x80000000u; + return -ENODEV; + } - node_id = get_property(dn, "node-id", NULL); - base = get_property(dn, "ioc-cache", NULL); - mmio_base = get_property(dn, "ioc-translation", NULL); + of_parse_dma_window(np, dma_window, &index, base, size); + return 0; +} - if (!base || !mmio_base || !node_id) - return cell_map_iommu_hardcoded(num_nodes); +static void __init cell_iommu_init_one(struct device_node *np, unsigned long offset) +{ + struct cbe_iommu *iommu; + unsigned long base, size; + int nid, i; + + /* Get node ID */ + nid = of_node_to_nid(np); + if (nid < 0) { + printk(KERN_ERR "iommu: failed to get node for %s\n", + np->full_name); + return; + } + pr_debug("iommu: setting up iommu for node %d (%s)\n", + nid, np->full_name); + + /* XXX todo: If we can have multiple windows on the same IOMMU, which + * isn't the case today, we probably want here to check wether the + * iommu for that node is already setup. + * However, there might be issue with getting the size right so let's + * ignore that for now. We might want to completely get rid of the + * multiple window support since the cell iommu supports per-page ioids + */ + + if (cbe_nr_iommus >= NR_IOMMUS) { + printk(KERN_ERR "iommu: too many IOMMUs detected ! (%s)\n", + np->full_name); + return; + } - iommu = &cell_iommus[*node_id]; - iommu->base = *base; - iommu->mmio_base = *mmio_base; + /* Init base fields */ + i = cbe_nr_iommus++; + iommu = &iommus[i]; + iommu->stab = 0; + iommu->nid = nid; + snprintf(iommu->name, sizeof(iommu->name), "iommu%d", i); + INIT_LIST_HEAD(&iommu->windows); - iommu->mapped_base = ioremap(*base, 0x1000); - iommu->mapped_mmio_base = ioremap(*mmio_base, 0x1000); + /* Obtain a window for it */ + cell_iommu_get_window(np, &base, &size); - enable_mapping(iommu->mapped_base, - iommu->mapped_mmio_base); + pr_debug("\ttranslating window 0x%lx...0x%lx\n", + base, base + size - 1); - /* everything else will be done in iommu_bus_setup */ - } + /* Initialize the hardware */ + cell_iommu_setup_hardware(iommu, size); - return 1; + /* Setup the iommu_table */ + cell_iommu_setup_window(iommu, np, base, size, + offset >> IOMMU_PAGE_SHIFT); } -static void *cell_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) +static void __init cell_disable_iommus(void) { - void *ret; - - ret = (void *)__get_free_pages(flag, get_order(size)); - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_abs(ret) | CELL_DMA_VALID; + int node; + unsigned long base, val; + void __iomem *xregs, *cregs; + + /* Make sure IOC translation is disabled on all nodes */ + for_each_online_node(node) { + if (cell_iommu_find_ioc(node, &base)) + continue; + xregs = ioremap(base, IOC_Reg_Size); + if (xregs == NULL) + continue; + cregs = xregs + IOC_IOCmd_Offset; + + pr_debug("iommu: cleaning up iommu on node %d\n", node); + + out_be64(xregs + IOC_IOST_Origin, 0); + (void)in_be64(xregs + IOC_IOST_Origin); + val = in_be64(cregs + IOC_IOCmd_Cfg); + val &= ~IOC_IOCmd_Cfg_TE; + out_be64(cregs + IOC_IOCmd_Cfg, val); + (void)in_be64(cregs + IOC_IOCmd_Cfg); + + iounmap(xregs); } - return ret; } -static void cell_free_coherent(struct device *hwdev, size_t size, - void *vaddr, dma_addr_t dma_handle) +static int __init cell_iommu_init_disabled(void) { - free_pages((unsigned long)vaddr, get_order(size)); -} + struct device_node *np = NULL; + unsigned long base = 0, size; + + /* When no iommu is present, we use direct DMA ops */ + pci_dma_ops = &dma_direct_ops; + + /* First make sure all IOC translation is turned off */ + cell_disable_iommus(); + + /* If we have no Axon, we set up the spider DMA magic offset */ + if (of_find_node_by_name(NULL, "axon") == NULL) + dma_direct_offset = SPIDER_DMA_OFFSET; + + /* Now we need to check to see where the memory is mapped + * in PCI space. We assume that all busses use the same dma + * window which is always the case so far on Cell, thus we + * pick up the first pci-internal node we can find and check + * the DMA window from there. + */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + if (np == NULL) { + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + if (cell_iommu_get_window(np, &base, &size) == 0) + break; + } + } + of_node_put(np); + + /* If we found a DMA window, we check if it's big enough to enclose + * all of physical memory. If not, we force enable IOMMU + */ + if (np && size < lmb_end_of_DRAM()) { + printk(KERN_WARNING "iommu: force-enabled, dma window" + " (%ldMB) smaller than total memory (%ldMB)\n", + size >> 20, lmb_end_of_DRAM() >> 20); + return -ENODEV; + } -static dma_addr_t cell_map_single(struct device *hwdev, void *ptr, - size_t size, enum dma_data_direction direction) -{ - return virt_to_abs(ptr) | CELL_DMA_VALID; -} + dma_direct_offset += base; -static void cell_unmap_single(struct device *hwdev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction) -{ + printk("iommu: disabled, direct DMA offset is 0x%lx\n", + dma_direct_offset); + + return 0; } -static int cell_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) +static int __init cell_iommu_init(void) { - int i; + struct device_node *np; + + if (!machine_is(cell)) + return -ENODEV; + + /* If IOMMU is disabled or we have little enough RAM to not need + * to enable it, we setup a direct mapping. + * + * Note: should we make sure we have the IOMMU actually disabled ? + */ + if (iommu_is_off || + (!iommu_force_on && lmb_end_of_DRAM() <= 0x80000000ull)) + if (cell_iommu_init_disabled() == 0) + goto bail; + + /* Setup various ppc_md. callbacks */ + ppc_md.pci_dma_dev_setup = cell_pci_dma_dev_setup; + ppc_md.tce_build = tce_build_cell; + ppc_md.tce_free = tce_free_cell; + + /* Create an iommu for each /axon node. */ + for_each_node_by_name(np, "axon") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, 0); + } - for (i = 0; i < nents; i++, sg++) { - sg->dma_address = (page_to_phys(sg->page) + sg->offset) - | CELL_DMA_VALID; - sg->dma_length = sg->length; + /* Create an iommu for each toplevel /pci-internal node for + * old hardware/firmware + */ + for_each_node_by_name(np, "pci-internal") { + if (np->parent == NULL || np->parent->parent != NULL) + continue; + cell_iommu_init_one(np, SPIDER_DMA_OFFSET); } - return nents; -} + /* Setup default PCI iommu ops */ + pci_dma_ops = &dma_iommu_ops; -static void cell_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, enum dma_data_direction direction) -{ -} + bail: + /* Register callbacks on OF platform device addition/removal + * to handle linking them to the right DMA operations + */ + bus_register_notifier(&of_platform_bus_type, &cell_of_bus_notifier); -static int cell_dma_supported(struct device *dev, u64 mask) -{ - return mask < 0x100000000ull; + return 0; } +arch_initcall(cell_iommu_init); -static struct dma_mapping_ops cell_iommu_ops = { - .alloc_coherent = cell_alloc_coherent, - .free_coherent = cell_free_coherent, - .map_single = cell_map_single, - .unmap_single = cell_unmap_single, - .map_sg = cell_map_sg, - .unmap_sg = cell_unmap_sg, - .dma_supported = cell_dma_supported, -}; - -void cell_init_iommu(void) -{ - int setup_bus = 0; - - if (of_find_node_by_path("/mambo")) { - pr_info("Not using iommu on systemsim\n"); - } else { - - if (!(of_chosen && - get_property(of_chosen, "linux,iommu-off", NULL))) - setup_bus = cell_map_iommu(); - - if (setup_bus) { - pr_debug("%s: IOMMU mapping activated\n", __FUNCTION__); - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup; - } else { - pr_debug("%s: IOMMU mapping activated, " - "no device action necessary\n", __FUNCTION__); - /* Direct I/O, IOMMU off */ - ppc_md.iommu_dev_setup = iommu_dev_setup_null; - ppc_md.iommu_bus_setup = iommu_bus_setup_null; - } - } - - pci_dma_ops = cell_iommu_ops; -} diff --git a/arch/powerpc/platforms/cell/iommu.h b/arch/powerpc/platforms/cell/iommu.h deleted file mode 100644 index 490d77abfe85..000000000000 --- a/arch/powerpc/platforms/cell/iommu.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef CELL_IOMMU_H -#define CELL_IOMMU_H - -/* some constants */ -enum { - /* segment table entries */ - IOST_VALID_MASK = 0x8000000000000000ul, - IOST_TAG_MASK = 0x3000000000000000ul, - IOST_PT_BASE_MASK = 0x000003fffffff000ul, - IOST_NNPT_MASK = 0x0000000000000fe0ul, - IOST_PS_MASK = 0x000000000000000ful, - - IOST_PS_4K = 0x1, - IOST_PS_64K = 0x3, - IOST_PS_1M = 0x5, - IOST_PS_16M = 0x7, - - /* iopt tag register */ - IOPT_VALID_MASK = 0x0000000200000000ul, - IOPT_TAG_MASK = 0x00000001fffffffful, - - /* iopt cache register */ - IOPT_PROT_MASK = 0xc000000000000000ul, - IOPT_PROT_NONE = 0x0000000000000000ul, - IOPT_PROT_READ = 0x4000000000000000ul, - IOPT_PROT_WRITE = 0x8000000000000000ul, - IOPT_PROT_RW = 0xc000000000000000ul, - IOPT_COHERENT = 0x2000000000000000ul, - - IOPT_ORDER_MASK = 0x1800000000000000ul, - /* order access to same IOID/VC on same address */ - IOPT_ORDER_ADDR = 0x0800000000000000ul, - /* similar, but only after a write access */ - IOPT_ORDER_WRITES = 0x1000000000000000ul, - /* Order all accesses to same IOID/VC */ - IOPT_ORDER_VC = 0x1800000000000000ul, - - IOPT_RPN_MASK = 0x000003fffffff000ul, - IOPT_HINT_MASK = 0x0000000000000800ul, - IOPT_IOID_MASK = 0x00000000000007fful, - - IOSTO_ENABLE = 0x8000000000000000ul, - IOSTO_ORIGIN = 0x000003fffffff000ul, - IOSTO_HW = 0x0000000000000800ul, - IOSTO_SW = 0x0000000000000400ul, - - IOCMD_CONF_TE = 0x0000800000000000ul, - - /* memory mapped registers */ - IOC_PT_CACHE_DIR = 0x000, - IOC_ST_CACHE_DIR = 0x800, - IOC_PT_CACHE_REG = 0x910, - IOC_ST_ORIGIN = 0x918, - IOC_CONF = 0x930, - - /* The high bit needs to be set on every DMA address, - only 2GB are addressable */ - CELL_DMA_VALID = 0x80000000, - CELL_DMA_MASK = 0x7fffffff, -}; - - -void cell_init_iommu(void); - -#endif diff --git a/arch/powerpc/platforms/cell/pervasive.c b/arch/powerpc/platforms/cell/pervasive.c index 9f2e4ed20a57..8c20f0fb8651 100644 --- a/arch/powerpc/platforms/cell/pervasive.c +++ b/arch/powerpc/platforms/cell/pervasive.c @@ -38,32 +38,25 @@ #include "pervasive.h" #include "cbe_regs.h" -static DEFINE_SPINLOCK(cbe_pervasive_lock); - -static void __init cbe_enable_pause_zero(void) +static void cbe_power_save(void) { - unsigned long thread_switch_control; - unsigned long temp_register; - struct cbe_pmd_regs __iomem *pregs; - - spin_lock_irq(&cbe_pervasive_lock); - pregs = cbe_get_cpu_pmd_regs(smp_processor_id()); - if (pregs == NULL) - goto out; + unsigned long ctrl, thread_switch_control; - pr_debug("Power Management: CPU %d\n", smp_processor_id()); - - /* Enable Pause(0) control bit */ - temp_register = in_be64(&pregs->pm_control); + /* + * We need to hard disable interrupts, but we also need to mark them + * hard disabled in the PACA so that the local_irq_enable() done by + * our caller upon return propertly hard enables. + */ + hard_irq_disable(); + get_paca()->hard_enabled = 0; - out_be64(&pregs->pm_control, - temp_register | CBE_PMD_PAUSE_ZERO_CONTROL); + ctrl = mfspr(SPRN_CTRLF); /* Enable DEC and EE interrupt request */ thread_switch_control = mfspr(SPRN_TSC_CELL); thread_switch_control |= TSC_CELL_EE_ENABLE | TSC_CELL_EE_BOOST; - switch ((mfspr(SPRN_CTRLF) & CTRL_CT)) { + switch (ctrl & CTRL_CT) { case CTRL_CT0: thread_switch_control |= TSC_CELL_DEC_ENABLE_0; break; @@ -75,58 +68,21 @@ static void __init cbe_enable_pause_zero(void) __FUNCTION__); break; } - mtspr(SPRN_TSC_CELL, thread_switch_control); -out: - spin_unlock_irq(&cbe_pervasive_lock); -} - -static void cbe_idle(void) -{ - unsigned long ctrl; + /* + * go into low thread priority, medium priority will be + * restored for us after wake-up. + */ + HMT_low(); - /* Why do we do that on every idle ? Couldn't that be done once for - * all or do we lose the state some way ? Also, the pm_control - * register setting, that can't be set once at boot ? We really want - * to move that away in order to implement a simple powersave + /* + * atomically disable thread execution and runlatch. + * External and Decrementer exceptions are still handled when the + * thread is disabled but now enter in cbe_system_reset_exception() */ - cbe_enable_pause_zero(); - - while (1) { - if (!need_resched()) { - local_irq_disable(); - while (!need_resched()) { - /* go into low thread priority */ - HMT_low(); - - /* - * atomically disable thread execution - * and runlatch. - * External and Decrementer exceptions - * are still handled when the thread - * is disabled but now enter in - * cbe_system_reset_exception() - */ - ctrl = mfspr(SPRN_CTRLF); - ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); - mtspr(SPRN_CTRLT, ctrl); - } - /* restore thread prio */ - HMT_medium(); - local_irq_enable(); - } - - /* - * turn runlatch on again before scheduling the - * process we just woke up - */ - ppc64_runlatch_on(); - - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } + ctrl &= ~(CTRL_RUNLATCH | CTRL_TE); + mtspr(SPRN_CTRLT, ctrl); } static int cbe_system_reset_exception(struct pt_regs *regs) @@ -158,9 +114,20 @@ static int cbe_system_reset_exception(struct pt_regs *regs) void __init cbe_pervasive_init(void) { + int cpu; if (!cpu_has_feature(CPU_FTR_PAUSE_ZERO)) return; - ppc_md.idle_loop = cbe_idle; + for_each_possible_cpu(cpu) { + struct cbe_pmd_regs __iomem *regs = cbe_get_cpu_pmd_regs(cpu); + if (!regs) + continue; + + /* Enable Pause(0) control bit */ + out_be64(®s->pmcr, in_be64(®s->pmcr) | + CBE_PMD_PAUSE_ZERO_CONTROL); + } + + ppc_md.power_save = cbe_power_save; ppc_md.system_reset_exception = cbe_system_reset_exception; } diff --git a/arch/powerpc/platforms/cell/pmu.c b/arch/powerpc/platforms/cell/pmu.c new file mode 100644 index 000000000000..99c612025e8f --- /dev/null +++ b/arch/powerpc/platforms/cell/pmu.c @@ -0,0 +1,429 @@ +/* + * Cell Broadband Engine Performance Monitor + * + * (C) Copyright IBM Corporation 2001,2006 + * + * Author: + * David Erb (djerb@us.ibm.com) + * Kevin Corry (kevcorry@us.ibm.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/interrupt.h> +#include <linux/types.h> +#include <asm/io.h> +#include <asm/irq_regs.h> +#include <asm/machdep.h> +#include <asm/pmc.h> +#include <asm/reg.h> +#include <asm/spu.h> + +#include "cbe_regs.h" +#include "interrupt.h" + +/* + * When writing to write-only mmio addresses, save a shadow copy. All of the + * registers are 32-bit, but stored in the upper-half of a 64-bit field in + * pmd_regs. + */ + +#define WRITE_WO_MMIO(reg, x) \ + do { \ + u32 _x = (x); \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + out_be64(&(pmd_regs->reg), (((u64)_x) << 32)); \ + shadow_regs->reg = _x; \ + } while (0) + +#define READ_SHADOW_REG(val, reg) \ + do { \ + struct cbe_pmd_shadow_regs *shadow_regs; \ + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); \ + (val) = shadow_regs->reg; \ + } while (0) + +#define READ_MMIO_UPPER32(val, reg) \ + do { \ + struct cbe_pmd_regs __iomem *pmd_regs; \ + pmd_regs = cbe_get_cpu_pmd_regs(cpu); \ + (val) = (u32)(in_be64(&pmd_regs->reg) >> 32); \ + } while (0) + +/* + * Physical counter registers. + * Each physical counter can act as one 32-bit counter or two 16-bit counters. + */ + +u32 cbe_read_phys_ctr(u32 cpu, u32 phys_ctr) +{ + u32 val_in_latch, val = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + READ_SHADOW_REG(val_in_latch, counter_value_in_latch); + + /* Read the latch or the actual counter, whichever is newer. */ + if (val_in_latch & (1 << phys_ctr)) { + READ_SHADOW_REG(val, pm_ctr[phys_ctr]); + } else { + READ_MMIO_UPPER32(val, pm_ctr[phys_ctr]); + } + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_phys_ctr); + +void cbe_write_phys_ctr(u32 cpu, u32 phys_ctr, u32 val) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + /* Writing to a counter only writes to a hardware latch. + * The new value is not propagated to the actual counter + * until the performance monitor is enabled. + */ + WRITE_WO_MMIO(pm_ctr[phys_ctr], val); + + pm_ctrl = cbe_read_pm(cpu, pm_control); + if (pm_ctrl & CBE_PM_ENABLE_PERF_MON) { + /* The counters are already active, so we need to + * rewrite the pm_control register to "re-enable" + * the PMU. + */ + cbe_write_pm(cpu, pm_control, pm_ctrl); + } else { + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch |= (1 << phys_ctr); + } + } +} +EXPORT_SYMBOL_GPL(cbe_write_phys_ctr); + +/* + * "Logical" counter registers. + * These will read/write 16-bits or 32-bits depending on the + * current size of the counter. Counters 4 - 7 are always 16-bit. + */ + +u32 cbe_read_ctr(u32 cpu, u32 ctr) +{ + u32 val; + u32 phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) + val = (ctr < NR_PHYS_CTRS) ? (val >> 16) : (val & 0xffff); + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_ctr); + +void cbe_write_ctr(u32 cpu, u32 ctr, u32 val) +{ + u32 phys_ctr; + u32 phys_val; + + phys_ctr = ctr & (NR_PHYS_CTRS - 1); + + if (cbe_get_ctr_size(cpu, phys_ctr) == 16) { + phys_val = cbe_read_phys_ctr(cpu, phys_ctr); + + if (ctr < NR_PHYS_CTRS) + val = (val << 16) | (phys_val & 0xffff); + else + val = (val & 0xffff) | (phys_val & 0xffff0000); + } + + cbe_write_phys_ctr(cpu, phys_ctr, val); +} +EXPORT_SYMBOL_GPL(cbe_write_ctr); + +/* + * Counter-control registers. + * Each "logical" counter has a corresponding control register. + */ + +u32 cbe_read_pm07_control(u32 cpu, u32 ctr) +{ + u32 pm07_control = 0; + + if (ctr < NR_CTRS) + READ_SHADOW_REG(pm07_control, pm07_control[ctr]); + + return pm07_control; +} +EXPORT_SYMBOL_GPL(cbe_read_pm07_control); + +void cbe_write_pm07_control(u32 cpu, u32 ctr, u32 val) +{ + if (ctr < NR_CTRS) + WRITE_WO_MMIO(pm07_control[ctr], val); +} +EXPORT_SYMBOL_GPL(cbe_write_pm07_control); + +/* + * Other PMU control registers. Most of these are write-only. + */ + +u32 cbe_read_pm(u32 cpu, enum pm_reg_name reg) +{ + u32 val = 0; + + switch (reg) { + case group_control: + READ_SHADOW_REG(val, group_control); + break; + + case debug_bus_control: + READ_SHADOW_REG(val, debug_bus_control); + break; + + case trace_address: + READ_MMIO_UPPER32(val, trace_address); + break; + + case ext_tr_timer: + READ_SHADOW_REG(val, ext_tr_timer); + break; + + case pm_status: + READ_MMIO_UPPER32(val, pm_status); + break; + + case pm_control: + READ_SHADOW_REG(val, pm_control); + break; + + case pm_interval: + READ_SHADOW_REG(val, pm_interval); + break; + + case pm_start_stop: + READ_SHADOW_REG(val, pm_start_stop); + break; + } + + return val; +} +EXPORT_SYMBOL_GPL(cbe_read_pm); + +void cbe_write_pm(u32 cpu, enum pm_reg_name reg, u32 val) +{ + switch (reg) { + case group_control: + WRITE_WO_MMIO(group_control, val); + break; + + case debug_bus_control: + WRITE_WO_MMIO(debug_bus_control, val); + break; + + case trace_address: + WRITE_WO_MMIO(trace_address, val); + break; + + case ext_tr_timer: + WRITE_WO_MMIO(ext_tr_timer, val); + break; + + case pm_status: + WRITE_WO_MMIO(pm_status, val); + break; + + case pm_control: + WRITE_WO_MMIO(pm_control, val); + break; + + case pm_interval: + WRITE_WO_MMIO(pm_interval, val); + break; + + case pm_start_stop: + WRITE_WO_MMIO(pm_start_stop, val); + break; + } +} +EXPORT_SYMBOL_GPL(cbe_write_pm); + +/* + * Get/set the size of a physical counter to either 16 or 32 bits. + */ + +u32 cbe_get_ctr_size(u32 cpu, u32 phys_ctr) +{ + u32 pm_ctrl, size = 0; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + size = (pm_ctrl & CBE_PM_16BIT_CTR(phys_ctr)) ? 16 : 32; + } + + return size; +} +EXPORT_SYMBOL_GPL(cbe_get_ctr_size); + +void cbe_set_ctr_size(u32 cpu, u32 phys_ctr, u32 ctr_size) +{ + u32 pm_ctrl; + + if (phys_ctr < NR_PHYS_CTRS) { + pm_ctrl = cbe_read_pm(cpu, pm_control); + switch (ctr_size) { + case 16: + pm_ctrl |= CBE_PM_16BIT_CTR(phys_ctr); + break; + + case 32: + pm_ctrl &= ~CBE_PM_16BIT_CTR(phys_ctr); + break; + } + cbe_write_pm(cpu, pm_control, pm_ctrl); + } +} +EXPORT_SYMBOL_GPL(cbe_set_ctr_size); + +/* + * Enable/disable the entire performance monitoring unit. + * When we enable the PMU, all pending writes to counters get committed. + */ + +void cbe_enable_pm(u32 cpu) +{ + struct cbe_pmd_shadow_regs *shadow_regs; + u32 pm_ctrl; + + shadow_regs = cbe_get_cpu_pmd_shadow_regs(cpu); + shadow_regs->counter_value_in_latch = 0; + + pm_ctrl = cbe_read_pm(cpu, pm_control) | CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm); + +void cbe_disable_pm(u32 cpu) +{ + u32 pm_ctrl; + pm_ctrl = cbe_read_pm(cpu, pm_control) & ~CBE_PM_ENABLE_PERF_MON; + cbe_write_pm(cpu, pm_control, pm_ctrl); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm); + +/* + * Reading from the trace_buffer. + * The trace buffer is two 64-bit registers. Reading from + * the second half automatically increments the trace_address. + */ + +void cbe_read_trace_buffer(u32 cpu, u64 *buf) +{ + struct cbe_pmd_regs __iomem *pmd_regs = cbe_get_cpu_pmd_regs(cpu); + + *buf++ = in_be64(&pmd_regs->trace_buffer_0_63); + *buf++ = in_be64(&pmd_regs->trace_buffer_64_127); +} +EXPORT_SYMBOL_GPL(cbe_read_trace_buffer); + +/* + * Enabling/disabling interrupts for the entire performance monitoring unit. + */ + +u32 cbe_query_pm_interrupts(u32 cpu) +{ + return cbe_read_pm(cpu, pm_status); +} +EXPORT_SYMBOL_GPL(cbe_query_pm_interrupts); + +u32 cbe_clear_pm_interrupts(u32 cpu) +{ + /* Reading pm_status clears the interrupt bits. */ + return cbe_query_pm_interrupts(cpu); +} +EXPORT_SYMBOL_GPL(cbe_clear_pm_interrupts); + +void cbe_enable_pm_interrupts(u32 cpu, u32 thread, u32 mask) +{ + /* Set which node and thread will handle the next interrupt. */ + iic_set_interrupt_routing(cpu, thread, 0); + + /* Enable the interrupt bits in the pm_status register. */ + if (mask) + cbe_write_pm(cpu, pm_status, mask); +} +EXPORT_SYMBOL_GPL(cbe_enable_pm_interrupts); + +void cbe_disable_pm_interrupts(u32 cpu) +{ + cbe_clear_pm_interrupts(cpu); + cbe_write_pm(cpu, pm_status, 0); +} +EXPORT_SYMBOL_GPL(cbe_disable_pm_interrupts); + +static irqreturn_t cbe_pm_irq(int irq, void *dev_id) +{ + perf_irq(get_irq_regs()); + return IRQ_HANDLED; +} + +int __init cbe_init_pm_irq(void) +{ + unsigned int irq; + int rc, node; + + for_each_node(node) { + irq = irq_create_mapping(NULL, IIC_IRQ_IOEX_PMI | + (node << IIC_IRQ_NODE_SHIFT)); + if (irq == NO_IRQ) { + printk("ERROR: Unable to allocate irq for node %d\n", + node); + return -EINVAL; + } + + rc = request_irq(irq, cbe_pm_irq, + IRQF_DISABLED, "cbe-pmu-0", NULL); + if (rc) { + printk("ERROR: Request for irq on node %d failed\n", + node); + return rc; + } + } + + return 0; +} +arch_initcall(cbe_init_pm_irq); + +void cbe_sync_irq(int node) +{ + unsigned int irq; + + irq = irq_find_mapping(NULL, + IIC_IRQ_IOEX_PMI + | (node << IIC_IRQ_NODE_SHIFT)); + + if (irq == NO_IRQ) { + printk(KERN_WARNING "ERROR, unable to get existing irq %d " \ + "for node %d\n", irq, node); + return; + } + + synchronize_irq(irq); +} +EXPORT_SYMBOL_GPL(cbe_sync_irq); + diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 22c228a49c33..36989c2eee66 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -50,9 +50,10 @@ #include <asm/spu.h> #include <asm/spu_priv1.h> #include <asm/udbg.h> +#include <asm/mpic.h> +#include <asm/of_platform.h> #include "interrupt.h" -#include "iommu.h" #include "cbe_regs.h" #include "pervasive.h" #include "ras.h" @@ -80,24 +81,72 @@ static void cell_progress(char *s, unsigned short hex) printk("*** %04x : %s\n", hex, s ? s : ""); } -static void __init cell_pcibios_fixup(void) +static int __init cell_publish_devices(void) { - struct pci_dev *dev = NULL; + if (!machine_is(cell)) + return 0; + + /* Publish OF platform devices for southbridge IOs */ + of_platform_bus_probe(NULL, NULL, NULL); + + return 0; +} +device_initcall(cell_publish_devices); + +static void cell_mpic_cascade(unsigned int irq, struct irq_desc *desc) +{ + struct mpic *mpic = desc->handler_data; + unsigned int virq; + + virq = mpic_get_one_irq(mpic); + if (virq != NO_IRQ) + generic_handle_irq(virq); + desc->chip->eoi(irq); +} - for_each_pci_dev(dev) - pci_read_irq_line(dev); +static void __init mpic_init_IRQ(void) +{ + struct device_node *dn; + struct mpic *mpic; + unsigned int virq; + + for (dn = NULL; + (dn = of_find_node_by_name(dn, "interrupt-controller"));) { + if (!device_is_compatible(dn, "CBEA,platform-open-pic")) + continue; + + /* The MPIC driver will get everything it needs from the + * device-tree, just pass 0 to all arguments + */ + mpic = mpic_alloc(dn, 0, 0, 0, 0, " MPIC "); + if (mpic == NULL) + continue; + mpic_init(mpic); + + virq = irq_of_parse_and_map(dn, 0); + if (virq == NO_IRQ) + continue; + + printk(KERN_INFO "%s : hooking up to IRQ %d\n", + dn->full_name, virq); + set_irq_data(virq, mpic); + set_irq_chained_handler(virq, cell_mpic_cascade); + } } + static void __init cell_init_irq(void) { iic_init_IRQ(); spider_init_IRQ(); + mpic_init_IRQ(); } static void __init cell_setup_arch(void) { #ifdef CONFIG_SPU_BASE - spu_priv1_ops = &spu_priv1_mmio_ops; + spu_priv1_ops = &spu_priv1_mmio_ops; + spu_management_ops = &spu_management_of_ops; #endif cbe_regs_init(); @@ -109,7 +158,6 @@ static void __init cell_setup_arch(void) #ifdef CONFIG_SMP smp_init_cell(); #endif - /* init to some ~sane value until calibrate_delay() runs */ loops_per_jiffy = 50000000; @@ -129,19 +177,6 @@ static void __init cell_setup_arch(void) mmio_nvram_init(); } -/* - * Early initialization. Relocation is on but do not reference unbolted pages - */ -static void __init cell_init_early(void) -{ - DBG(" -> cell_init_early()\n"); - - cell_init_iommu(); - - DBG(" <- cell_init_early()\n"); -} - - static int __init cell_probe(void) { unsigned long root = of_get_flat_dt_root(); @@ -168,7 +203,6 @@ define_machine(cell) { .name = "Cell", .probe = cell_probe, .setup_arch = cell_setup_arch, - .init_early = cell_init_early, .show_cpuinfo = cell_show_cpuinfo, .restart = rtas_restart, .power_off = rtas_power_off, @@ -180,7 +214,7 @@ define_machine(cell) { .check_legacy_ioport = cell_check_legacy_ioport, .progress = cell_progress, .init_IRQ = cell_init_irq, - .pcibios_fixup = cell_pcibios_fixup, + .pci_setup_phb = rtas_setup_phb, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 7aa809d5a244..bd7bffc3ddd0 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -25,22 +25,17 @@ #include <linux/interrupt.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/pci.h> -#include <linux/poll.h> #include <linux/ptrace.h> #include <linux/slab.h> #include <linux/wait.h> - -#include <asm/firmware.h> -#include <asm/io.h> -#include <asm/prom.h> +#include <linux/mm.h> +#include <linux/io.h> #include <linux/mutex.h> #include <asm/spu.h> #include <asm/spu_priv1.h> -#include <asm/mmu_context.h> - -#include "interrupt.h" +#include <asm/xmon.h> +const struct spu_management_ops *spu_management_ops; const struct spu_priv1_ops *spu_priv1_ops; EXPORT_SYMBOL_GPL(spu_priv1_ops); @@ -89,7 +84,30 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) printk("%s: invalid access during switch!\n", __func__); return 1; } - if (!mm || (REGION_ID(ea) != USER_REGION_ID)) { + esid = (ea & ESID_MASK) | SLB_ESID_V; + + switch(REGION_ID(ea)) { + case USER_REGION_ID: +#ifdef CONFIG_HUGETLB_PAGE + if (in_hugepage_area(mm->context, ea)) + llp = mmu_psize_defs[mmu_huge_psize].sllp; + else +#endif + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | + SLB_VSID_USER | llp; + break; + case VMALLOC_REGION_ID: + llp = mmu_psize_defs[mmu_virtual_psize].sllp; + vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + break; + case KERNEL_REGION_ID: + llp = mmu_psize_defs[mmu_linear_psize].sllp; + vsid = (get_kernel_vsid(ea) << SLB_VSID_SHIFT) | + SLB_VSID_KERNEL | llp; + break; + default: /* Future: support kernel segments so that drivers * can use SPUs. */ @@ -97,16 +115,6 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 1; } - esid = (ea & ESID_MASK) | SLB_ESID_V; -#ifdef CONFIG_HUGETLB_PAGE - if (in_hugepage_area(mm->context, ea)) - llp = mmu_psize_defs[mmu_huge_psize].sllp; - else -#endif - llp = mmu_psize_defs[mmu_virtual_psize].sllp; - vsid = (get_vsid(mm->context.id, ea) << SLB_VSID_SHIFT) | - SLB_VSID_USER | llp; - out_be64(&priv2->slb_index_W, spu->slb_replace); out_be64(&priv2->slb_vsid_RW, vsid); out_be64(&priv2->slb_esid_RW, esid); @@ -320,6 +328,7 @@ static void spu_free_irqs(struct spu *spu) } static struct list_head spu_list[MAX_NUMNODES]; +static LIST_HEAD(spu_full_list); static DEFINE_MUTEX(spu_mutex); static void spu_init_channels(struct spu *spu) @@ -364,8 +373,7 @@ struct spu *spu_alloc_node(int node) if (!list_empty(&spu_list[node])) { spu = list_entry(spu_list[node].next, struct spu, list); list_del_init(&spu->list); - pr_debug("Got SPU %x %d %d\n", - spu->isrc, spu->number, spu->node); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); spu_init_channels(spu); } mutex_unlock(&spu_mutex); @@ -493,280 +501,65 @@ int spu_irq_class_1_bottom(struct spu *spu) if (!error) { spu_restart_dma(spu); } else { - __spu_trap_invalid_dma(spu); + spu->dma_callback(spu, SPE_EVENT_SPE_DATA_STORAGE); } return ret; } -static int __init find_spu_node_id(struct device_node *spe) -{ - const unsigned int *id; - struct device_node *cpu; - cpu = spe->parent->parent; - id = get_property(cpu, "node-id", NULL); - return id ? *id : 0; -} - -static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, - const char *prop) -{ - static DEFINE_MUTEX(add_spumem_mutex); - - const struct address_prop { - unsigned long address; - unsigned int len; - } __attribute__((packed)) *p; - int proplen; - - unsigned long start_pfn, nr_pages; - struct pglist_data *pgdata; - struct zone *zone; - int ret; - - p = get_property(spe, prop, &proplen); - WARN_ON(proplen != sizeof (*p)); - - start_pfn = p->address >> PAGE_SHIFT; - nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT; - - pgdata = NODE_DATA(spu->nid); - zone = pgdata->node_zones; - - /* XXX rethink locking here */ - mutex_lock(&add_spumem_mutex); - ret = __add_pages(zone, start_pfn, nr_pages); - mutex_unlock(&add_spumem_mutex); - - return ret; -} +struct sysdev_class spu_sysdev_class = { + set_kset_name("spu") +}; -static void __iomem * __init map_spe_prop(struct spu *spu, - struct device_node *n, const char *name) +int spu_add_sysdev_attr(struct sysdev_attribute *attr) { - const struct address_prop { - unsigned long address; - unsigned int len; - } __attribute__((packed)) *prop; - - const void *p; - int proplen; - void __iomem *ret = NULL; - int err = 0; - - p = get_property(n, name, &proplen); - if (proplen != sizeof (struct address_prop)) - return NULL; - - prop = p; - - err = cell_spuprop_present(spu, n, name); - if (err && (err != -EEXIST)) - goto out; - - ret = ioremap(prop->address, prop->len); - - out: - return ret; -} + struct spu *spu; + mutex_lock(&spu_mutex); -static void spu_unmap(struct spu *spu) -{ - iounmap(spu->priv2); - iounmap(spu->priv1); - iounmap(spu->problem); - iounmap((__force u8 __iomem *)spu->local_store); -} + list_for_each_entry(spu, &spu_full_list, full_list) + sysdev_create_file(&spu->sysdev, attr); -/* This function shall be abstracted for HV platforms */ -static int __init spu_map_interrupts_old(struct spu *spu, struct device_node *np) -{ - unsigned int isrc; - const u32 *tmp; - - /* Get the interrupt source unit from the device-tree */ - tmp = get_property(np, "isrc", NULL); - if (!tmp) - return -ENODEV; - isrc = tmp[0]; - - /* Add the node number */ - isrc |= spu->node << IIC_IRQ_NODE_SHIFT; - spu->isrc = isrc; - - /* Now map interrupts of all 3 classes */ - spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); - spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); - spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); - - /* Right now, we only fail if class 2 failed */ - return spu->irqs[2] == NO_IRQ ? -EINVAL : 0; + mutex_unlock(&spu_mutex); + return 0; } +EXPORT_SYMBOL_GPL(spu_add_sysdev_attr); -static int __init spu_map_device_old(struct spu *spu, struct device_node *node) +int spu_add_sysdev_attr_group(struct attribute_group *attrs) { - const char *prop; - int ret; - - ret = -ENODEV; - spu->name = get_property(node, "name", NULL); - if (!spu->name) - goto out; - - prop = get_property(node, "local-store", NULL); - if (!prop) - goto out; - spu->local_store_phys = *(unsigned long *)prop; - - /* we use local store as ram, not io memory */ - spu->local_store = (void __force *) - map_spe_prop(spu, node, "local-store"); - if (!spu->local_store) - goto out; - - prop = get_property(node, "problem", NULL); - if (!prop) - goto out_unmap; - spu->problem_phys = *(unsigned long *)prop; - - spu->problem= map_spe_prop(spu, node, "problem"); - if (!spu->problem) - goto out_unmap; - - spu->priv1= map_spe_prop(spu, node, "priv1"); - /* priv1 is not available on a hypervisor */ - - spu->priv2= map_spe_prop(spu, node, "priv2"); - if (!spu->priv2) - goto out_unmap; - ret = 0; - goto out; - -out_unmap: - spu_unmap(spu); -out: - return ret; -} + struct spu *spu; + mutex_lock(&spu_mutex); -static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) -{ - struct of_irq oirq; - int ret; - int i; + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_create_group(&spu->sysdev.kobj, attrs); - for (i=0; i < 3; i++) { - ret = of_irq_map_one(np, i, &oirq); - if (ret) { - pr_debug("spu_new: failed to get irq %d\n", i); - goto err; - } - ret = -EINVAL; - pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0], - oirq.controller->full_name); - spu->irqs[i] = irq_create_of_mapping(oirq.controller, - oirq.specifier, oirq.size); - if (spu->irqs[i] == NO_IRQ) { - pr_debug("spu_new: failed to map it !\n"); - goto err; - } - } + mutex_unlock(&spu_mutex); return 0; - -err: - pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier, spu->name); - for (; i >= 0; i--) { - if (spu->irqs[i] != NO_IRQ) - irq_dispose_mapping(spu->irqs[i]); - } - return ret; } +EXPORT_SYMBOL_GPL(spu_add_sysdev_attr_group); -static int spu_map_resource(struct device_node *node, int nr, - void __iomem** virt, unsigned long *phys) -{ - struct resource resource = { }; - int ret; - - ret = of_address_to_resource(node, nr, &resource); - if (ret) - goto out; - if (phys) - *phys = resource.start; - *virt = ioremap(resource.start, resource.end - resource.start); - if (!*virt) - ret = -EINVAL; - -out: - return ret; -} - -static int __init spu_map_device(struct spu *spu, struct device_node *node) +void spu_remove_sysdev_attr(struct sysdev_attribute *attr) { - int ret = -ENODEV; - spu->name = get_property(node, "name", NULL); - if (!spu->name) - goto out; - - ret = spu_map_resource(node, 0, (void __iomem**)&spu->local_store, - &spu->local_store_phys); - if (ret) { - pr_debug("spu_new: failed to map %s resource 0\n", - node->full_name); - goto out; - } - ret = spu_map_resource(node, 1, (void __iomem**)&spu->problem, - &spu->problem_phys); - if (ret) { - pr_debug("spu_new: failed to map %s resource 1\n", - node->full_name); - goto out_unmap; - } - ret = spu_map_resource(node, 2, (void __iomem**)&spu->priv2, - NULL); - if (ret) { - pr_debug("spu_new: failed to map %s resource 2\n", - node->full_name); - goto out_unmap; - } - - if (!firmware_has_feature(FW_FEATURE_LPAR)) - ret = spu_map_resource(node, 3, (void __iomem**)&spu->priv1, - NULL); - if (ret) { - pr_debug("spu_new: failed to map %s resource 3\n", - node->full_name); - goto out_unmap; - } - pr_debug("spu_new: %s maps:\n", node->full_name); - pr_debug(" local store : 0x%016lx -> 0x%p\n", - spu->local_store_phys, spu->local_store); - pr_debug(" problem state : 0x%016lx -> 0x%p\n", - spu->problem_phys, spu->problem); - pr_debug(" priv2 : 0x%p\n", spu->priv2); - pr_debug(" priv1 : 0x%p\n", spu->priv1); + struct spu *spu; + mutex_lock(&spu_mutex); - return 0; + list_for_each_entry(spu, &spu_full_list, full_list) + sysdev_remove_file(&spu->sysdev, attr); -out_unmap: - spu_unmap(spu); -out: - pr_debug("failed to map spe %s: %d\n", spu->name, ret); - return ret; + mutex_unlock(&spu_mutex); } +EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr); -struct sysdev_class spu_sysdev_class = { - set_kset_name("spu") -}; - -static ssize_t spu_show_isrc(struct sys_device *sysdev, char *buf) +void spu_remove_sysdev_attr_group(struct attribute_group *attrs) { - struct spu *spu = container_of(sysdev, struct spu, sysdev); - return sprintf(buf, "%d\n", spu->isrc); + struct spu *spu; + mutex_lock(&spu_mutex); -} -static SYSDEV_ATTR(isrc, 0400, spu_show_isrc, NULL); + list_for_each_entry(spu, &spu_full_list, full_list) + sysfs_remove_group(&spu->sysdev.kobj, attrs); -extern int attach_sysdev_to_node(struct sys_device *dev, int nid); + mutex_unlock(&spu_mutex); +} +EXPORT_SYMBOL_GPL(spu_remove_sysdev_attr_group); static int spu_create_sysdev(struct spu *spu) { @@ -781,21 +574,18 @@ static int spu_create_sysdev(struct spu *spu) return ret; } - if (spu->isrc != 0) - sysdev_create_file(&spu->sysdev, &attr_isrc); - sysfs_add_device_to_node(&spu->sysdev, spu->nid); + sysfs_add_device_to_node(&spu->sysdev, spu->node); return 0; } static void spu_destroy_sysdev(struct spu *spu) { - sysdev_remove_file(&spu->sysdev, &attr_isrc); - sysfs_remove_device_from_node(&spu->sysdev, spu->nid); + sysfs_remove_device_from_node(&spu->sysdev, spu->node); sysdev_unregister(&spu->sysdev); } -static int __init create_spu(struct device_node *spe) +static int __init create_spu(void *data) { struct spu *spu; int ret; @@ -806,57 +596,37 @@ static int __init create_spu(struct device_node *spe) if (!spu) goto out; - spu->node = find_spu_node_id(spe); - if (spu->node >= MAX_NUMNODES) { - printk(KERN_WARNING "SPE %s on node %d ignored," - " node number too big\n", spe->full_name, spu->node); - printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); - return -ENODEV; - } - spu->nid = of_node_to_nid(spe); - if (spu->nid == -1) - spu->nid = 0; + spin_lock_init(&spu->register_lock); + mutex_lock(&spu_mutex); + spu->number = number++; + mutex_unlock(&spu_mutex); + + ret = spu_create_spu(spu, data); - ret = spu_map_device(spu, spe); - /* try old method */ - if (ret) - ret = spu_map_device_old(spu, spe); if (ret) goto out_free; - ret = spu_map_interrupts(spu, spe); - if (ret) - ret = spu_map_interrupts_old(spu, spe); - if (ret) - goto out_unmap; - spin_lock_init(&spu->register_lock); - spu_mfc_sdr_set(spu, mfspr(SPRN_SDR1)); + spu_mfc_sdr_setup(spu); spu_mfc_sr1_set(spu, 0x33); - mutex_lock(&spu_mutex); - - spu->number = number++; ret = spu_request_irqs(spu); if (ret) - goto out_unlock; + goto out_destroy; ret = spu_create_sysdev(spu); if (ret) goto out_free_irqs; + mutex_lock(&spu_mutex); list_add(&spu->list, &spu_list[spu->node]); + list_add(&spu->full_list, &spu_full_list); mutex_unlock(&spu_mutex); - pr_debug(KERN_DEBUG "Using SPE %s %02x %p %p %p %p %d\n", - spu->name, spu->isrc, spu->local_store, - spu->problem, spu->priv1, spu->priv2, spu->number); goto out; out_free_irqs: spu_free_irqs(spu); -out_unlock: - mutex_unlock(&spu_mutex); -out_unmap: - spu_unmap(spu); +out_destroy: + spu_destroy_spu(spu); out_free: kfree(spu); out: @@ -866,10 +636,11 @@ out: static void destroy_spu(struct spu *spu) { list_del_init(&spu->list); + list_del_init(&spu->full_list); spu_destroy_sysdev(spu); spu_free_irqs(spu); - spu_unmap(spu); + spu_destroy_spu(spu); kfree(spu); } @@ -890,9 +661,11 @@ module_exit(cleanup_spu_base); static int __init init_spu_base(void) { - struct device_node *node; int i, ret; + if (!spu_management_ops) + return 0; + /* create sysdev class for spus */ ret = sysdev_class_register(&spu_sysdev_class); if (ret) @@ -901,17 +674,17 @@ static int __init init_spu_base(void) for (i = 0; i < MAX_NUMNODES; i++) INIT_LIST_HEAD(&spu_list[i]); - ret = -ENODEV; - for (node = of_find_node_by_type(NULL, "spe"); - node; node = of_find_node_by_type(node, "spe")) { - ret = create_spu(node); - if (ret) { - printk(KERN_WARNING "%s: Error initializing %s\n", - __FUNCTION__, node->name); - cleanup_spu_base(); - break; - } + ret = spu_enumerate_spus(create_spu); + + if (ret) { + printk(KERN_WARNING "%s: Error initializing spus\n", + __FUNCTION__); + cleanup_spu_base(); + return ret; } + + xmon_register_spus(&spu_full_list); + return ret; } module_init(init_spu_base); diff --git a/arch/powerpc/platforms/cell/spu_coredump.c b/arch/powerpc/platforms/cell/spu_coredump.c new file mode 100644 index 000000000000..6915b418ee73 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_coredump.c @@ -0,0 +1,81 @@ +/* + * SPU core dump code + * + * (C) Copyright 2006 IBM Corp. + * + * Author: Dwayne Grant McConnell <decimal@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/file.h> +#include <linux/module.h> +#include <linux/syscalls.h> + +#include <asm/spu.h> + +static struct spu_coredump_calls spu_coredump_calls; +static DEFINE_MUTEX(spu_coredump_mutex); + +int arch_notes_size(void) +{ + long ret; + struct module *owner = spu_coredump_calls.owner; + + ret = -ENOSYS; + mutex_lock(&spu_coredump_mutex); + if (owner && try_module_get(owner)) { + ret = spu_coredump_calls.arch_notes_size(); + module_put(owner); + } + mutex_unlock(&spu_coredump_mutex); + return ret; +} + +void arch_write_notes(struct file *file) +{ + struct module *owner = spu_coredump_calls.owner; + + mutex_lock(&spu_coredump_mutex); + if (owner && try_module_get(owner)) { + spu_coredump_calls.arch_write_notes(file); + module_put(owner); + } + mutex_unlock(&spu_coredump_mutex); +} + +int register_arch_coredump_calls(struct spu_coredump_calls *calls) +{ + if (spu_coredump_calls.owner) + return -EBUSY; + + mutex_lock(&spu_coredump_mutex); + spu_coredump_calls.arch_notes_size = calls->arch_notes_size; + spu_coredump_calls.arch_write_notes = calls->arch_write_notes; + spu_coredump_calls.owner = calls->owner; + mutex_unlock(&spu_coredump_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(register_arch_coredump_calls); + +void unregister_arch_coredump_calls(struct spu_coredump_calls *calls) +{ + BUG_ON(spu_coredump_calls.owner != calls->owner); + + mutex_lock(&spu_coredump_mutex); + spu_coredump_calls.owner = NULL; + mutex_unlock(&spu_coredump_mutex); +} +EXPORT_SYMBOL_GPL(unregister_arch_coredump_calls); diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c index 71b69f0a1a48..a5de0430c56d 100644 --- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c @@ -18,120 +18,498 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include <linux/interrupt.h> +#include <linux/list.h> #include <linux/module.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/wait.h> +#include <linux/mm.h> +#include <linux/io.h> +#include <linux/mutex.h> +#include <linux/device.h> -#include <asm/io.h> #include <asm/spu.h> #include <asm/spu_priv1.h> +#include <asm/firmware.h> +#include <asm/prom.h> #include "interrupt.h" +#include "spu_priv1_mmio.h" + +struct spu_pdata { + int nid; + struct device_node *devnode; + struct spu_priv1 __iomem *priv1; +}; + +static struct spu_pdata *spu_get_pdata(struct spu *spu) +{ + BUG_ON(!spu->pdata); + return spu->pdata; +} + +struct device_node *spu_devnode(struct spu *spu) +{ + return spu_get_pdata(spu)->devnode; +} + +EXPORT_SYMBOL_GPL(spu_devnode); + +static int __init find_spu_node_id(struct device_node *spe) +{ + const unsigned int *id; + struct device_node *cpu; + cpu = spe->parent->parent; + id = get_property(cpu, "node-id", NULL); + return id ? *id : 0; +} + +static int __init cell_spuprop_present(struct spu *spu, struct device_node *spe, + const char *prop) +{ + static DEFINE_MUTEX(add_spumem_mutex); + + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *p; + int proplen; + + unsigned long start_pfn, nr_pages; + struct pglist_data *pgdata; + struct zone *zone; + int ret; + + p = get_property(spe, prop, &proplen); + WARN_ON(proplen != sizeof (*p)); + + start_pfn = p->address >> PAGE_SHIFT; + nr_pages = ((unsigned long)p->len + PAGE_SIZE - 1) >> PAGE_SHIFT; + + pgdata = NODE_DATA(spu_get_pdata(spu)->nid); + zone = pgdata->node_zones; + + /* XXX rethink locking here */ + mutex_lock(&add_spumem_mutex); + ret = __add_pages(zone, start_pfn, nr_pages); + mutex_unlock(&add_spumem_mutex); + + return ret; +} + +static void __iomem * __init map_spe_prop(struct spu *spu, + struct device_node *n, const char *name) +{ + const struct address_prop { + unsigned long address; + unsigned int len; + } __attribute__((packed)) *prop; + + const void *p; + int proplen; + void __iomem *ret = NULL; + int err = 0; + + p = get_property(n, name, &proplen); + if (proplen != sizeof (struct address_prop)) + return NULL; + + prop = p; + + err = cell_spuprop_present(spu, n, name); + if (err && (err != -EEXIST)) + goto out; + + ret = ioremap(prop->address, prop->len); + + out: + return ret; +} + +static void spu_unmap(struct spu *spu) +{ + iounmap(spu->priv2); + iounmap(spu_get_pdata(spu)->priv1); + iounmap(spu->problem); + iounmap((__force u8 __iomem *)spu->local_store); +} + +static int __init spu_map_interrupts_old(struct spu *spu, + struct device_node *np) +{ + unsigned int isrc; + const u32 *tmp; + + /* Get the interrupt source unit from the device-tree */ + tmp = get_property(np, "isrc", NULL); + if (!tmp) + return -ENODEV; + isrc = tmp[0]; + + /* Add the node number */ + isrc |= spu->node << IIC_IRQ_NODE_SHIFT; + + /* Now map interrupts of all 3 classes */ + spu->irqs[0] = irq_create_mapping(NULL, IIC_IRQ_CLASS_0 | isrc); + spu->irqs[1] = irq_create_mapping(NULL, IIC_IRQ_CLASS_1 | isrc); + spu->irqs[2] = irq_create_mapping(NULL, IIC_IRQ_CLASS_2 | isrc); + + /* Right now, we only fail if class 2 failed */ + return spu->irqs[2] == NO_IRQ ? -EINVAL : 0; +} + +static int __init spu_map_device_old(struct spu *spu, struct device_node *node) +{ + const char *prop; + int ret; + + ret = -ENODEV; + spu->name = get_property(node, "name", NULL); + if (!spu->name) + goto out; + + prop = get_property(node, "local-store", NULL); + if (!prop) + goto out; + spu->local_store_phys = *(unsigned long *)prop; + + /* we use local store as ram, not io memory */ + spu->local_store = (void __force *) + map_spe_prop(spu, node, "local-store"); + if (!spu->local_store) + goto out; + + prop = get_property(node, "problem", NULL); + if (!prop) + goto out_unmap; + spu->problem_phys = *(unsigned long *)prop; + + spu->problem= map_spe_prop(spu, node, "problem"); + if (!spu->problem) + goto out_unmap; + + spu_get_pdata(spu)->priv1= map_spe_prop(spu, node, "priv1"); + + spu->priv2= map_spe_prop(spu, node, "priv2"); + if (!spu->priv2) + goto out_unmap; + ret = 0; + goto out; + +out_unmap: + spu_unmap(spu); +out: + return ret; +} + +static int __init spu_map_interrupts(struct spu *spu, struct device_node *np) +{ + struct of_irq oirq; + int ret; + int i; + + for (i=0; i < 3; i++) { + ret = of_irq_map_one(np, i, &oirq); + if (ret) { + pr_debug("spu_new: failed to get irq %d\n", i); + goto err; + } + ret = -EINVAL; + pr_debug(" irq %d no 0x%x on %s\n", i, oirq.specifier[0], + oirq.controller->full_name); + spu->irqs[i] = irq_create_of_mapping(oirq.controller, + oirq.specifier, oirq.size); + if (spu->irqs[i] == NO_IRQ) { + pr_debug("spu_new: failed to map it !\n"); + goto err; + } + } + return 0; + +err: + pr_debug("failed to map irq %x for spu %s\n", *oirq.specifier, + spu->name); + for (; i >= 0; i--) { + if (spu->irqs[i] != NO_IRQ) + irq_dispose_mapping(spu->irqs[i]); + } + return ret; +} + +static int spu_map_resource(struct device_node *node, int nr, + void __iomem** virt, unsigned long *phys) +{ + struct resource resource = { }; + int ret; + + ret = of_address_to_resource(node, nr, &resource); + if (ret) + goto out; + + if (phys) + *phys = resource.start; + *virt = ioremap(resource.start, resource.end - resource.start); + if (!*virt) + ret = -EINVAL; + +out: + return ret; +} + +static int __init spu_map_device(struct spu *spu, struct device_node *node) +{ + int ret = -ENODEV; + spu->name = get_property(node, "name", NULL); + if (!spu->name) + goto out; + + ret = spu_map_resource(node, 0, (void __iomem**)&spu->local_store, + &spu->local_store_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 0\n", + node->full_name); + goto out; + } + ret = spu_map_resource(node, 1, (void __iomem**)&spu->problem, + &spu->problem_phys); + if (ret) { + pr_debug("spu_new: failed to map %s resource 1\n", + node->full_name); + goto out_unmap; + } + ret = spu_map_resource(node, 2, (void __iomem**)&spu->priv2, + NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 2\n", + node->full_name); + goto out_unmap; + } + if (!firmware_has_feature(FW_FEATURE_LPAR)) + ret = spu_map_resource(node, 3, + (void __iomem**)&spu_get_pdata(spu)->priv1, NULL); + if (ret) { + pr_debug("spu_new: failed to map %s resource 3\n", + node->full_name); + goto out_unmap; + } + pr_debug("spu_new: %s maps:\n", node->full_name); + pr_debug(" local store : 0x%016lx -> 0x%p\n", + spu->local_store_phys, spu->local_store); + pr_debug(" problem state : 0x%016lx -> 0x%p\n", + spu->problem_phys, spu->problem); + pr_debug(" priv2 : 0x%p\n", spu->priv2); + pr_debug(" priv1 : 0x%p\n", + spu_get_pdata(spu)->priv1); + + return 0; + +out_unmap: + spu_unmap(spu); +out: + pr_debug("failed to map spe %s: %d\n", spu->name, ret); + return ret; +} + +static int __init of_enumerate_spus(int (*fn)(void *data)) +{ + int ret; + struct device_node *node; + + ret = -ENODEV; + for (node = of_find_node_by_type(NULL, "spe"); + node; node = of_find_node_by_type(node, "spe")) { + ret = fn(node); + if (ret) { + printk(KERN_WARNING "%s: Error initializing %s\n", + __FUNCTION__, node->name); + break; + } + } + return ret; +} + +static int __init of_create_spu(struct spu *spu, void *data) +{ + int ret; + struct device_node *spe = (struct device_node *)data; + + spu->pdata = kzalloc(sizeof(struct spu_pdata), + GFP_KERNEL); + if (!spu->pdata) { + ret = -ENOMEM; + goto out; + } + + spu->node = find_spu_node_id(spe); + if (spu->node >= MAX_NUMNODES) { + printk(KERN_WARNING "SPE %s on node %d ignored," + " node number too big\n", spe->full_name, spu->node); + printk(KERN_WARNING "Check if CONFIG_NUMA is enabled.\n"); + ret = -ENODEV; + goto out_free; + } + + spu_get_pdata(spu)->nid = of_node_to_nid(spe); + if (spu_get_pdata(spu)->nid == -1) + spu_get_pdata(spu)->nid = 0; + + ret = spu_map_device(spu, spe); + /* try old method */ + if (ret) + ret = spu_map_device_old(spu, spe); + if (ret) + goto out_free; + + ret = spu_map_interrupts(spu, spe); + if (ret) + ret = spu_map_interrupts_old(spu, spe); + if (ret) + goto out_unmap; + + spu_get_pdata(spu)->devnode = of_node_get(spe); + + pr_debug(KERN_DEBUG "Using SPE %s %p %p %p %p %d\n", spu->name, + spu->local_store, spu->problem, spu_get_pdata(spu)->priv1, + spu->priv2, spu->number); + goto out; + +out_unmap: + spu_unmap(spu); +out_free: + kfree(spu->pdata); + spu->pdata = NULL; +out: + return ret; +} + +static int of_destroy_spu(struct spu *spu) +{ + spu_unmap(spu); + of_node_put(spu_get_pdata(spu)->devnode); + kfree(spu->pdata); + spu->pdata = NULL; + return 0; +} + +const struct spu_management_ops spu_management_of_ops = { + .enumerate_spus = of_enumerate_spus, + .create_spu = of_create_spu, + .destroy_spu = of_destroy_spu, +}; static void int_mask_and(struct spu *spu, int class, u64 mask) { u64 old_mask; - old_mask = in_be64(&spu->priv1->int_mask_RW[class]); - out_be64(&spu->priv1->int_mask_RW[class], old_mask & mask); + old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); + out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], + old_mask & mask); } static void int_mask_or(struct spu *spu, int class, u64 mask) { u64 old_mask; - old_mask = in_be64(&spu->priv1->int_mask_RW[class]); - out_be64(&spu->priv1->int_mask_RW[class], old_mask | mask); + old_mask = in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); + out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], + old_mask | mask); } static void int_mask_set(struct spu *spu, int class, u64 mask) { - out_be64(&spu->priv1->int_mask_RW[class], mask); + out_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class], mask); } static u64 int_mask_get(struct spu *spu, int class) { - return in_be64(&spu->priv1->int_mask_RW[class]); + return in_be64(&spu_get_pdata(spu)->priv1->int_mask_RW[class]); } static void int_stat_clear(struct spu *spu, int class, u64 stat) { - out_be64(&spu->priv1->int_stat_RW[class], stat); + out_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class], stat); } static u64 int_stat_get(struct spu *spu, int class) { - return in_be64(&spu->priv1->int_stat_RW[class]); + return in_be64(&spu_get_pdata(spu)->priv1->int_stat_RW[class]); } static void cpu_affinity_set(struct spu *spu, int cpu) { u64 target = iic_get_target_id(cpu); u64 route = target << 48 | target << 32 | target << 16; - out_be64(&spu->priv1->int_route_RW, route); + out_be64(&spu_get_pdata(spu)->priv1->int_route_RW, route); } static u64 mfc_dar_get(struct spu *spu) { - return in_be64(&spu->priv1->mfc_dar_RW); + return in_be64(&spu_get_pdata(spu)->priv1->mfc_dar_RW); } static u64 mfc_dsisr_get(struct spu *spu) { - return in_be64(&spu->priv1->mfc_dsisr_RW); + return in_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW); } static void mfc_dsisr_set(struct spu *spu, u64 dsisr) { - out_be64(&spu->priv1->mfc_dsisr_RW, dsisr); + out_be64(&spu_get_pdata(spu)->priv1->mfc_dsisr_RW, dsisr); } -static void mfc_sdr_set(struct spu *spu, u64 sdr) +static void mfc_sdr_setup(struct spu *spu) { - out_be64(&spu->priv1->mfc_sdr_RW, sdr); + out_be64(&spu_get_pdata(spu)->priv1->mfc_sdr_RW, mfspr(SPRN_SDR1)); } static void mfc_sr1_set(struct spu *spu, u64 sr1) { - out_be64(&spu->priv1->mfc_sr1_RW, sr1); + out_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW, sr1); } static u64 mfc_sr1_get(struct spu *spu) { - return in_be64(&spu->priv1->mfc_sr1_RW); + return in_be64(&spu_get_pdata(spu)->priv1->mfc_sr1_RW); } static void mfc_tclass_id_set(struct spu *spu, u64 tclass_id) { - out_be64(&spu->priv1->mfc_tclass_id_RW, tclass_id); + out_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW, tclass_id); } static u64 mfc_tclass_id_get(struct spu *spu) { - return in_be64(&spu->priv1->mfc_tclass_id_RW); + return in_be64(&spu_get_pdata(spu)->priv1->mfc_tclass_id_RW); } static void tlb_invalidate(struct spu *spu) { - out_be64(&spu->priv1->tlb_invalidate_entry_W, 0ul); + out_be64(&spu_get_pdata(spu)->priv1->tlb_invalidate_entry_W, 0ul); } static void resource_allocation_groupID_set(struct spu *spu, u64 id) { - out_be64(&spu->priv1->resource_allocation_groupID_RW, id); + out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW, + id); } static u64 resource_allocation_groupID_get(struct spu *spu) { - return in_be64(&spu->priv1->resource_allocation_groupID_RW); + return in_be64( + &spu_get_pdata(spu)->priv1->resource_allocation_groupID_RW); } static void resource_allocation_enable_set(struct spu *spu, u64 enable) { - out_be64(&spu->priv1->resource_allocation_enable_RW, enable); + out_be64(&spu_get_pdata(spu)->priv1->resource_allocation_enable_RW, + enable); } static u64 resource_allocation_enable_get(struct spu *spu) { - return in_be64(&spu->priv1->resource_allocation_enable_RW); + return in_be64( + &spu_get_pdata(spu)->priv1->resource_allocation_enable_RW); } const struct spu_priv1_ops spu_priv1_mmio_ops = @@ -146,7 +524,7 @@ const struct spu_priv1_ops spu_priv1_mmio_ops = .mfc_dar_get = mfc_dar_get, .mfc_dsisr_get = mfc_dsisr_get, .mfc_dsisr_set = mfc_dsisr_set, - .mfc_sdr_set = mfc_sdr_set, + .mfc_sdr_setup = mfc_sdr_setup, .mfc_sr1_set = mfc_sr1_set, .mfc_sr1_get = mfc_sr1_get, .mfc_tclass_id_set = mfc_tclass_id_set, diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.h b/arch/powerpc/platforms/cell/spu_priv1_mmio.h new file mode 100644 index 000000000000..7b62bd1cc256 --- /dev/null +++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.h @@ -0,0 +1,26 @@ +/* + * spu hypervisor abstraction for direct hardware access. + * + * Copyright (C) 2006 Sony Computer Entertainment Inc. + * Copyright 2006 Sony Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef SPU_PRIV1_MMIO_H +#define SPU_PRIV1_MMIO_H + +struct device_node *spu_devnode(struct spu *spu); + +#endif /* SPU_PRIV1_MMIO_H */ diff --git a/arch/powerpc/platforms/cell/spufs/Makefile b/arch/powerpc/platforms/cell/spufs/Makefile index ecdfbb35f82e..472217d19faf 100644 --- a/arch/powerpc/platforms/cell/spufs/Makefile +++ b/arch/powerpc/platforms/cell/spufs/Makefile @@ -1,7 +1,7 @@ obj-y += switch.o obj-$(CONFIG_SPU_FS) += spufs.o -spufs-y += inode.o file.o context.o syscalls.o +spufs-y += inode.o file.o context.o syscalls.o coredump.o spufs-y += sched.o backing_ops.o hw_ops.o run.o gang.o # Rules to build switch.o with the help of SPU tool chain diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c index 2d22cd59d6fc..1898f0d3a8b8 100644 --- a/arch/powerpc/platforms/cell/spufs/backing_ops.c +++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c @@ -36,6 +36,7 @@ #include <asm/io.h> #include <asm/spu.h> #include <asm/spu_csa.h> +#include <asm/spu_info.h> #include <asm/mmu_context.h> #include "spufs.h" @@ -267,6 +268,11 @@ static char *spu_backing_get_ls(struct spu_context *ctx) return ctx->csa.lscsa->ls; } +static u32 spu_backing_runcntl_read(struct spu_context *ctx) +{ + return ctx->csa.prob.spu_runcntl_RW; +} + static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) { spin_lock(&ctx->csa.register_lock); @@ -279,9 +285,26 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val) spin_unlock(&ctx->csa.register_lock); } -static void spu_backing_runcntl_stop(struct spu_context *ctx) +static void spu_backing_master_start(struct spu_context *ctx) +{ + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); +} + +static void spu_backing_master_stop(struct spu_context *ctx) { - spu_backing_runcntl_write(ctx, SPU_RUNCNTL_STOP); + struct spu_state *csa = &ctx->csa; + u64 sr1; + + spin_lock(&csa->register_lock); + sr1 = csa->priv1.mfc_sr1_RW & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + csa->priv1.mfc_sr1_RW = sr1; + spin_unlock(&csa->register_lock); } static int spu_backing_set_mfc_query(struct spu_context * ctx, u32 mask, @@ -345,8 +368,10 @@ struct spu_context_ops spu_backing_ops = { .npc_write = spu_backing_npc_write, .status_read = spu_backing_status_read, .get_ls = spu_backing_get_ls, + .runcntl_read = spu_backing_runcntl_read, .runcntl_write = spu_backing_runcntl_write, - .runcntl_stop = spu_backing_runcntl_stop, + .master_start = spu_backing_master_start, + .master_stop = spu_backing_master_stop, .set_mfc_query = spu_backing_set_mfc_query, .read_mfc_tagstatus = spu_backing_read_mfc_tagstatus, .get_mfc_free_elements = spu_backing_get_mfc_free_elements, diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c index 034cf6af53a2..0870009f56db 100644 --- a/arch/powerpc/platforms/cell/spufs/context.c +++ b/arch/powerpc/platforms/cell/spufs/context.c @@ -120,6 +120,33 @@ void spu_unmap_mappings(struct spu_context *ctx) unmap_mapping_range(ctx->signal2, 0, 0x4000, 1); } +int spu_acquire_exclusive(struct spu_context *ctx) +{ + int ret = 0; + + down_write(&ctx->state_sema); + /* ctx is about to be freed, can't acquire any more */ + if (!ctx->owner) { + ret = -EINVAL; + goto out; + } + + if (ctx->state == SPU_STATE_SAVED) { + ret = spu_activate(ctx, 0); + if (ret) + goto out; + ctx->state = SPU_STATE_RUNNABLE; + } else { + /* We need to exclude userspace access to the context. */ + spu_unmap_mappings(ctx); + } + +out: + if (ret) + up_write(&ctx->state_sema); + return ret; +} + int spu_acquire_runnable(struct spu_context *ctx) { int ret = 0; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c new file mode 100644 index 000000000000..26945c491f6b --- /dev/null +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -0,0 +1,238 @@ +/* + * SPU core dump code + * + * (C) Copyright 2006 IBM Corp. + * + * Author: Dwayne Grant McConnell <decimal@us.ibm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/elf.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/syscalls.h> + +#include <asm/uaccess.h> + +#include "spufs.h" + +struct spufs_ctx_info { + struct list_head list; + int dfd; + int memsize; /* in bytes */ + struct spu_context *ctx; +}; + +static LIST_HEAD(ctx_info_list); + +static ssize_t do_coredump_read(int num, struct spu_context *ctx, void __user *buffer, + size_t size, loff_t *off) +{ + u64 data; + int ret; + + if (spufs_coredump_read[num].read) + return spufs_coredump_read[num].read(ctx, buffer, size, off); + + data = spufs_coredump_read[num].get(ctx); + ret = copy_to_user(buffer, &data, 8); + return ret ? -EFAULT : 8; +} + +/* + * These are the only things you should do on a core-file: use only these + * functions to write out all the necessary info. + */ +static int spufs_dump_write(struct file *file, const void *addr, int nr) +{ + return file->f_op->write(file, addr, nr, &file->f_pos) == nr; +} + +static int spufs_dump_seek(struct file *file, loff_t off) +{ + if (file->f_op->llseek) { + if (file->f_op->llseek(file, off, 0) != off) + return 0; + } else + file->f_pos = off; + return 1; +} + +static void spufs_fill_memsize(struct spufs_ctx_info *ctx_info) +{ + struct spu_context *ctx; + unsigned long long lslr; + + ctx = ctx_info->ctx; + lslr = ctx->csa.priv2.spu_lslr_RW; + ctx_info->memsize = lslr + 1; +} + +static int spufs_ctx_note_size(struct spufs_ctx_info *ctx_info) +{ + int dfd, memsize, i, sz, total = 0; + char *name; + char fullname[80]; + + dfd = ctx_info->dfd; + memsize = ctx_info->memsize; + + for (i = 0; spufs_coredump_read[i].name; i++) { + name = spufs_coredump_read[i].name; + sz = spufs_coredump_read[i].size; + + sprintf(fullname, "SPU/%d/%s", dfd, name); + + total += sizeof(struct elf_note); + total += roundup(strlen(fullname) + 1, 4); + if (!strcmp(name, "mem")) + total += roundup(memsize, 4); + else + total += roundup(sz, 4); + } + + return total; +} + +static int spufs_add_one_context(struct file *file, int dfd) +{ + struct spu_context *ctx; + struct spufs_ctx_info *ctx_info; + int size; + + ctx = SPUFS_I(file->f_dentry->d_inode)->i_ctx; + if (ctx->flags & SPU_CREATE_NOSCHED) + return 0; + + ctx_info = kzalloc(sizeof(*ctx_info), GFP_KERNEL); + if (unlikely(!ctx_info)) + return -ENOMEM; + + ctx_info->dfd = dfd; + ctx_info->ctx = ctx; + + spufs_fill_memsize(ctx_info); + + size = spufs_ctx_note_size(ctx_info); + list_add(&ctx_info->list, &ctx_info_list); + return size; +} + +/* + * The additional architecture-specific notes for Cell are various + * context files in the spu context. + * + * This function iterates over all open file descriptors and sees + * if they are a directory in spufs. In that case we use spufs + * internal functionality to dump them without needing to actually + * open the files. + */ +static int spufs_arch_notes_size(void) +{ + struct fdtable *fdt = files_fdtable(current->files); + int size = 0, fd; + + for (fd = 0; fd < fdt->max_fdset && fd < fdt->max_fds; fd++) { + if (FD_ISSET(fd, fdt->open_fds)) { + struct file *file = fcheck(fd); + + if (file && file->f_op == &spufs_context_fops) { + int rval = spufs_add_one_context(file, fd); + if (rval < 0) + break; + size += rval; + } + } + } + + return size; +} + +static void spufs_arch_write_note(struct spufs_ctx_info *ctx_info, int i, + struct file *file) +{ + struct spu_context *ctx; + loff_t pos = 0; + int sz, dfd, rc, total = 0; + const int bufsz = 4096; + char *name; + char fullname[80], *buf; + struct elf_note en; + + buf = kmalloc(bufsz, GFP_KERNEL); + if (!buf) + return; + + dfd = ctx_info->dfd; + name = spufs_coredump_read[i].name; + + if (!strcmp(name, "mem")) + sz = ctx_info->memsize; + else + sz = spufs_coredump_read[i].size; + + ctx = ctx_info->ctx; + if (!ctx) { + return; + } + + sprintf(fullname, "SPU/%d/%s", dfd, name); + en.n_namesz = strlen(fullname) + 1; + en.n_descsz = sz; + en.n_type = NT_SPU; + + if (!spufs_dump_write(file, &en, sizeof(en))) + return; + if (!spufs_dump_write(file, fullname, en.n_namesz)) + return; + if (!spufs_dump_seek(file, roundup((unsigned long)file->f_pos, 4))) + return; + + do { + rc = do_coredump_read(i, ctx, buf, bufsz, &pos); + if (rc > 0) { + if (!spufs_dump_write(file, buf, rc)) + return; + total += rc; + } + } while (rc == bufsz && total < sz); + + spufs_dump_seek(file, roundup((unsigned long)file->f_pos + - total + sz, 4)); +} + +static void spufs_arch_write_notes(struct file *file) +{ + int j; + struct spufs_ctx_info *ctx_info, *next; + + list_for_each_entry_safe(ctx_info, next, &ctx_info_list, list) { + spu_acquire_saved(ctx_info->ctx); + for (j = 0; j < spufs_coredump_num_notes; j++) + spufs_arch_write_note(ctx_info, j, file); + spu_release(ctx_info->ctx); + list_del(&ctx_info->list); + kfree(ctx_info); + } +} + +struct spu_coredump_calls spufs_coredump_calls = { + .arch_notes_size = spufs_arch_notes_size, + .arch_write_notes = spufs_arch_write_notes, + .owner = THIS_MODULE, +}; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 533e2723e184..347eff56fcbd 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -32,13 +32,13 @@ #include <asm/io.h> #include <asm/semaphore.h> #include <asm/spu.h> +#include <asm/spu_info.h> #include <asm/uaccess.h> #include "spufs.h" #define SPUFS_MMAP_4K (PAGE_SIZE == 0x1000) - static int spufs_mem_open(struct inode *inode, struct file *file) { @@ -51,18 +51,23 @@ spufs_mem_open(struct inode *inode, struct file *file) } static ssize_t +__spufs_mem_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + char *local_store = ctx->ops->get_ls(ctx); + return simple_read_from_buffer(buffer, size, pos, local_store, + LS_SIZE); +} + +static ssize_t spufs_mem_read(struct file *file, char __user *buffer, size_t size, loff_t *pos) { - struct spu_context *ctx = file->private_data; - char *local_store; int ret; + struct spu_context *ctx = file->private_data; spu_acquire(ctx); - - local_store = ctx->ops->get_ls(ctx); - ret = simple_read_from_buffer(buffer, size, pos, local_store, LS_SIZE); - + ret = __spufs_mem_read(ctx, buffer, size, pos); spu_release(ctx); return ret; } @@ -104,11 +109,11 @@ spufs_mem_mmap_nopage(struct vm_area_struct *vma, if (ctx->state == SPU_STATE_SAVED) { vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) - & ~(_PAGE_NO_CACHE | _PAGE_GUARDED)); + & ~_PAGE_NO_CACHE); page = vmalloc_to_page(ctx->csa.lscsa->ls + offset); } else { vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) - | _PAGE_NO_CACHE | _PAGE_GUARDED); + | _PAGE_NO_CACHE); page = pfn_to_page((ctx->spu->local_store_phys + offset) >> PAGE_SHIFT); } @@ -131,7 +136,7 @@ spufs_mem_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - /* FIXME: */ + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE); @@ -200,7 +205,7 @@ static int spufs_cntl_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -261,18 +266,23 @@ spufs_regs_open(struct inode *inode, struct file *file) } static ssize_t +__spufs_regs_read(struct spu_context *ctx, char __user *buffer, + size_t size, loff_t *pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + lscsa->gprs, sizeof lscsa->gprs); +} + +static ssize_t spufs_regs_read(struct file *file, char __user *buffer, size_t size, loff_t *pos) { - struct spu_context *ctx = file->private_data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; + struct spu_context *ctx = file->private_data; spu_acquire_saved(ctx); - - ret = simple_read_from_buffer(buffer, size, pos, - lscsa->gprs, sizeof lscsa->gprs); - + ret = __spufs_regs_read(ctx, buffer, size, pos); spu_release(ctx); return ret; } @@ -307,18 +317,23 @@ static struct file_operations spufs_regs_fops = { }; static ssize_t +__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, + size_t size, loff_t * pos) +{ + struct spu_lscsa *lscsa = ctx->csa.lscsa; + return simple_read_from_buffer(buffer, size, pos, + &lscsa->fpcr, sizeof(lscsa->fpcr)); +} + +static ssize_t spufs_fpcr_read(struct file *file, char __user * buffer, size_t size, loff_t * pos) { - struct spu_context *ctx = file->private_data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; int ret; + struct spu_context *ctx = file->private_data; spu_acquire_saved(ctx); - - ret = simple_read_from_buffer(buffer, size, pos, - &lscsa->fpcr, sizeof(lscsa->fpcr)); - + ret = __spufs_fpcr_read(ctx, buffer, size, pos); spu_release(ctx); return ret; } @@ -718,23 +733,41 @@ static int spufs_signal1_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); } -static ssize_t spufs_signal1_read(struct file *file, char __user *buf, +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - struct spu_context *ctx = file->private_data; + int ret = 0; u32 data; if (len < 4) return -EINVAL; - spu_acquire(ctx); - data = ctx->ops->signal1_read(ctx); - spu_release(ctx); + if (ctx->csa.spu_chnlcnt_RW[3]) { + data = ctx->csa.spu_chnldata_RW[3]; + ret = 4; + } + + if (!ret) + goto out; if (copy_to_user(buf, &data, 4)) return -EFAULT; - return 4; +out: + return ret; +} + +static ssize_t spufs_signal1_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + spu_acquire_saved(ctx); + ret = __spufs_signal1_read(ctx, buf, len, pos); + spu_release(ctx); + + return ret; } static ssize_t spufs_signal1_write(struct file *file, const char __user *buf, @@ -782,7 +815,7 @@ static int spufs_signal1_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -807,25 +840,41 @@ static int spufs_signal2_open(struct inode *inode, struct file *file) return nonseekable_open(inode, file); } -static ssize_t spufs_signal2_read(struct file *file, char __user *buf, +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, size_t len, loff_t *pos) { - struct spu_context *ctx; + int ret = 0; u32 data; - ctx = file->private_data; - if (len < 4) return -EINVAL; - spu_acquire(ctx); - data = ctx->ops->signal2_read(ctx); - spu_release(ctx); + if (ctx->csa.spu_chnlcnt_RW[4]) { + data = ctx->csa.spu_chnldata_RW[4]; + ret = 4; + } + + if (!ret) + goto out; if (copy_to_user(buf, &data, 4)) return -EFAULT; - return 4; +out: + return ret; +} + +static ssize_t spufs_signal2_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + spu_acquire_saved(ctx); + ret = __spufs_signal2_read(ctx, buf, len, pos); + spu_release(ctx); + + return ret; } static ssize_t spufs_signal2_write(struct file *file, const char __user *buf, @@ -874,8 +923,7 @@ static int spufs_signal2_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - /* FIXME: */ - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -902,13 +950,19 @@ static void spufs_signal1_type_set(void *data, u64 val) spu_release(ctx); } +static u64 __spufs_signal1_type_get(void *data) +{ + struct spu_context *ctx = data; + return ctx->ops->signal1_type_get(ctx); +} + static u64 spufs_signal1_type_get(void *data) { struct spu_context *ctx = data; u64 ret; spu_acquire(ctx); - ret = ctx->ops->signal1_type_get(ctx); + ret = __spufs_signal1_type_get(data); spu_release(ctx); return ret; @@ -925,13 +979,19 @@ static void spufs_signal2_type_set(void *data, u64 val) spu_release(ctx); } +static u64 __spufs_signal2_type_get(void *data) +{ + struct spu_context *ctx = data; + return ctx->ops->signal2_type_get(ctx); +} + static u64 spufs_signal2_type_get(void *data) { struct spu_context *ctx = data; u64 ret; spu_acquire(ctx); - ret = ctx->ops->signal2_type_get(ctx); + ret = __spufs_signal2_type_get(data); spu_release(ctx); return ret; @@ -958,7 +1018,7 @@ static int spufs_mss_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -1000,7 +1060,7 @@ static int spufs_psmap_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -1041,7 +1101,7 @@ static int spufs_mfc_mmap(struct file *file, struct vm_area_struct *vma) if (!(vma->vm_flags & VM_SHARED)) return -EINVAL; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_IO; vma->vm_page_prot = __pgprot(pgprot_val(vma->vm_page_prot) | _PAGE_NO_CACHE | _PAGE_GUARDED); @@ -1265,6 +1325,7 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer, goto out; ctx->tagwait |= 1 << cmd.tag; + ret = size; out: return ret; @@ -1360,7 +1421,8 @@ static u64 spufs_npc_get(void *data) spu_release(ctx); return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, "%llx\n") +DEFINE_SIMPLE_ATTRIBUTE(spufs_npc_ops, spufs_npc_get, spufs_npc_set, + "0x%llx\n") static void spufs_decr_set(void *data, u64 val) { @@ -1371,18 +1433,24 @@ static void spufs_decr_set(void *data, u64 val) spu_release(ctx); } -static u64 spufs_decr_get(void *data) +static u64 __spufs_decr_get(void *data) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr.slot[0]; +} + +static u64 spufs_decr_get(void *data) +{ + struct spu_context *ctx = data; u64 ret; spu_acquire_saved(ctx); - ret = lscsa->decr.slot[0]; + ret = __spufs_decr_get(data); spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_ops, spufs_decr_get, spufs_decr_set, - "%llx\n") + "0x%llx\n") static void spufs_decr_status_set(void *data, u64 val) { @@ -1393,62 +1461,76 @@ static void spufs_decr_status_set(void *data, u64 val) spu_release(ctx); } -static u64 spufs_decr_status_get(void *data) +static u64 __spufs_decr_status_get(void *data) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->decr_status.slot[0]; +} + +static u64 spufs_decr_status_get(void *data) +{ + struct spu_context *ctx = data; u64 ret; spu_acquire_saved(ctx); - ret = lscsa->decr_status.slot[0]; + ret = __spufs_decr_status_get(data); spu_release(ctx); return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_decr_status_ops, spufs_decr_status_get, - spufs_decr_status_set, "%llx\n") + spufs_decr_status_set, "0x%llx\n") -static void spufs_spu_tag_mask_set(void *data, u64 val) +static void spufs_event_mask_set(void *data, u64 val) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; spu_acquire_saved(ctx); - lscsa->tag_mask.slot[0] = (u32) val; + lscsa->event_mask.slot[0] = (u32) val; spu_release(ctx); } -static u64 spufs_spu_tag_mask_get(void *data) +static u64 __spufs_event_mask_get(void *data) { struct spu_context *ctx = data; struct spu_lscsa *lscsa = ctx->csa.lscsa; + return lscsa->event_mask.slot[0]; +} + +static u64 spufs_event_mask_get(void *data) +{ + struct spu_context *ctx = data; u64 ret; spu_acquire_saved(ctx); - ret = lscsa->tag_mask.slot[0]; + ret = __spufs_event_mask_get(data); spu_release(ctx); return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_spu_tag_mask_ops, spufs_spu_tag_mask_get, - spufs_spu_tag_mask_set, "%llx\n") +DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, + spufs_event_mask_set, "0x%llx\n") -static void spufs_event_mask_set(void *data, u64 val) +static u64 __spufs_event_status_get(void *data) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - spu_acquire_saved(ctx); - lscsa->event_mask.slot[0] = (u32) val; - spu_release(ctx); + struct spu_state *state = &ctx->csa; + u64 stat; + stat = state->spu_chnlcnt_RW[0]; + if (stat) + return state->spu_chnldata_RW[0]; + return 0; } -static u64 spufs_event_mask_get(void *data) +static u64 spufs_event_status_get(void *data) { struct spu_context *ctx = data; - struct spu_lscsa *lscsa = ctx->csa.lscsa; - u64 ret; + u64 ret = 0; + spu_acquire_saved(ctx); - ret = lscsa->event_mask.slot[0]; + ret = __spufs_event_status_get(data); spu_release(ctx); return ret; } -DEFINE_SIMPLE_ATTRIBUTE(spufs_event_mask_ops, spufs_event_mask_get, - spufs_event_mask_set, "%llx\n") +DEFINE_SIMPLE_ATTRIBUTE(spufs_event_status_ops, spufs_event_status_get, + NULL, "0x%llx\n") static void spufs_srr0_set(void *data, u64 val) { @@ -1470,7 +1552,7 @@ static u64 spufs_srr0_get(void *data) return ret; } DEFINE_SIMPLE_ATTRIBUTE(spufs_srr0_ops, spufs_srr0_get, spufs_srr0_set, - "%llx\n") + "0x%llx\n") static u64 spufs_id_get(void *data) { @@ -1488,12 +1570,18 @@ static u64 spufs_id_get(void *data) } DEFINE_SIMPLE_ATTRIBUTE(spufs_id_ops, spufs_id_get, NULL, "0x%llx\n") -static u64 spufs_object_id_get(void *data) +static u64 __spufs_object_id_get(void *data) { struct spu_context *ctx = data; return ctx->object_id; } +static u64 spufs_object_id_get(void *data) +{ + /* FIXME: Should there really be no locking here? */ + return __spufs_object_id_get(data); +} + static void spufs_object_id_set(void *data, u64 id) { struct spu_context *ctx = data; @@ -1503,6 +1591,250 @@ static void spufs_object_id_set(void *data, u64 id) DEFINE_SIMPLE_ATTRIBUTE(spufs_object_id_ops, spufs_object_id_get, spufs_object_id_set, "0x%llx\n"); +static u64 __spufs_lslr_get(void *data) +{ + struct spu_context *ctx = data; + return ctx->csa.priv2.spu_lslr_RW; +} + +static u64 spufs_lslr_get(void *data) +{ + struct spu_context *ctx = data; + u64 ret; + + spu_acquire_saved(ctx); + ret = __spufs_lslr_get(data); + spu_release(ctx); + + return ret; +} +DEFINE_SIMPLE_ATTRIBUTE(spufs_lslr_ops, spufs_lslr_get, NULL, "0x%llx\n") + +static int spufs_info_open(struct inode *inode, struct file *file) +{ + struct spufs_inode_info *i = SPUFS_I(inode); + struct spu_context *ctx = i->i_ctx; + file->private_data = ctx; + return 0; +} + +static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 mbox_stat; + u32 data; + + mbox_stat = ctx->csa.prob.mb_stat_R; + if (mbox_stat & 0x0000ff) { + data = ctx->csa.prob.pu_mb_R; + } + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + int ret; + struct spu_context *ctx = file->private_data; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + spu_acquire_saved(ctx); + spin_lock(&ctx->csa.register_lock); + ret = __spufs_mbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release(ctx); + + return ret; +} + +static struct file_operations spufs_mbox_info_fops = { + .open = spufs_info_open, + .read = spufs_mbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + u32 ibox_stat; + u32 data; + + ibox_stat = ctx->csa.prob.mb_stat_R; + if (ibox_stat & 0xff0000) { + data = ctx->csa.priv2.puint_mb_R; + } + + return simple_read_from_buffer(buf, len, pos, &data, sizeof data); +} + +static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + spu_acquire_saved(ctx); + spin_lock(&ctx->csa.register_lock); + ret = __spufs_ibox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release(ctx); + + return ret; +} + +static struct file_operations spufs_ibox_info_fops = { + .open = spufs_info_open, + .read = spufs_ibox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + int i, cnt; + u32 data[4]; + u32 wbox_stat; + + wbox_stat = ctx->csa.prob.mb_stat_R; + cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); + for (i = 0; i < cnt; i++) { + data[i] = ctx->csa.spu_mailbox_data[i]; + } + + return simple_read_from_buffer(buf, len, pos, &data, + cnt * sizeof(u32)); +} + +static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + spu_acquire_saved(ctx); + spin_lock(&ctx->csa.register_lock); + ret = __spufs_wbox_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release(ctx); + + return ret; +} + +static struct file_operations spufs_wbox_info_fops = { + .open = spufs_info_open, + .read = spufs_wbox_info_read, + .llseek = generic_file_llseek, +}; + +static ssize_t __spufs_dma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_dma_info info; + struct mfc_cq_sr *qp, *spuqp; + int i; + + info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + for (i = 0; i < 16; i++) { + qp = &info.dma_info_command_data[i]; + spuqp = &ctx->csa.priv2.spuq[i]; + + qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + spu_acquire_saved(ctx); + spin_lock(&ctx->csa.register_lock); + ret = __spufs_dma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release(ctx); + + return ret; +} + +static struct file_operations spufs_dma_info_fops = { + .open = spufs_info_open, + .read = spufs_dma_info_read, +}; + +static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, + char __user *buf, size_t len, loff_t *pos) +{ + struct spu_proxydma_info info; + struct mfc_cq_sr *qp, *puqp; + int ret = sizeof info; + int i; + + if (len < ret) + return -EINVAL; + + if (!access_ok(VERIFY_WRITE, buf, len)) + return -EFAULT; + + info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; + for (i = 0; i < 8; i++) { + qp = &info.proxydma_info_command_data[i]; + puqp = &ctx->csa.priv2.puq[i]; + + qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; + qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; + qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; + qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; + } + + return simple_read_from_buffer(buf, len, pos, &info, + sizeof info); +} + +static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, + size_t len, loff_t *pos) +{ + struct spu_context *ctx = file->private_data; + int ret; + + spu_acquire_saved(ctx); + spin_lock(&ctx->csa.register_lock); + ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spin_unlock(&ctx->csa.register_lock); + spu_release(ctx); + + return ret; +} + +static struct file_operations spufs_proxydma_info_fops = { + .open = spufs_info_open, + .read = spufs_proxydma_info_read, +}; + struct tree_descr spufs_dir_contents[] = { { "mem", &spufs_mem_fops, 0666, }, { "regs", &spufs_regs_fops, 0666, }, @@ -1516,18 +1848,70 @@ struct tree_descr spufs_dir_contents[] = { { "signal2", &spufs_signal2_fops, 0666, }, { "signal1_type", &spufs_signal1_type, 0666, }, { "signal2_type", &spufs_signal2_type, 0666, }, - { "mss", &spufs_mss_fops, 0666, }, - { "mfc", &spufs_mfc_fops, 0666, }, { "cntl", &spufs_cntl_fops, 0666, }, - { "npc", &spufs_npc_ops, 0666, }, { "fpcr", &spufs_fpcr_fops, 0666, }, + { "lslr", &spufs_lslr_ops, 0444, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, + { "srr0", &spufs_srr0_ops, 0666, }, { "decr", &spufs_decr_ops, 0666, }, { "decr_status", &spufs_decr_status_ops, 0666, }, - { "spu_tag_mask", &spufs_spu_tag_mask_ops, 0666, }, { "event_mask", &spufs_event_mask_ops, 0666, }, - { "srr0", &spufs_srr0_ops, 0666, }, + { "event_status", &spufs_event_status_ops, 0444, }, + { "psmap", &spufs_psmap_fops, 0666, }, + { "phys-id", &spufs_id_ops, 0666, }, + { "object-id", &spufs_object_id_ops, 0666, }, + { "mbox_info", &spufs_mbox_info_fops, 0444, }, + { "ibox_info", &spufs_ibox_info_fops, 0444, }, + { "wbox_info", &spufs_wbox_info_fops, 0444, }, + { "dma_info", &spufs_dma_info_fops, 0444, }, + { "proxydma_info", &spufs_proxydma_info_fops, 0444, }, + {}, +}; + +struct tree_descr spufs_dir_nosched_contents[] = { + { "mem", &spufs_mem_fops, 0666, }, + { "mbox", &spufs_mbox_fops, 0444, }, + { "ibox", &spufs_ibox_fops, 0444, }, + { "wbox", &spufs_wbox_fops, 0222, }, + { "mbox_stat", &spufs_mbox_stat_fops, 0444, }, + { "ibox_stat", &spufs_ibox_stat_fops, 0444, }, + { "wbox_stat", &spufs_wbox_stat_fops, 0444, }, + { "signal1", &spufs_signal1_fops, 0666, }, + { "signal2", &spufs_signal2_fops, 0666, }, + { "signal1_type", &spufs_signal1_type, 0666, }, + { "signal2_type", &spufs_signal2_type, 0666, }, + { "mss", &spufs_mss_fops, 0666, }, + { "mfc", &spufs_mfc_fops, 0666, }, + { "cntl", &spufs_cntl_fops, 0666, }, + { "npc", &spufs_npc_ops, 0666, }, { "psmap", &spufs_psmap_fops, 0666, }, { "phys-id", &spufs_id_ops, 0666, }, { "object-id", &spufs_object_id_ops, 0666, }, {}, }; + +struct spufs_coredump_reader spufs_coredump_read[] = { + { "regs", __spufs_regs_read, NULL, 128 * 16 }, + { "fpcr", __spufs_fpcr_read, NULL, 16 }, + { "lslr", NULL, __spufs_lslr_get, 11 }, + { "decr", NULL, __spufs_decr_get, 11 }, + { "decr_status", NULL, __spufs_decr_status_get, 11 }, + { "mem", __spufs_mem_read, NULL, 256 * 1024, }, + { "signal1", __spufs_signal1_read, NULL, 4 }, + { "signal1_type", NULL, __spufs_signal1_type_get, 2 }, + { "signal2", __spufs_signal2_read, NULL, 4 }, + { "signal2_type", NULL, __spufs_signal2_type_get, 2 }, + { "event_mask", NULL, __spufs_event_mask_get, 8 }, + { "event_status", NULL, __spufs_event_status_get, 8 }, + { "mbox_info", __spufs_mbox_info_read, NULL, 4 }, + { "ibox_info", __spufs_ibox_info_read, NULL, 4 }, + { "wbox_info", __spufs_wbox_info_read, NULL, 16 }, + { "dma_info", __spufs_dma_info_read, NULL, 69 * 8 }, + { "proxydma_info", __spufs_proxydma_info_read, NULL, 35 * 8 }, + { "object-id", NULL, __spufs_object_id_get, 19 }, + { }, +}; +int spufs_coredump_num_notes = ARRAY_SIZE(spufs_coredump_read) - 1; + diff --git a/arch/powerpc/platforms/cell/spufs/hw_ops.c b/arch/powerpc/platforms/cell/spufs/hw_ops.c index d805ffed892d..ae42e03b8c86 100644 --- a/arch/powerpc/platforms/cell/spufs/hw_ops.c +++ b/arch/powerpc/platforms/cell/spufs/hw_ops.c @@ -135,21 +135,11 @@ static int spu_hw_wbox_write(struct spu_context *ctx, u32 data) return ret; } -static u32 spu_hw_signal1_read(struct spu_context *ctx) -{ - return in_be32(&ctx->spu->problem->signal_notify1); -} - static void spu_hw_signal1_write(struct spu_context *ctx, u32 data) { out_be32(&ctx->spu->problem->signal_notify1, data); } -static u32 spu_hw_signal2_read(struct spu_context *ctx) -{ - return in_be32(&ctx->spu->problem->signal_notify2); -} - static void spu_hw_signal2_write(struct spu_context *ctx, u32 data) { out_be32(&ctx->spu->problem->signal_notify2, data); @@ -217,21 +207,42 @@ static char *spu_hw_get_ls(struct spu_context *ctx) return ctx->spu->local_store; } -static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) +static u32 spu_hw_runcntl_read(struct spu_context *ctx) { - eieio(); - out_be32(&ctx->spu->problem->spu_runcntl_RW, val); + return in_be32(&ctx->spu->problem->spu_runcntl_RW); } -static void spu_hw_runcntl_stop(struct spu_context *ctx) +static void spu_hw_runcntl_write(struct spu_context *ctx, u32 val) { spin_lock_irq(&ctx->spu->register_lock); - out_be32(&ctx->spu->problem->spu_runcntl_RW, SPU_RUNCNTL_STOP); - while (in_be32(&ctx->spu->problem->spu_status_R) & SPU_STATUS_RUNNING) - cpu_relax(); + if (val & SPU_RUNCNTL_ISOLATE) + out_be64(&ctx->spu->priv2->spu_privcntl_RW, 4LL); + out_be32(&ctx->spu->problem->spu_runcntl_RW, val); spin_unlock_irq(&ctx->spu->register_lock); } +static void spu_hw_master_start(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) | MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + +static void spu_hw_master_stop(struct spu_context *ctx) +{ + struct spu *spu = ctx->spu; + u64 sr1; + + spin_lock_irq(&spu->register_lock); + sr1 = spu_mfc_sr1_get(spu) & ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, sr1); + spin_unlock_irq(&spu->register_lock); +} + static int spu_hw_set_mfc_query(struct spu_context * ctx, u32 mask, u32 mode) { struct spu_problem __iomem *prob = ctx->spu->problem; @@ -291,9 +302,7 @@ struct spu_context_ops spu_hw_ops = { .mbox_stat_poll = spu_hw_mbox_stat_poll, .ibox_read = spu_hw_ibox_read, .wbox_write = spu_hw_wbox_write, - .signal1_read = spu_hw_signal1_read, .signal1_write = spu_hw_signal1_write, - .signal2_read = spu_hw_signal2_read, .signal2_write = spu_hw_signal2_write, .signal1_type_set = spu_hw_signal1_type_set, .signal1_type_get = spu_hw_signal1_type_get, @@ -303,8 +312,10 @@ struct spu_context_ops spu_hw_ops = { .npc_write = spu_hw_npc_write, .status_read = spu_hw_status_read, .get_ls = spu_hw_get_ls, + .runcntl_read = spu_hw_runcntl_read, .runcntl_write = spu_hw_runcntl_write, - .runcntl_stop = spu_hw_runcntl_stop, + .master_start = spu_hw_master_start, + .master_stop = spu_hw_master_stop, .set_mfc_query = spu_hw_set_mfc_query, .read_mfc_tagstatus = spu_hw_read_mfc_tagstatus, .get_mfc_free_elements = spu_hw_get_mfc_free_elements, diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 427d00a4f6a0..c7d010749a18 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -33,7 +33,7 @@ #include <linux/slab.h> #include <linux/parser.h> -#include <asm/io.h> +#include <asm/prom.h> #include <asm/semaphore.h> #include <asm/spu.h> #include <asm/uaccess.h> @@ -41,6 +41,7 @@ #include "spufs.h" static kmem_cache_t *spufs_inode_cache; +char *isolated_loader; static struct inode * spufs_alloc_inode(struct super_block *sb) @@ -231,6 +232,7 @@ struct file_operations spufs_context_fops = { .readdir = dcache_readdir, .fsync = simple_sync_file, }; +EXPORT_SYMBOL_GPL(spufs_context_fops); static int spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, @@ -255,10 +257,14 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, goto out_iput; ctx->flags = flags; - inode->i_op = &spufs_dir_inode_operations; inode->i_fop = &simple_dir_operations; - ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + if (flags & SPU_CREATE_NOSCHED) + ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, + mode, ctx); + else + ret = spufs_fill_dir(dentry, spufs_dir_contents, mode, ctx); + if (ret) goto out_free_ctx; @@ -307,6 +313,20 @@ static int spufs_create_context(struct inode *inode, { int ret; + ret = -EPERM; + if ((flags & SPU_CREATE_NOSCHED) && + !capable(CAP_SYS_NICE)) + goto out_unlock; + + ret = -EINVAL; + if ((flags & (SPU_CREATE_NOSCHED | SPU_CREATE_ISOLATE)) + == SPU_CREATE_ISOLATE) + goto out_unlock; + + ret = -ENODEV; + if ((flags & SPU_CREATE_ISOLATE) && !isolated_loader) + goto out_unlock; + ret = spufs_mkdir(inode, dentry, flags, mode & S_IRWXUGO); if (ret) goto out_unlock; @@ -540,6 +560,30 @@ spufs_parse_options(char *options, struct inode *root) return 1; } +static void +spufs_init_isolated_loader(void) +{ + struct device_node *dn; + const char *loader; + int size; + + dn = of_find_node_by_path("/spu-isolation"); + if (!dn) + return; + + loader = get_property(dn, "loader", &size); + if (!loader) + return; + + /* kmalloc should align on a 16 byte boundary..* */ + isolated_loader = kmalloc(size, GFP_KERNEL); + if (!isolated_loader) + return; + + memcpy(isolated_loader, loader, size); + printk(KERN_INFO "spufs: SPU isolation mode enabled\n"); +} + static int spufs_create_root(struct super_block *sb, void *data) { @@ -608,6 +652,7 @@ static struct file_system_type spufs_type = { static int __init spufs_init(void) { int ret; + ret = -ENOMEM; spufs_inode_cache = kmem_cache_create("spufs_inode_cache", sizeof(struct spufs_inode_info), 0, @@ -625,6 +670,12 @@ static int __init spufs_init(void) ret = register_spu_syscalls(&spufs_calls); if (ret) goto out_fs; + ret = register_arch_coredump_calls(&spufs_coredump_calls); + if (ret) + goto out_fs; + + spufs_init_isolated_loader(); + return 0; out_fs: unregister_filesystem(&spufs_type); @@ -638,6 +689,7 @@ module_init(spufs_init); static void __exit spufs_exit(void) { spu_sched_exit(); + unregister_arch_coredump_calls(&spufs_coredump_calls); unregister_spu_syscalls(&spufs_calls); unregister_filesystem(&spufs_type); kmem_cache_destroy(spufs_inode_cache); diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index 63df8cf4ba16..1acc2ffef8c8 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -1,7 +1,11 @@ +#define DEBUG + #include <linux/wait.h> #include <linux/ptrace.h> #include <asm/spu.h> +#include <asm/spu_priv1.h> +#include <asm/io.h> #include <asm/unistd.h> #include "spufs.h" @@ -24,6 +28,7 @@ void spufs_dma_callback(struct spu *spu, int type) } else { switch (type) { case SPE_EVENT_DMA_ALIGNMENT: + case SPE_EVENT_SPE_DATA_STORAGE: case SPE_EVENT_INVALID_DMA: force_sig(SIGBUS, /* info, */ current); break; @@ -48,15 +53,122 @@ static inline int spu_stopped(struct spu_context *ctx, u32 * stat) return (!(*stat & 0x1) || pte_fault || spu->class_0_pending) ? 1 : 0; } +static int spu_setup_isolated(struct spu_context *ctx) +{ + int ret; + u64 __iomem *mfc_cntl; + u64 sr1; + u32 status; + unsigned long timeout; + const u32 status_loading = SPU_STATUS_RUNNING + | SPU_STATUS_ISOLATED_STATE | SPU_STATUS_ISOLATED_LOAD_STATUS; + + if (!isolated_loader) + return -ENODEV; + + ret = spu_acquire_exclusive(ctx); + if (ret) + goto out; + + mfc_cntl = &ctx->spu->priv2->mfc_control_RW; + + /* purge the MFC DMA queue to ensure no spurious accesses before we + * enter kernel mode */ + timeout = jiffies + HZ; + out_be64(mfc_cntl, MFC_CNTL_PURGE_DMA_REQUEST); + while ((in_be64(mfc_cntl) & MFC_CNTL_PURGE_DMA_STATUS_MASK) + != MFC_CNTL_PURGE_DMA_COMPLETE) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout flushing MFC DMA queue\n", + __FUNCTION__); + ret = -EIO; + goto out_unlock; + } + cond_resched(); + } + + /* put the SPE in kernel mode to allow access to the loader */ + sr1 = spu_mfc_sr1_get(ctx->spu); + sr1 &= ~MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + + /* start the loader */ + ctx->ops->signal1_write(ctx, (unsigned long)isolated_loader >> 32); + ctx->ops->signal2_write(ctx, + (unsigned long)isolated_loader & 0xffffffff); + + ctx->ops->runcntl_write(ctx, + SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + + ret = 0; + timeout = jiffies + HZ; + while (((status = ctx->ops->status_read(ctx)) & status_loading) == + status_loading) { + if (time_after(jiffies, timeout)) { + printk(KERN_ERR "%s: timeout waiting for loader\n", + __FUNCTION__); + ret = -EIO; + goto out_drop_priv; + } + cond_resched(); + } + + if (!(status & SPU_STATUS_RUNNING)) { + /* If isolated LOAD has failed: run SPU, we will get a stop-and + * signal later. */ + pr_debug("%s: isolated LOAD failed\n", __FUNCTION__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); + ret = -EACCES; + + } else if (!(status & SPU_STATUS_ISOLATED_STATE)) { + /* This isn't allowed by the CBEA, but check anyway */ + pr_debug("%s: SPU fell out of isolated mode?\n", __FUNCTION__); + ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_STOP); + ret = -EINVAL; + } + +out_drop_priv: + /* Finished accessing the loader. Drop kernel mode */ + sr1 |= MFC_STATE1_PROBLEM_STATE_MASK; + spu_mfc_sr1_set(ctx->spu, sr1); + +out_unlock: + spu_release_exclusive(ctx); +out: + return ret; +} + static inline int spu_run_init(struct spu_context *ctx, u32 * npc) { int ret; + unsigned long runcntl = SPU_RUNCNTL_RUNNABLE; - if ((ret = spu_acquire_runnable(ctx)) != 0) + ret = spu_acquire_runnable(ctx); + if (ret) return ret; - ctx->ops->npc_write(ctx, *npc); - ctx->ops->runcntl_write(ctx, SPU_RUNCNTL_RUNNABLE); - return 0; + + if (ctx->flags & SPU_CREATE_ISOLATE) { + if (!(ctx->ops->status_read(ctx) & SPU_STATUS_ISOLATED_STATE)) { + /* Need to release ctx, because spu_setup_isolated will + * acquire it exclusively. + */ + spu_release(ctx); + ret = spu_setup_isolated(ctx); + if (!ret) + ret = spu_acquire_runnable(ctx); + } + + /* if userspace has set the runcntrl register (eg, to issue an + * isolated exit), we need to re-set it here */ + runcntl = ctx->ops->runcntl_read(ctx) & + (SPU_RUNCNTL_RUNNABLE | SPU_RUNCNTL_ISOLATE); + if (runcntl == 0) + runcntl = SPU_RUNCNTL_RUNNABLE; + } else + ctx->ops->npc_write(ctx, *npc); + + ctx->ops->runcntl_write(ctx, runcntl); + return ret; } static inline int spu_run_fini(struct spu_context *ctx, u32 * npc, @@ -70,13 +182,7 @@ static inline int spu_run_fini(struct spu_context *ctx, u32 * npc, if (signal_pending(current)) ret = -ERESTARTSYS; - if (unlikely(current->ptrace & PT_PTRACED)) { - if ((*status & SPU_STATUS_STOPPED_BY_STOP) - && (*status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { - force_sig(SIGTRAP, current); - ret = -ERESTARTSYS; - } - } + return ret; } @@ -204,6 +310,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, if (down_interruptible(&ctx->run_sema)) return -ERESTARTSYS; + ctx->ops->master_start(ctx); ctx->event_return = 0; ret = spu_run_init(ctx, npc); if (ret) @@ -223,7 +330,7 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, if (unlikely(ctx->state != SPU_STATE_RUNNABLE)) { ret = spu_reacquire_runnable(ctx, npc, &status); if (ret) - goto out; + goto out2; continue; } ret = spu_process_events(ctx); @@ -231,12 +338,24 @@ long spufs_run_spu(struct file *file, struct spu_context *ctx, } while (!ret && !(status & (SPU_STATUS_STOPPED_BY_STOP | SPU_STATUS_STOPPED_BY_HALT))); - ctx->ops->runcntl_stop(ctx); + ctx->ops->master_stop(ctx); ret = spu_run_fini(ctx, npc, &status); - if (!ret) - ret = status; spu_yield(ctx); +out2: + if ((ret == 0) || + ((ret == -ERESTARTSYS) && + ((status & SPU_STATUS_STOPPED_BY_HALT) || + ((status & SPU_STATUS_STOPPED_BY_STOP) && + (status >> SPU_STOP_STATUS_SHIFT != 0x2104))))) + ret = status; + + if ((status & SPU_STATUS_STOPPED_BY_STOP) + && (status >> SPU_STOP_STATUS_SHIFT) == 0x3fff) { + force_sig(SIGTRAP, current); + ret = -ERESTARTSYS; + } + out: *event = ctx->event_return; up(&ctx->run_sema); diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index a0f55ca2d488..70fb13395c04 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -29,6 +29,7 @@ #include <asm/spu.h> #include <asm/spu_csa.h> +#include <asm/spu_info.h> /* The magic number for our file system */ enum { @@ -114,13 +115,19 @@ struct spu_context_ops { void (*npc_write) (struct spu_context * ctx, u32 data); u32(*status_read) (struct spu_context * ctx); char*(*get_ls) (struct spu_context * ctx); + u32 (*runcntl_read) (struct spu_context * ctx); void (*runcntl_write) (struct spu_context * ctx, u32 data); - void (*runcntl_stop) (struct spu_context * ctx); + void (*master_start) (struct spu_context * ctx); + void (*master_stop) (struct spu_context * ctx); int (*set_mfc_query)(struct spu_context * ctx, u32 mask, u32 mode); u32 (*read_mfc_tagstatus)(struct spu_context * ctx); u32 (*get_mfc_free_elements)(struct spu_context *ctx); - int (*send_mfc_command)(struct spu_context *ctx, - struct mfc_dma_command *cmd); + int (*send_mfc_command)(struct spu_context * ctx, + struct mfc_dma_command * cmd); + void (*dma_info_read) (struct spu_context * ctx, + struct spu_dma_info * info); + void (*proxydma_info_read) (struct spu_context * ctx, + struct spu_proxydma_info * info); }; extern struct spu_context_ops spu_hw_ops; @@ -135,6 +142,7 @@ struct spufs_inode_info { container_of(inode, struct spufs_inode_info, vfs_inode) extern struct tree_descr spufs_dir_contents[]; +extern struct tree_descr spufs_dir_nosched_contents[]; /* system call implementation */ long spufs_run_spu(struct file *file, @@ -162,6 +170,12 @@ void spu_acquire(struct spu_context *ctx); void spu_release(struct spu_context *ctx); int spu_acquire_runnable(struct spu_context *ctx); void spu_acquire_saved(struct spu_context *ctx); +int spu_acquire_exclusive(struct spu_context *ctx); + +static inline void spu_release_exclusive(struct spu_context *ctx) +{ + up_write(&ctx->state_sema); +} int spu_activate(struct spu_context *ctx, u64 flags); void spu_deactivate(struct spu_context *ctx); @@ -169,6 +183,8 @@ void spu_yield(struct spu_context *ctx); int __init spu_sched_init(void); void __exit spu_sched_exit(void); +extern char *isolated_loader; + /* * spufs_wait * Same as wait_event_interruptible(), except that here @@ -207,4 +223,15 @@ void spufs_stop_callback(struct spu *spu); void spufs_mfc_callback(struct spu *spu); void spufs_dma_callback(struct spu *spu, int type); +extern struct spu_coredump_calls spufs_coredump_calls; +struct spufs_coredump_reader { + char *name; + ssize_t (*read)(struct spu_context *ctx, + char __user *buffer, size_t size, loff_t *pos); + u64 (*get)(void *data); + size_t size; +}; +extern struct spufs_coredump_reader spufs_coredump_read[]; +extern int spufs_coredump_num_notes; + #endif diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 0f782ca662ba..c08981ff7fc6 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -102,7 +102,7 @@ static inline int check_spu_isolate(struct spu_state *csa, struct spu *spu) * saved at this time. */ isolate_state = SPU_STATUS_ISOLATED_STATE | - SPU_STATUS_ISOLATED_LOAD_STAUTUS | SPU_STATUS_ISOLATED_EXIT_STAUTUS; + SPU_STATUS_ISOLATED_LOAD_STATUS | SPU_STATUS_ISOLATED_EXIT_STATUS; return (in_be32(&prob->spu_status_R) & isolate_state) ? 1 : 0; } @@ -1046,12 +1046,12 @@ static inline int suspend_spe(struct spu_state *csa, struct spu *spu) */ if (in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING) { if (in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + SPU_STATUS_ISOLATED_EXIT_STATUS) { POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); } if ((in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_LOAD_STAUTUS) + SPU_STATUS_ISOLATED_LOAD_STATUS) || (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE)) { out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); @@ -1085,7 +1085,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) */ if (!(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING)) { if (in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_EXIT_STAUTUS) { + SPU_STATUS_ISOLATED_EXIT_STATUS) { spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); eieio(); @@ -1095,7 +1095,7 @@ static inline void clear_spu_status(struct spu_state *csa, struct spu *spu) SPU_STATUS_RUNNING); } if ((in_be32(&prob->spu_status_R) & - SPU_STATUS_ISOLATED_LOAD_STAUTUS) + SPU_STATUS_ISOLATED_LOAD_STATUS) || (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE)) { spu_mfc_sr1_set(spu, @@ -1916,6 +1916,51 @@ static void save_lscsa(struct spu_state *prev, struct spu *spu) wait_spu_stopped(prev, spu); /* Step 57. */ } +static void force_spu_isolate_exit(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + struct spu_priv2 __iomem *priv2 = spu->priv2; + + /* Stop SPE execution and wait for completion. */ + out_be32(&prob->spu_runcntl_RW, SPU_RUNCNTL_STOP); + iobarrier_rw(); + POLL_WHILE_TRUE(in_be32(&prob->spu_status_R) & SPU_STATUS_RUNNING); + + /* Restart SPE master runcntl. */ + spu_mfc_sr1_set(spu, MFC_STATE1_MASTER_RUN_CONTROL_MASK); + iobarrier_w(); + + /* Initiate isolate exit request and wait for completion. */ + out_be64(&priv2->spu_privcntl_RW, 4LL); + iobarrier_w(); + out_be32(&prob->spu_runcntl_RW, 2); + iobarrier_rw(); + POLL_WHILE_FALSE((in_be32(&prob->spu_status_R) + & SPU_STATUS_STOPPED_BY_STOP)); + + /* Reset load request to normal. */ + out_be64(&priv2->spu_privcntl_RW, SPU_PRIVCNT_LOAD_REQUEST_NORMAL); + iobarrier_w(); +} + +/** + * stop_spu_isolate + * Check SPU run-control state and force isolated + * exit function as necessary. + */ +static void stop_spu_isolate(struct spu *spu) +{ + struct spu_problem __iomem *prob = spu->problem; + + if (in_be32(&prob->spu_status_R) & SPU_STATUS_ISOLATED_STATE) { + /* The SPU is in isolated state; the only way + * to get it out is to perform an isolated + * exit (clean) operation. + */ + force_spu_isolate_exit(spu); + } +} + static void harvest(struct spu_state *prev, struct spu *spu) { /* @@ -1928,6 +1973,7 @@ static void harvest(struct spu_state *prev, struct spu *spu) inhibit_user_access(prev, spu); /* Step 3. */ terminate_spu_app(prev, spu); /* Step 4. */ set_switch_pending(prev, spu); /* Step 5. */ + stop_spu_isolate(spu); /* NEW. */ remove_other_spu_access(prev, spu); /* Step 6. */ suspend_mfc(prev, spu); /* Step 7. */ wait_suspend_mfc_complete(prev, spu); /* Step 8. */ @@ -2096,11 +2142,11 @@ int spu_save(struct spu_state *prev, struct spu *spu) acquire_spu_lock(spu); /* Step 1. */ rc = __do_spu_save(prev, spu); /* Steps 2-53. */ release_spu_lock(spu); - if (rc) { + if (rc != 0 && rc != 2 && rc != 6) { panic("%s failed on SPU[%d], rc=%d.\n", __func__, spu->number, rc); } - return rc; + return 0; } EXPORT_SYMBOL_GPL(spu_save); @@ -2165,9 +2211,6 @@ static void init_priv1(struct spu_state *csa) MFC_STATE1_PROBLEM_STATE_MASK | MFC_STATE1_RELOCATE_MASK | MFC_STATE1_BUS_TLBIE_MASK; - /* Set storage description. */ - csa->priv1.mfc_sdr_RW = mfspr(SPRN_SDR1); - /* Enable OS-specific set of interrupts. */ csa->priv1.int_mask_class0_RW = CLASS0_ENABLE_DMA_ALIGNMENT_INTR | CLASS0_ENABLE_INVALID_DMA_COMMAND_INTR | |