Diffstat (limited to 'arch/powerpc/platforms/wsp/wsp_pci.c')
-rw-r--r--  arch/powerpc/platforms/wsp/wsp_pci.c  1133
1 files changed, 1133 insertions, 0 deletions
diff --git a/arch/powerpc/platforms/wsp/wsp_pci.c b/arch/powerpc/platforms/wsp/wsp_pci.c
new file mode 100644
index 000000000000..e0262cd0e2d3
--- /dev/null
+++ b/arch/powerpc/platforms/wsp/wsp_pci.c
@@ -0,0 +1,1133 @@
+/*
+ * Copyright 2010 Ben Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define DEBUG
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+
+#include <asm/sections.h>
+#include <asm/io.h>
+#include <asm/prom.h>
+#include <asm/pci-bridge.h>
+#include <asm/machdep.h>
+#include <asm/ppc-pci.h>
+#include <asm/iommu.h>
+#include <asm/io-workarounds.h>
+
+#include "wsp.h"
+#include "wsp_pci.h"
+#include "msi.h"
+
+
+/* Max number of TVTs for one table. Only 32-bit tables can use
+ * multiple TVTs and so the max currently supported is thus 8
+ * since only 2G of DMA space is supported
+ */
+#define MAX_TABLE_TVT_COUNT	8
+
+struct wsp_dma_table {
+	struct list_head	link;
+	struct iommu_table	table;
+	struct wsp_phb		*phb;
+	struct page		*tces[MAX_TABLE_TVT_COUNT];
+};
+
+/* We support DMA regions from 0...2G in 32-bit space (no support for
+ * 64-bit DMA just yet). Each device gets a separate TCE table (TVT
+ * entry) with validation enabled (though not supported by SimiCS
+ * just yet).
+ *
+ * To simplify things, we divide this 2G space into N regions based
+ * on the constant below, which could be turned into a tunable eventually.
+ *
+ * We then dynamically assign those regions to devices as they show up.
+ *
+ * We use a bitmap as an allocator for these.
+ *
+ * Tables are allocated/created dynamically as devices are discovered;
+ * multiple TVT entries are used if needed.
+ *
+ * When 64-bit DMA support is added we should simply use a separate set
+ * of larger regions (the HW supports 64 TVT entries). We can
+ * additionally create a bypass region in 64-bit space for performance,
+ * though that would have a cost in terms of security.
+ *
+ * If you set NUM_DMA32_REGIONS to 1, then a single table is shared
+ * for all devices and bus/dev/fn validation is disabled.
+ *
+ * Note that a DMA32 region cannot be smaller than 256M so the max
+ * supported here for now is 8. We don't yet support sharing regions
+ * between multiple devices so the max number of devices supported
+ * is MAX_TABLE_TVT_COUNT.
+ */
+#define NUM_DMA32_REGIONS	1
+
+struct wsp_phb {
+	struct pci_controller	*hose;
+
+	/* Lock controlling access to the list of dma tables. It does
+	 * -not- protect against dma_* operations on those tables; those
+	 * should be stopped before an entry is removed from the list.
+	 *
+	 * The lock is also used for error handling operations.
+	 */
+	spinlock_t		lock;
+	struct list_head	dma_tables;
+	unsigned long		dma32_map;
+	unsigned long		dma32_base;
+	unsigned int		dma32_num_regions;
+	unsigned long		dma32_region_size;
+
+	/* Debugfs stuff */
+	struct dentry		*ddir;
+
+	struct list_head	all;
+};
+static LIST_HEAD(wsp_phbs);
+
+//#define cfg_debug(fmt...)	pr_debug(fmt)
+#define cfg_debug(fmt...)
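
/*
 * Worked example of the carve-up described above (editor's illustration,
 * not part of this patch): if NUM_DMA32_REGIONS were set to 8, each
 * region would cover 0x80000000 / 8 = 256M of DMA space and need a
 * single 256M TVT; its 4K-page TCE table holds 65536 entries of 8 bytes
 * each, i.e. 512K. With the default of 1, the lone region spans the
 * whole 2G and consumes all 8 TVTs (MAX_TABLE_TVT_COUNT) at 256M each.
 */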
+ + +static int wsp_pcie_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose; + int suboff; + u64 addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = PCIE_REG_CA_ENABLE | + ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | + ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | + ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; + suboff = offset & 3; + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + + switch (len) { + case 1: + addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) + >> (suboff << 3)) & 0xff; + cfg_debug("read 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, *val); + break; + case 2: + addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + *val = (in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA) + >> (suboff << 3)) & 0xffff; + cfg_debug("read 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, *val); + break; + default: + addr |= 0xful << PCIE_REG_CA_BE_SHIFT; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + *val = in_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA); + cfg_debug("read 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, *val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int wsp_pcie_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose; + int suboff; + u64 addr; + + hose = pci_bus_to_host(bus); + if (hose == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + if (offset >= 0x1000) + return PCIBIOS_BAD_REGISTER_NUMBER; + addr = PCIE_REG_CA_ENABLE | + ((u64)bus->number) << PCIE_REG_CA_BUS_SHIFT | + ((u64)devfn) << PCIE_REG_CA_FUNC_SHIFT | + ((u64)offset & ~3) << PCIE_REG_CA_REG_SHIFT; + suboff = offset & 3; + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. 
+ */ + switch (len) { + case 1: + addr |= (0x8ul >> suboff) << PCIE_REG_CA_BE_SHIFT; + val <<= suboff << 3; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); + cfg_debug("write 1 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%02x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, val); + break; + case 2: + addr |= (0xcul >> suboff) << PCIE_REG_CA_BE_SHIFT; + val <<= suboff << 3; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); + cfg_debug("write 2 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%04x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, val); + break; + default: + addr |= 0xful << PCIE_REG_CA_BE_SHIFT; + out_be64(hose->cfg_data + PCIE_REG_CONFIG_ADDRESS, addr); + out_le32(hose->cfg_data + PCIE_REG_CONFIG_DATA, val); + cfg_debug("write 4 %02x:%02x:%02x + %02x/%x addr=0x%llx val=%08x\n", + bus->number, devfn >> 3, devfn & 7, + offset, suboff, addr, val); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops wsp_pcie_pci_ops = +{ + .read = wsp_pcie_read_config, + .write = wsp_pcie_write_config, +}; + +#define TCE_SHIFT 12 +#define TCE_PAGE_SIZE (1 << TCE_SHIFT) +#define TCE_PCI_WRITE 0x2 /* write from PCI allowed */ +#define TCE_PCI_READ 0x1 /* read from PCI allowed */ +#define TCE_RPN_MASK 0x3fffffffffful /* 42-bit RPN (4K pages) */ +#define TCE_RPN_SHIFT 12 + +//#define dma_debug(fmt...) pr_debug(fmt) +#define dma_debug(fmt...) + +static int tce_build_wsp(struct iommu_table *tbl, long index, long npages, + unsigned long uaddr, enum dma_data_direction direction, + struct dma_attrs *attrs) +{ + struct wsp_dma_table *ptbl = container_of(tbl, + struct wsp_dma_table, + table); + u64 proto_tce; + u64 *tcep; + u64 rpn; + + proto_tce = TCE_PCI_READ; +#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS + proto_tce |= TCE_PCI_WRITE; +#else + if (direction != DMA_TO_DEVICE) + proto_tce |= TCE_PCI_WRITE; +#endif + + /* XXX Make this faster by factoring out the page address for + * within a TCE table + */ + while (npages--) { + /* We don't use it->base as the table can be scattered */ + tcep = (u64 *)page_address(ptbl->tces[index >> 16]); + tcep += (index & 0xffff); + + /* can't move this out since we might cross LMB boundary */ + rpn = __pa(uaddr) >> TCE_SHIFT; + *tcep = proto_tce | (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; + + dma_debug("[DMA] TCE %p set to 0x%016llx (dma addr: 0x%lx)\n", + tcep, *tcep, (tbl->it_offset + index) << IOMMU_PAGE_SHIFT); + + uaddr += TCE_PAGE_SIZE; + index++; + } + return 0; +} + +static void tce_free_wsp(struct iommu_table *tbl, long index, long npages) +{ + struct wsp_dma_table *ptbl = container_of(tbl, + struct wsp_dma_table, + table); +#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS + struct pci_controller *hose = ptbl->phb->hose; +#endif + u64 *tcep; + + /* XXX Make this faster by factoring out the page address for + * within a TCE table. 
Also use line-kill option to kill multiple
+	 * TCEs at once
+	 */
+	while (npages--) {
+		/* We don't use it->base as the table can be scattered */
+		tcep = (u64 *)page_address(ptbl->tces[index >> 16]);
+		tcep += (index & 0xffff);
+		dma_debug("[DMA] TCE %p cleared\n", tcep);
+		*tcep = 0;
+#ifndef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
+		/* Don't write there since it would pollute other MMIO accesses */
+		out_be64(hose->cfg_data + PCIE_REG_TCE_KILL,
+			 PCIE_REG_TCEKILL_SINGLE | PCIE_REG_TCEKILL_PS_4K |
+			 (__pa(tcep) & PCIE_REG_TCEKILL_ADDR_MASK));
+#endif
+		index++;
+	}
+}
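
/*
 * To make the TCE encoding above concrete (editor's illustration, with a
 * hypothetical buffer at physical address 0x12345000 mapped for both
 * directions):
 *
 *	rpn   = 0x12345000 >> TCE_SHIFT			= 0x12345
 *	*tcep = (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT
 *		| TCE_PCI_READ | TCE_PCI_WRITE		= 0x0000000012345003
 *
 * i.e. the real page number goes back into bits 12 and up, with the two
 * permission bits in the low bits of the 64-bit TCE.
 */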
+
+static struct wsp_dma_table *wsp_pci_create_dma32_table(struct wsp_phb *phb,
+							 unsigned int region,
+							 struct pci_dev *validate)
+{
+	struct pci_controller *hose = phb->hose;
+	unsigned long size = phb->dma32_region_size;
+	unsigned long addr = phb->dma32_region_size * region + phb->dma32_base;
+	struct wsp_dma_table *tbl;
+	int tvts_per_table, i, tvt, nid;
+	unsigned long flags;
+
+	nid = of_node_to_nid(phb->hose->dn);
+
+	/* Calculate how many TVTs are needed */
+	tvts_per_table = size / 0x10000000;
+	if (tvts_per_table == 0)
+		tvts_per_table = 1;
+
+	/* Calculate the base TVT index. We know all tables have the same
+	 * size so we just do a simple multiply here
+	 */
+	tvt = region * tvts_per_table;
+
+	pr_debug(" Region : %d\n", region);
+	pr_debug(" DMA range : 0x%08lx..0x%08lx\n", addr, addr + size - 1);
+	pr_debug(" Number of TVTs : %d\n", tvts_per_table);
+	pr_debug(" Base TVT : %d\n", tvt);
+	pr_debug(" Node : %d\n", nid);
+
+	tbl = kzalloc_node(sizeof(struct wsp_dma_table), GFP_KERNEL, nid);
+	if (!tbl)
+		return ERR_PTR(-ENOMEM);
+	tbl->phb = phb;
+
+	/* Create as many TVTs as needed, each represents 256M at most */
+	for (i = 0; i < tvts_per_table; i++) {
+		u64 tvt_data1, tvt_data0;
+
+		/* Allocate table. We use a 4K TCE size for now always so
+		 * one table is always 8 * (256M / 4K) == 512K
+		 */
+		tbl->tces[i] = alloc_pages_node(nid, GFP_KERNEL, get_order(0x80000));
+		if (tbl->tces[i] == NULL)
+			goto fail;
+		memset(page_address(tbl->tces[i]), 0, 0x80000);
+
+		pr_debug(" TCE table %d at : %p\n", i, page_address(tbl->tces[i]));
+
+		/* Table size. We currently set it to be the whole 256M region */
+		tvt_data0 = 2ull << IODA_TVT0_TCE_TABLE_SIZE_SHIFT;
+		/* IO page size set to 4K */
+		tvt_data1 = 1ull << IODA_TVT1_IO_PAGE_SIZE_SHIFT;
+		/* Shift in the address */
+		tvt_data0 |= __pa(page_address(tbl->tces[i])) << IODA_TVT0_TTA_SHIFT;
+
+		/* Validation stuff. We only fully validate bus/dev/fn for now;
+		 * one day maybe we can group devices, but that isn't the case
+		 * at the moment.
+		 */
+		if (validate) {
+			tvt_data0 |= IODA_TVT0_BUSNUM_VALID_MASK;
+			tvt_data0 |= validate->bus->number;
+			tvt_data1 |= IODA_TVT1_DEVNUM_VALID;
+			tvt_data1 |= ((u64)PCI_SLOT(validate->devfn))
+				<< IODA_TVT1_DEVNUM_VALUE_SHIFT;
+			tvt_data1 |= IODA_TVT1_FUNCNUM_VALID;
+			tvt_data1 |= ((u64)PCI_FUNC(validate->devfn))
+				<< IODA_TVT1_FUNCNUM_VALUE_SHIFT;
+		}
+
+		/* XX PE number is always 0 for now */
+
+		/* Program the values using the PHB lock */
+		spin_lock_irqsave(&phb->lock, flags);
+		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
+			 (tvt + i) | PCIE_REG_IODA_AD_TBL_TVT);
+		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, tvt_data1);
+		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, tvt_data0);
+		spin_unlock_irqrestore(&phb->lock, flags);
+	}
+
+	/* Init bits and pieces */
+	tbl->table.it_blocksize = 16;
+	tbl->table.it_offset = addr >> IOMMU_PAGE_SHIFT;
+	tbl->table.it_size = size >> IOMMU_PAGE_SHIFT;
+
+	/*
+	 * It's already blank but we clear it anyway.
+	 * Consider an additional interface that makes clearing optional.
+	 */
+	iommu_init_table(&tbl->table, nid);
+
+	list_add(&tbl->link, &phb->dma_tables);
+	return tbl;
+
+ fail:
+	pr_debug(" Failed to allocate a 256M TCE table !\n");
+	for (i = 0; i < tvts_per_table; i++)
+		if (tbl->tces[i])
+			__free_pages(tbl->tces[i], get_order(0x80000));
+	kfree(tbl);
+	return ERR_PTR(-ENOMEM);
+}
+
+static void __devinit wsp_pci_dma_dev_setup(struct pci_dev *pdev)
+{
+	struct dev_archdata *archdata = &pdev->dev.archdata;
+	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
+	struct wsp_phb *phb = hose->private_data;
+	struct wsp_dma_table *table = NULL;
+	unsigned long flags;
+	int i;
+
+	/* Don't assign an iommu table to a bridge */
+	if (pdev->hdr_type == PCI_HEADER_TYPE_BRIDGE)
+		return;
+
+	pr_debug("%s: Setting up DMA...\n", pci_name(pdev));
+
+	spin_lock_irqsave(&phb->lock, flags);
+
+	/* If only one region, check if it already exists */
+	if (phb->dma32_num_regions == 1) {
+		spin_unlock_irqrestore(&phb->lock, flags);
+		if (list_empty(&phb->dma_tables))
+			table = wsp_pci_create_dma32_table(phb, 0, NULL);
+		else
+			table = list_first_entry(&phb->dma_tables,
+						 struct wsp_dma_table,
+						 link);
+	} else {
+		/* else find a free region */
+		for (i = 0; i < phb->dma32_num_regions && !table; i++) {
+			if (__test_and_set_bit(i, &phb->dma32_map))
+				continue;
+			spin_unlock_irqrestore(&phb->lock, flags);
+			table = wsp_pci_create_dma32_table(phb, i, pdev);
+		}
+	}
+
+	/* Check if we got an error */
+	if (IS_ERR(table)) {
+		pr_err("%s: Failed to create DMA table, err %ld !\n",
+		       pci_name(pdev), PTR_ERR(table));
+		return;
+	}
+
+	/* Or a valid table */
+	if (table) {
+		pr_info("%s: Setup iommu: 32-bit DMA region 0x%08lx..0x%08lx\n",
+			pci_name(pdev),
+			table->table.it_offset << IOMMU_PAGE_SHIFT,
+			(table->table.it_offset << IOMMU_PAGE_SHIFT)
+			+ phb->dma32_region_size - 1);
+		archdata->dma_data.iommu_table_base = &table->table;
+		return;
+	}
+
+	/* Or no room */
+	spin_unlock_irqrestore(&phb->lock, flags);
+	pr_err("%s: Out of DMA space !\n", pci_name(pdev));
+}
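
/*
 * Editor's illustration of the allocator above (hypothetical sequence,
 * assuming NUM_DMA32_REGIONS = 8, so dma32_region_size = 0x10000000):
 * the first non-bridge devices to probe claim bits in phb->dma32_map via
 * __test_and_set_bit() and get disjoint windows:
 *
 *	1st device -> bit 0 -> DMA 0x00000000..0x0fffffff
 *	2nd device -> bit 1 -> DMA 0x10000000..0x1fffffff
 *	3rd device -> bit 2 -> DMA 0x20000000..0x2fffffff
 *
 * Nothing here ever clears those bits, so regions are not recycled on
 * hot-unplug and a 9th device would hit the "Out of DMA space" path.
 */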
+
+static void __init wsp_pcie_configure_hw(struct pci_controller *hose)
+{
+	u64 val;
+	int i;
+
+#define DUMP_REG(x) \
+	pr_debug("%-30s : 0x%016llx\n", #x, in_be64(hose->cfg_data + x))
+
+#ifdef CONFIG_WSP_DD1_WORKAROUND_BAD_PCIE_CLASS
+	/* WSP DD1 has a bogus class code by default in the PCI-E
+	 * root complex's built-in P2P bridge */
+	val = in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1);
+	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", val);
+	out_be64(hose->cfg_data + PCIE_REG_SYS_CFG1,
+		 (val & ~PCIE_REG_SYS_CFG1_CLASS_CODE) | (PCI_CLASS_BRIDGE_PCI << 8));
+	pr_debug("PCI-E SYS_CFG1 : 0x%llx\n", in_be64(hose->cfg_data + PCIE_REG_SYS_CFG1));
+#endif /* CONFIG_WSP_DD1_WORKAROUND_BAD_PCIE_CLASS */
+
+#ifdef CONFIG_WSP_DD1_WORKAROUND_DD1_TCE_BUGS
+	/* XXX Disable TCE caching, it doesn't work on DD1 */
+	out_be64(hose->cfg_data + 0xe50,
+		 in_be64(hose->cfg_data + 0xe50) | (3ull << 62));
+	printk("PCI-E DEBUG CONTROL 5 = 0x%llx\n", in_be64(hose->cfg_data + 0xe50));
+#endif
+
+	/* Configure M32A and IO. IO is hard wired to be 1M for now */
+	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_ADDR, hose->io_base_phys);
+	out_be64(hose->cfg_data + PCIE_REG_IO_BASE_MASK,
+		 (~(hose->io_resource.end - hose->io_resource.start)) &
+		 0x3fffffff000ul);
+	out_be64(hose->cfg_data + PCIE_REG_IO_START_ADDR, 0 | 1);
+
+	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_ADDR,
+		 hose->mem_resources[0].start);
+	printk("Want to write to M32A_BASE_MASK : 0x%llx\n",
+	       (~(hose->mem_resources[0].end -
+		  hose->mem_resources[0].start)) & 0x3ffffff0000ul);
+	out_be64(hose->cfg_data + PCIE_REG_M32A_BASE_MASK,
+		 (~(hose->mem_resources[0].end -
+		    hose->mem_resources[0].start)) & 0x3ffffff0000ul);
+	out_be64(hose->cfg_data + PCIE_REG_M32A_START_ADDR,
+		 (hose->mem_resources[0].start - hose->pci_mem_offset) | 1);
+
+	/* Clear all TVT entries
+	 *
+	 * XX Might get TVT count from device-tree
+	 */
+	for (i = 0; i < IODA_TVT_COUNT; i++) {
+		out_be64(hose->cfg_data + PCIE_REG_IODA_ADDR,
+			 PCIE_REG_IODA_AD_TBL_TVT | i);
+		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA1, 0);
+		out_be64(hose->cfg_data + PCIE_REG_IODA_DATA0, 0);
+	}
+
+	/* Kill the TCE cache */
+	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG,
+		 in_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG) |
+		 PCIE_REG_PHBC_64B_TCE_EN);
+
+	/* Enable 32 & 64-bit MSIs, IO space and M32A */
+	val = PCIE_REG_PHBC_32BIT_MSI_EN |
+	      PCIE_REG_PHBC_IO_EN |
+	      PCIE_REG_PHBC_64BIT_MSI_EN |
+	      PCIE_REG_PHBC_M32A_EN;
+	if (iommu_is_off)
+		val |= PCIE_REG_PHBC_DMA_XLATE_BYPASS;
+	pr_debug("Will write config: 0x%llx\n", val);
+	out_be64(hose->cfg_data + PCIE_REG_PHB_CONFIG, val);
+
+	/* Enable error reporting */
+	out_be64(hose->cfg_data + 0xe00,
+		 in_be64(hose->cfg_data + 0xe00) | 0x0008000000000000ull);
+
+	/* Mask an error that's generated when doing config space probe
+	 *
+	 * XXX Maybe we should only mask it around config space cycles... that or
+	 * ignore it when we know we had a config space cycle recently ?
+	 */
+	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS_MASK, 0x8000000000000000ull);
+	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS_MASK, 0x8000000000000000ull);
+
+	/* Enable UTL errors; for now, all of them go to UTL irq 1.
+	 *
+	 * We similarly mask one UTL error caused apparently during normal
+	 * probing. We also mask the link up error.
+	 */
+	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_ERR_SEV, 0);
+	out_be64(hose->cfg_data + PCIE_UTL_RC_ERR_SEVERITY, 0);
+	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_ERROR_SEV, 0);
+	out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_IRQ_EN, 0xffffffff00000000ull);
+	out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_IRQ_EN, 0xff5fffff00000000ull);
+	out_be64(hose->cfg_data + PCIE_UTL_EP_ERR_IRQ_EN, 0xffffffff00000000ull);
+
+	DUMP_REG(PCIE_REG_IO_BASE_ADDR);
+	DUMP_REG(PCIE_REG_IO_BASE_MASK);
+	DUMP_REG(PCIE_REG_IO_START_ADDR);
+	DUMP_REG(PCIE_REG_M32A_BASE_ADDR);
+	DUMP_REG(PCIE_REG_M32A_BASE_MASK);
+	DUMP_REG(PCIE_REG_M32A_START_ADDR);
+	DUMP_REG(PCIE_REG_M32B_BASE_ADDR);
+	DUMP_REG(PCIE_REG_M32B_BASE_MASK);
+	DUMP_REG(PCIE_REG_M32B_START_ADDR);
+	DUMP_REG(PCIE_REG_M64_BASE_ADDR);
+	DUMP_REG(PCIE_REG_M64_BASE_MASK);
+	DUMP_REG(PCIE_REG_M64_START_ADDR);
+	DUMP_REG(PCIE_REG_PHB_CONFIG);
+}
+
+static void wsp_pci_wait_io_idle(struct wsp_phb *phb, unsigned long port)
+{
+	u64 val;
+	int i;
+
+	for (i = 0; i < 10000; i++) {
+		val = in_be64(phb->hose->cfg_data + 0xe08);
+		if ((val & 0x1900000000000000ull) == 0x0100000000000000ull)
+			return;
+		udelay(1);
+	}
+	pr_warning("PCI IO timeout on domain %d port 0x%lx\n",
+		   phb->hose->global_number, port);
+}
+
+#define DEF_PCI_AC_RET_pio(name, ret, at, al, aa)	\
+static ret wsp_pci_##name at				\
+{							\
+	struct iowa_bus *bus;				\
+	struct wsp_phb *phb;				\
+	unsigned long flags;				\
+	ret rval;					\
+	bus = iowa_pio_find_bus(aa);			\
+	WARN_ON(!bus);					\
+	phb = bus->private;				\
+	spin_lock_irqsave(&phb->lock, flags);		\
+	wsp_pci_wait_io_idle(phb, aa);			\
+	rval = __do_##name al;				\
+	spin_unlock_irqrestore(&phb->lock, flags);	\
+	return rval;					\
+}
+
+#define DEF_PCI_AC_NORET_pio(name, at, al, aa)		\
+static void wsp_pci_##name at				\
+{							\
+	struct iowa_bus *bus;				\
+	struct wsp_phb *phb;				\
+	unsigned long flags;				\
+	bus = iowa_pio_find_bus(aa);			\
+	WARN_ON(!bus);					\
+	phb = bus->private;				\
+	spin_lock_irqsave(&phb->lock, flags);		\
+	wsp_pci_wait_io_idle(phb, aa);			\
+	__do_##name al;					\
+	spin_unlock_irqrestore(&phb->lock, flags);	\
+}
+
+#define DEF_PCI_AC_RET_mem(name, ret, at, al, aa)
+#define DEF_PCI_AC_NORET_mem(name, at, al, aa)
+
+#define DEF_PCI_AC_RET(name, ret, at, al, space, aa)	\
+	DEF_PCI_AC_RET_##space(name, ret, at, al, aa)
+
+#define DEF_PCI_AC_NORET(name, at, al, space, aa)	\
+	DEF_PCI_AC_NORET_##space(name, at, al, aa)
+
+
+#include <asm/io-defs.h>
+
+#undef DEF_PCI_AC_RET
+#undef DEF_PCI_AC_NORET
+
+static struct ppc_pci_io wsp_pci_iops = {
+	.inb = wsp_pci_inb,
+	.inw = wsp_pci_inw,
+	.inl = wsp_pci_inl,
+	.outb = wsp_pci_outb,
+	.outw = wsp_pci_outw,
+	.outl = wsp_pci_outl,
+	.insb = wsp_pci_insb,
+	.insw = wsp_pci_insw,
+	.insl = wsp_pci_insl,
+	.outsb = wsp_pci_outsb,
+	.outsw = wsp_pci_outsw,
+	.outsl = wsp_pci_outsl,
+};
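
/*
 * Editor's sketch of what the io-defs expansion above generates for one
 * hook (approximate, not the literal preprocessor output; it assumes
 * io-defs.h declares inb as
 * DEF_PCI_AC_RET(inb, u8, (unsigned long port), (port), pio, port)):
 *
 *	static u8 wsp_pci_inb(unsigned long port)
 *	{
 *		struct iowa_bus *bus = iowa_pio_find_bus(port);
 *		struct wsp_phb *phb;
 *		unsigned long flags;
 *		u8 rval;
 *
 *		WARN_ON(!bus);
 *		phb = bus->private;
 *		spin_lock_irqsave(&phb->lock, flags);
 *		wsp_pci_wait_io_idle(phb, port);
 *		rval = __do_inb(port);
 *		spin_unlock_irqrestore(&phb->lock, flags);
 *		return rval;
 *	}
 *
 * so every legacy port access drains the PHB's outstanding IO under the
 * PHB lock before the underlying accessor runs.
 */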
+
+static int __init wsp_setup_one_phb(struct device_node *np)
+{
+	struct pci_controller *hose;
+	struct wsp_phb *phb;
+
+	pr_info("PCI: Setting up PCIe host bridge %s\n", np->full_name);
+
+	phb = zalloc_maybe_bootmem(sizeof(struct wsp_phb), GFP_KERNEL);
+	if (!phb)
+		return -ENOMEM;
+	hose = pcibios_alloc_controller(np);
+	if (!hose) {
+		/* Can't really free the phb */
+		return -ENOMEM;
+	}
+	hose->private_data = phb;
+	phb->hose = hose;
+
+	INIT_LIST_HEAD(&phb->dma_tables);
+	spin_lock_init(&phb->lock);
+
+	/* XXX Use bus-range property ? */
+	hose->first_busno = 0;
+	hose->last_busno = 0xff;
+
+	/* We use cfg_data as the address for the whole bridge MMIO space */
+	hose->cfg_data = of_iomap(hose->dn, 0);
+
+	pr_debug("PCIe registers mapped at 0x%p\n", hose->cfg_data);
+
+	/* Get the ranges of the device-tree */
+	pci_process_bridge_OF_ranges(hose, np, 0);
+
+	/* XXX Force re-assigning of everything for now */
+	pci_add_flags(PCI_REASSIGN_ALL_BUS | PCI_REASSIGN_ALL_RSRC |
+		      PCI_ENABLE_PROC_DOMAINS);
+	pci_probe_only = 0;
+
+	/* Calculate how the TCE space is divided */
+	phb->dma32_base = 0;
+	phb->dma32_num_regions = NUM_DMA32_REGIONS;
+	if (phb->dma32_num_regions > MAX_TABLE_TVT_COUNT) {
+		pr_warning("IOMMU: Clamped to %d DMA32 regions\n",
+			   MAX_TABLE_TVT_COUNT);
+		phb->dma32_num_regions = MAX_TABLE_TVT_COUNT;
+	}
+	phb->dma32_region_size = 0x80000000 / phb->dma32_num_regions;
+
+	BUG_ON(!is_power_of_2(phb->dma32_region_size));
+
+	/* Setup config ops */
+	hose->ops = &wsp_pcie_pci_ops;
+
+	/* Configure the HW */
+	wsp_pcie_configure_hw(hose);
+
+	/* Instantiate IO workarounds */
+	iowa_register_bus(hose, &wsp_pci_iops, NULL, phb);
+#ifdef CONFIG_PCI_MSI
+	wsp_setup_phb_msi(hose);
+#endif
+
+	/* Add to global list */
+	list_add(&phb->all, &wsp_phbs);
+
+	return 0;
+}
+
+void __init wsp_setup_pci(void)
+{
+	struct device_node *np;
+	int rc;
+
+	/* Find host bridges */
+	for_each_compatible_node(np, "pciex", PCIE_COMPATIBLE) {
+		rc = wsp_setup_one_phb(np);
+		if (rc)
+			pr_err("Failed to setup PCIe bridge %s, rc=%d\n",
+			       np->full_name, rc);
+	}
+
+	/* Establish device-tree linkage */
+	pci_devs_phb_init();
+
+	/* Set DMA ops to use TCEs */
+	if (iommu_is_off) {
+		pr_info("PCI-E: Disabled TCEs, using direct DMA\n");
+		set_pci_dma_ops(&dma_direct_ops);
+	} else {
+		ppc_md.pci_dma_dev_setup = wsp_pci_dma_dev_setup;
+		ppc_md.tce_build = tce_build_wsp;
+		ppc_md.tce_free = tce_free_wsp;
+		set_pci_dma_ops(&dma_iommu_ops);
+	}
+}
+
+#define err_debug(fmt...)	pr_debug(fmt)
+//#define err_debug(fmt...)
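
/*
 * Editor's illustration of how the wiring above is exercised (a
 * hypothetical driver call, not code from this patch): with
 * dma_iommu_ops installed, a streaming mapping such as
 *
 *	dma_addr_t h = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *
 * looks up the iommu_table stashed in dev.archdata by
 * wsp_pci_dma_dev_setup(), allocates a range in it, and reaches
 * tce_build_wsp() through ppc_md.tce_build; the returned handle then
 * falls inside that device's 32-bit DMA region.
 */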
+
+static int __init wsp_pci_get_err_irq_no_dt(struct device_node *np)
+{
+	const u32 *prop;
+	int hw_irq;
+
+	/* Ok, no interrupts property, let's try to find our child P2P */
+	np = of_get_next_child(np, NULL);
+	if (np == NULL)
+		return 0;
+
+	/* Grab its interrupt map */
+	prop = of_get_property(np, "interrupt-map", NULL);
+	if (prop == NULL)
+		return 0;
+
+	/* Grab one of the interrupts in there, keep the low 4 bits */
+	hw_irq = prop[5] & 0xf;
+
+	/* 0..4 for PHB 0 and 5..9 for PHB 1 */
+	if (hw_irq < 5)
+		hw_irq = 4;
+	else
+		hw_irq = 9;
+	hw_irq |= prop[5] & ~0xf;
+
+	err_debug("PCI: Using 0x%x as error IRQ for %s\n",
+		  hw_irq, np->parent->full_name);
+	return irq_create_mapping(NULL, hw_irq);
+}
+
+static const struct {
+	u32 offset;
+	const char *name;
+} wsp_pci_regs[] = {
+#define DREG(x) { PCIE_REG_##x, #x }
+#define DUTL(x) { PCIE_UTL_##x, "UTL_" #x }
+	/* Architected registers except CONFIG_ and IODA
+	 * to avoid side effects
+	 */
+	DREG(DMA_CHAN_STATUS),
+	DREG(CPU_LOADSTORE_STATUS),
+	DREG(LOCK0),
+	DREG(LOCK1),
+	DREG(PHB_CONFIG),
+	DREG(IO_BASE_ADDR),
+	DREG(IO_BASE_MASK),
+	DREG(IO_START_ADDR),
+	DREG(M32A_BASE_ADDR),
+	DREG(M32A_BASE_MASK),
+	DREG(M32A_START_ADDR),
+	DREG(M32B_BASE_ADDR),
+	DREG(M32B_BASE_MASK),
+	DREG(M32B_START_ADDR),
+	DREG(M64_BASE_ADDR),
+	DREG(M64_BASE_MASK),
+	DREG(M64_START_ADDR),
+	DREG(TCE_KILL),
+	DREG(LOCK2),
+	DREG(PHB_GEN_CAP),
+	DREG(PHB_TCE_CAP),
+	DREG(PHB_IRQ_CAP),
+	DREG(PHB_EEH_CAP),
+	DREG(PAPR_ERR_INJ_CONTROL),
+	DREG(PAPR_ERR_INJ_ADDR),
+	DREG(PAPR_ERR_INJ_MASK),
+
+	/* UTL core regs */
+	DUTL(SYS_BUS_CONTROL),
+	DUTL(STATUS),
+	DUTL(SYS_BUS_AGENT_STATUS),
+	DUTL(SYS_BUS_AGENT_ERR_SEV),
+	DUTL(SYS_BUS_AGENT_IRQ_EN),
+	DUTL(SYS_BUS_BURST_SZ_CONF),
+	DUTL(REVISION_ID),
+	DUTL(OUT_POST_HDR_BUF_ALLOC),
+	DUTL(OUT_POST_DAT_BUF_ALLOC),
+	DUTL(IN_POST_HDR_BUF_ALLOC),
+	DUTL(IN_POST_DAT_BUF_ALLOC),
+	DUTL(OUT_NP_BUF_ALLOC),
+	DUTL(IN_NP_BUF_ALLOC),
+	DUTL(PCIE_TAGS_ALLOC),
+	DUTL(GBIF_READ_TAGS_ALLOC),
+
+	DUTL(PCIE_PORT_CONTROL),
+	DUTL(PCIE_PORT_STATUS),
+	DUTL(PCIE_PORT_ERROR_SEV),
+	DUTL(PCIE_PORT_IRQ_EN),
+	DUTL(RC_STATUS),
+	DUTL(RC_ERR_SEVERITY),
+	DUTL(RC_IRQ_EN),
+	DUTL(EP_STATUS),
+	DUTL(EP_ERR_SEVERITY),
+	DUTL(EP_ERR_IRQ_EN),
+	DUTL(PCI_PM_CTRL1),
+	DUTL(PCI_PM_CTRL2),
+
+	/* PCIe stack regs */
+	DREG(SYSTEM_CONFIG1),
+	DREG(SYSTEM_CONFIG2),
+	DREG(EP_SYSTEM_CONFIG),
+	DREG(EP_FLR),
+	DREG(EP_BAR_CONFIG),
+	DREG(LINK_CONFIG),
+	DREG(PM_CONFIG),
+	DREG(DLP_CONTROL),
+	DREG(DLP_STATUS),
+	DREG(ERR_REPORT_CONTROL),
+	DREG(SLOT_CONTROL1),
+	DREG(SLOT_CONTROL2),
+	DREG(UTL_CONFIG),
+	DREG(BUFFERS_CONFIG),
+	DREG(ERROR_INJECT),
+	DREG(SRIOV_CONFIG),
+	DREG(PF0_SRIOV_STATUS),
+	DREG(PF1_SRIOV_STATUS),
+	DREG(PORT_NUMBER),
+	DREG(POR_SYSTEM_CONFIG),
+
+	/* Internal logic regs */
+	DREG(PHB_VERSION),
+	DREG(RESET),
+	DREG(PHB_CONTROL),
+	DREG(PHB_TIMEOUT_CONTROL1),
+	DREG(PHB_QUIESCE_DMA),
+	DREG(PHB_DMA_READ_TAG_ACTV),
+	DREG(PHB_TCE_READ_TAG_ACTV),
+
+	/* FIR registers */
+	DREG(LEM_FIR_ACCUM),
+	DREG(LEM_FIR_AND_MASK),
+	DREG(LEM_FIR_OR_MASK),
+	DREG(LEM_ACTION0),
+	DREG(LEM_ACTION1),
+	DREG(LEM_ERROR_MASK),
+	DREG(LEM_ERROR_AND_MASK),
+	DREG(LEM_ERROR_OR_MASK),
+
+	/* Error traps registers */
+	DREG(PHB_ERR_STATUS),
+	DREG(PHB_ERR1_STATUS),
+	DREG(PHB_ERR_INJECT),
+	DREG(PHB_ERR_LEM_ENABLE),
+	DREG(PHB_ERR_IRQ_ENABLE),
+	DREG(PHB_ERR_FREEZE_ENABLE),
+	DREG(PHB_ERR_SIDE_ENABLE),
+	DREG(PHB_ERR_LOG_0),
+	DREG(PHB_ERR_LOG_1),
+	DREG(PHB_ERR_STATUS_MASK),
+	DREG(PHB_ERR1_STATUS_MASK),
+	DREG(MMIO_ERR_STATUS),
+ DREG(MMIO_ERR1_STATUS), + DREG(MMIO_ERR_INJECT), + DREG(MMIO_ERR_LEM_ENABLE), + DREG(MMIO_ERR_IRQ_ENABLE), + DREG(MMIO_ERR_FREEZE_ENABLE), + DREG(MMIO_ERR_SIDE_ENABLE), + DREG(MMIO_ERR_LOG_0), + DREG(MMIO_ERR_LOG_1), + DREG(MMIO_ERR_STATUS_MASK), + DREG(MMIO_ERR1_STATUS_MASK), + DREG(DMA_ERR_STATUS), + DREG(DMA_ERR1_STATUS), + DREG(DMA_ERR_INJECT), + DREG(DMA_ERR_LEM_ENABLE), + DREG(DMA_ERR_IRQ_ENABLE), + DREG(DMA_ERR_FREEZE_ENABLE), + DREG(DMA_ERR_SIDE_ENABLE), + DREG(DMA_ERR_LOG_0), + DREG(DMA_ERR_LOG_1), + DREG(DMA_ERR_STATUS_MASK), + DREG(DMA_ERR1_STATUS_MASK), + + /* Debug and Trace registers */ + DREG(PHB_DEBUG_CONTROL0), + DREG(PHB_DEBUG_STATUS0), + DREG(PHB_DEBUG_CONTROL1), + DREG(PHB_DEBUG_STATUS1), + DREG(PHB_DEBUG_CONTROL2), + DREG(PHB_DEBUG_STATUS2), + DREG(PHB_DEBUG_CONTROL3), + DREG(PHB_DEBUG_STATUS3), + DREG(PHB_DEBUG_CONTROL4), + DREG(PHB_DEBUG_STATUS4), + DREG(PHB_DEBUG_CONTROL5), + DREG(PHB_DEBUG_STATUS5), + + /* Don't seem to exist ... + DREG(PHB_DEBUG_CONTROL6), + DREG(PHB_DEBUG_STATUS6), + */ +}; + +static int wsp_pci_regs_show(struct seq_file *m, void *private) +{ + struct wsp_phb *phb = m->private; + struct pci_controller *hose = phb->hose; + int i; + + for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) { + /* Skip write-only regs */ + if (wsp_pci_regs[i].offset == 0xc08 || + wsp_pci_regs[i].offset == 0xc10 || + wsp_pci_regs[i].offset == 0xc38 || + wsp_pci_regs[i].offset == 0xc40) + continue; + seq_printf(m, "0x%03x: 0x%016llx %s\n", + wsp_pci_regs[i].offset, + in_be64(hose->cfg_data + wsp_pci_regs[i].offset), + wsp_pci_regs[i].name); + } + return 0; +} + +static int wsp_pci_regs_open(struct inode *inode, struct file *file) +{ + return single_open(file, wsp_pci_regs_show, inode->i_private); +} + +static const struct file_operations wsp_pci_regs_fops = { + .open = wsp_pci_regs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int wsp_pci_reg_set(void *data, u64 val) +{ + out_be64((void __iomem *)data, val); + return 0; +} + +static int wsp_pci_reg_get(void *data, u64 *val) +{ + *val = in_be64((void __iomem *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(wsp_pci_reg_fops, wsp_pci_reg_get, wsp_pci_reg_set, "0x%llx\n"); + +static irqreturn_t wsp_pci_err_irq(int irq, void *dev_id) +{ + struct wsp_phb *phb = dev_id; + struct pci_controller *hose = phb->hose; + irqreturn_t handled = IRQ_NONE; + struct wsp_pcie_err_log_data ed; + + pr_err("PCI: Error interrupt on %s (PHB %d)\n", + hose->dn->full_name, hose->global_number); + again: + memset(&ed, 0, sizeof(ed)); + + /* Read and clear UTL errors */ + ed.utl_sys_err = in_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS); + if (ed.utl_sys_err) + out_be64(hose->cfg_data + PCIE_UTL_SYS_BUS_AGENT_STATUS, ed.utl_sys_err); + ed.utl_port_err = in_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS); + if (ed.utl_port_err) + out_be64(hose->cfg_data + PCIE_UTL_PCIE_PORT_STATUS, ed.utl_port_err); + ed.utl_rc_err = in_be64(hose->cfg_data + PCIE_UTL_RC_STATUS); + if (ed.utl_rc_err) + out_be64(hose->cfg_data + PCIE_UTL_RC_STATUS, ed.utl_rc_err); + + /* Read and clear main trap errors */ + ed.phb_err = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS); + if (ed.phb_err) { + ed.phb_err1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS); + ed.phb_log0 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_0); + ed.phb_log1 = in_be64(hose->cfg_data + PCIE_REG_PHB_ERR_LOG_1); + out_be64(hose->cfg_data + PCIE_REG_PHB_ERR1_STATUS, 0); + out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_STATUS, 0); + } + ed.mmio_err = 
in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS); + if (ed.mmio_err) { + ed.mmio_err1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS); + ed.mmio_log0 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_0); + ed.mmio_log1 = in_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_LOG_1); + out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR1_STATUS, 0); + out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_STATUS, 0); + } + ed.dma_err = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS); + if (ed.dma_err) { + ed.dma_err1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS); + ed.dma_log0 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_0); + ed.dma_log1 = in_be64(hose->cfg_data + PCIE_REG_DMA_ERR_LOG_1); + out_be64(hose->cfg_data + PCIE_REG_DMA_ERR1_STATUS, 0); + out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_STATUS, 0); + } + + /* Now print things out */ + if (ed.phb_err) { + pr_err(" PHB Error Status : 0x%016llx\n", ed.phb_err); + pr_err(" PHB First Error Status: 0x%016llx\n", ed.phb_err1); + pr_err(" PHB Error Log 0 : 0x%016llx\n", ed.phb_log0); + pr_err(" PHB Error Log 1 : 0x%016llx\n", ed.phb_log1); + } + if (ed.mmio_err) { + pr_err(" MMIO Error Status : 0x%016llx\n", ed.mmio_err); + pr_err(" MMIO First Error Status: 0x%016llx\n", ed.mmio_err1); + pr_err(" MMIO Error Log 0 : 0x%016llx\n", ed.mmio_log0); + pr_err(" MMIO Error Log 1 : 0x%016llx\n", ed.mmio_log1); + } + if (ed.dma_err) { + pr_err(" DMA Error Status : 0x%016llx\n", ed.dma_err); + pr_err(" DMA First Error Status: 0x%016llx\n", ed.dma_err1); + pr_err(" DMA Error Log 0 : 0x%016llx\n", ed.dma_log0); + pr_err(" DMA Error Log 1 : 0x%016llx\n", ed.dma_log1); + } + if (ed.utl_sys_err) + pr_err(" UTL Sys Error Status : 0x%016llx\n", ed.utl_sys_err); + if (ed.utl_port_err) + pr_err(" UTL Port Error Status : 0x%016llx\n", ed.utl_port_err); + if (ed.utl_rc_err) + pr_err(" UTL RC Error Status : 0x%016llx\n", ed.utl_rc_err); + + /* Interrupts are caused by the error traps. 
If we had any error there,
+	 * we loop again in case the UTL buffered some new stuff between
+	 * going there and going to the traps.
+	 */
+	if (ed.dma_err || ed.mmio_err || ed.phb_err) {
+		handled = IRQ_HANDLED;
+		goto again;
+	}
+	return handled;
+}
+
+static void __init wsp_setup_pci_err_reporting(struct wsp_phb *phb)
+{
+	struct pci_controller *hose = phb->hose;
+	int err_irq, i, rc;
+	char fname[16];
+
+	/* Create a debugfs file for that PHB */
+	sprintf(fname, "phb%d", phb->hose->global_number);
+	phb->ddir = debugfs_create_dir(fname, powerpc_debugfs_root);
+
+	/* Some useful debug output */
+	if (phb->ddir) {
+		struct dentry *d = debugfs_create_dir("regs", phb->ddir);
+		char tmp[64];
+
+		for (i = 0; i < ARRAY_SIZE(wsp_pci_regs); i++) {
+			sprintf(tmp, "%03x_%s", wsp_pci_regs[i].offset,
+				wsp_pci_regs[i].name);
+			debugfs_create_file(tmp, 0600, d,
+					    hose->cfg_data + wsp_pci_regs[i].offset,
+					    &wsp_pci_reg_fops);
+		}
+		debugfs_create_file("all_regs", 0600, phb->ddir, phb, &wsp_pci_regs_fops);
+	}
+
+	/* Find the IRQ number for that PHB */
+	err_irq = irq_of_parse_and_map(hose->dn, 0);
+	if (err_irq == 0)
+		/* XXX Error IRQ missing from device-tree */
+		err_irq = wsp_pci_get_err_irq_no_dt(hose->dn);
+	if (err_irq == 0) {
+		pr_err("PCI: Failed to fetch error interrupt for %s\n",
+		       hose->dn->full_name);
+		return;
+	}
+	/* Request it */
+	rc = request_irq(err_irq, wsp_pci_err_irq, 0, "wsp_pci error", phb);
+	if (rc) {
+		pr_err("PCI: Failed to request interrupt for %s\n",
+		       hose->dn->full_name);
+	}
+	/* Enable interrupts for all errors for now */
+	out_be64(hose->cfg_data + PCIE_REG_PHB_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
+	out_be64(hose->cfg_data + PCIE_REG_MMIO_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
+	out_be64(hose->cfg_data + PCIE_REG_DMA_ERR_IRQ_ENABLE, 0xffffffffffffffffull);
+}
+
+/*
+ * This is called later to hook up with the error interrupt
+ */
+static int __init wsp_setup_pci_late(void)
+{
+	struct wsp_phb *phb;
+
+	list_for_each_entry(phb, &wsp_phbs, all)
+		wsp_setup_pci_err_reporting(phb);
+
+	return 0;
+}
+arch_initcall(wsp_setup_pci_late);
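
/*
 * Editor's note on the debugfs layout created above (illustrative paths;
 * the root is the arch's "powerpc" debugfs directory, typically
 * /sys/kernel/debug/powerpc):
 *
 *	powerpc/phb0/all_regs          - full dump via wsp_pci_regs_show()
 *	powerpc/phb0/regs/<off>_<NAME> - one register per file, read and
 *	        written through wsp_pci_reg_get()/wsp_pci_reg_set()
 *
 * Writes go straight to out_be64() on live PHB registers, so these files
 * are strictly a bring-up/debug aid.
 */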