diff options
author | Jan Glauber <jang@linux.vnet.ibm.com> | 2012-11-29 13:05:05 +0100 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2012-11-30 17:47:21 +0100 |
commit | 9a4da8a5b109906a64bed5aaeb83bf4edb1f5888 (patch) | |
tree | 2788d7c8fe3e90333555435c7539d9f31d2c520e /arch/s390/pci/pci.c | |
parent | e56e4e87e370a0f121450d52337969aa1be21ff7 (diff) | |
download | linux-9a4da8a5b109906a64bed5aaeb83bf4edb1f5888.tar.gz linux-9a4da8a5b109906a64bed5aaeb83bf4edb1f5888.tar.bz2 linux-9a4da8a5b109906a64bed5aaeb83bf4edb1f5888.zip |
s390/pci: PCI adapter interrupts for MSI/MSI-X
Support PCI adapter interrupts using the Single-IRQ-mode. Single-IRQ-mode
disables an adapter IRQ automatically after delivering it until the SIC
instruction enables it again. This is used to reduce the number of IRQs
for streaming workloads.
Up to 64 MSI handlers can be registered per PCI function.
A hash table is used to map interrupt numbers to MSI descriptors.
The interrupt vector is scanned using the flogr instruction.
Only MSI/MSI-X interrupts are supported, no legacy INTs.
Signed-off-by: Jan Glauber <jang@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch/s390/pci/pci.c')
-rw-r--r-- | arch/s390/pci/pci.c | 464 |
1 files changed, 463 insertions, 1 deletions
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 70f6c56c8d0f..d11dc8a25f34 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -23,17 +23,25 @@ #include <linux/err.h> #include <linux/export.h> #include <linux/delay.h> +#include <linux/irq.h> +#include <linux/kernel_stat.h> #include <linux/seq_file.h> #include <linux/pci.h> #include <linux/msi.h> +#include <asm/isc.h> +#include <asm/airq.h> #include <asm/facility.h> #include <asm/pci_insn.h> #include <asm/pci_clp.h> #define DEBUG /* enable pr_debug */ +#define SIC_IRQ_MODE_ALL 0 +#define SIC_IRQ_MODE_SINGLE 1 + #define ZPCI_NR_DMA_SPACES 1 +#define ZPCI_MSI_VEC_BITS 6 #define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS /* list of all detected zpci devices */ @@ -43,12 +51,63 @@ DEFINE_MUTEX(zpci_list_lock); static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES); static DEFINE_SPINLOCK(zpci_domain_lock); +struct callback { + irq_handler_t handler; + void *data; +}; + +struct zdev_irq_map { + unsigned long aibv; /* AI bit vector */ + int msi_vecs; /* consecutive MSI-vectors used */ + int __unused; + struct callback cb[ZPCI_NR_MSI_VECS]; /* callback handler array */ + spinlock_t lock; /* protect callbacks against de-reg */ +}; + +struct intr_bucket { + /* amap of adapters, one bit per dev, corresponds to one irq nr */ + unsigned long *alloc; + /* AI summary bit, global page for all devices */ + unsigned long *aisb; + /* pointer to aibv and callback data in zdev */ + struct zdev_irq_map *imap[ZPCI_NR_DEVICES]; + /* protects the whole bucket struct */ + spinlock_t lock; +}; + +static struct intr_bucket *bucket; + +/* Adapter local summary indicator */ +static u8 *zpci_irq_si; + +static atomic_t irq_retries = ATOMIC_INIT(0); + /* I/O Map */ static DEFINE_SPINLOCK(zpci_iomap_lock); static DECLARE_BITMAP(zpci_iomap, ZPCI_IOMAP_MAX_ENTRIES); struct zpci_iomap_entry *zpci_iomap_start; EXPORT_SYMBOL_GPL(zpci_iomap_start); +/* highest irq summary bit */ +static int __read_mostly aisb_max; + +static struct kmem_cache *zdev_irq_cache; + +static inline int irq_to_msi_nr(unsigned int irq) +{ + return irq & ZPCI_MSI_MASK; +} + +static inline int irq_to_dev_nr(unsigned int irq) +{ + return irq >> ZPCI_MSI_VEC_BITS; +} + +static inline struct zdev_irq_map *get_imap(unsigned int irq) +{ + return bucket->imap[irq_to_dev_nr(irq)]; +} + struct zpci_dev *get_zdev(struct pci_dev *pdev) { return (struct zpci_dev *) pdev->sysdata; @@ -120,6 +179,67 @@ static int zpci_store_fib(struct zpci_dev *zdev, u8 *fc) return (cc) ? -EIO : 0; } +/* Modify PCI: Register adapter interruptions */ +static int zpci_register_airq(struct zpci_dev *zdev, unsigned int aisb, + u64 aibv) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); + struct zpci_fib *fib; + int rc; + + fib = (void *) get_zeroed_page(GFP_KERNEL); + if (!fib) + return -ENOMEM; + + fib->isc = PCI_ISC; + fib->noi = zdev->irq_map->msi_vecs; + fib->sum = 1; /* enable summary notifications */ + fib->aibv = aibv; + fib->aibvo = 0; /* every function has its own page */ + fib->aisb = (u64) bucket->aisb + aisb / 8; + fib->aisbo = aisb & ZPCI_MSI_MASK; + + rc = mpcifc_instr(req, fib); + pr_debug("%s mpcifc returned noi: %d\n", __func__, fib->noi); + + free_page((unsigned long) fib); + return rc; +} + +struct mod_pci_args { + u64 base; + u64 limit; + u64 iota; +}; + +static int mod_pci(struct zpci_dev *zdev, int fn, u8 dmaas, struct mod_pci_args *args) +{ + u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, fn); + struct zpci_fib *fib; + int rc; + + /* The FIB must be available even if it's not used */ + fib = (void *) get_zeroed_page(GFP_KERNEL); + if (!fib) + return -ENOMEM; + + fib->pba = args->base; + fib->pal = args->limit; + fib->iota = args->iota; + + rc = mpcifc_instr(req, fib); + free_page((unsigned long) fib); + return rc; +} + +/* Modify PCI: Unregister adapter interruptions */ +static int zpci_unregister_airq(struct zpci_dev *zdev) +{ + struct mod_pci_args args = { 0, 0, 0 }; + + return mod_pci(zdev, ZPCI_MOD_FC_DEREG_INT, 0, &args); +} + #define ZPCI_PCIAS_CFGSPC 15 static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len) @@ -150,6 +270,55 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len) return rc; } +void synchronize_irq(unsigned int irq) +{ + /* + * Not needed, the handler is protected by a lock and IRQs that occur + * after the handler is deleted are just NOPs. + */ +} +EXPORT_SYMBOL_GPL(synchronize_irq); + +void enable_irq(unsigned int irq) +{ + struct msi_desc *msi = irq_get_msi_desc(irq); + + zpci_msi_set_mask_bits(msi, 1, 0); +} +EXPORT_SYMBOL_GPL(enable_irq); + +void disable_irq(unsigned int irq) +{ + struct msi_desc *msi = irq_get_msi_desc(irq); + + zpci_msi_set_mask_bits(msi, 1, 1); +} +EXPORT_SYMBOL_GPL(disable_irq); + +void disable_irq_nosync(unsigned int irq) +{ + disable_irq(irq); +} +EXPORT_SYMBOL_GPL(disable_irq_nosync); + +unsigned long probe_irq_on(void) +{ + return 0; +} +EXPORT_SYMBOL_GPL(probe_irq_on); + +int probe_irq_off(unsigned long val) +{ + return 0; +} +EXPORT_SYMBOL_GPL(probe_irq_off); + +unsigned int probe_irq_mask(unsigned long val) +{ + return val; +} +EXPORT_SYMBOL_GPL(probe_irq_mask); + void __devinit pcibios_fixup_bus(struct pci_bus *bus) { } @@ -219,6 +388,155 @@ static struct pci_ops pci_root_ops = { .write = pci_write, }; +/* store the last handled bit to implement fair scheduling of devices */ +static DEFINE_PER_CPU(unsigned long, next_sbit); + +static void zpci_irq_handler(void *dont, void *need) +{ + unsigned long sbit, mbit, last = 0, start = __get_cpu_var(next_sbit); + int rescan = 0, max = aisb_max; + struct zdev_irq_map *imap; + + kstat_cpu(smp_processor_id()).irqs[IOINT_PCI]++; + sbit = start; + +scan: + /* find summary_bit */ + for_each_set_bit_left_cont(sbit, bucket->aisb, max) { + clear_bit(63 - (sbit & 63), bucket->aisb + (sbit >> 6)); + last = sbit; + + /* find vector bit */ + imap = bucket->imap[sbit]; + for_each_set_bit_left(mbit, &imap->aibv, imap->msi_vecs) { + kstat_cpu(smp_processor_id()).irqs[IOINT_MSI]++; + clear_bit(63 - mbit, &imap->aibv); + + spin_lock(&imap->lock); + if (imap->cb[mbit].handler) + imap->cb[mbit].handler(mbit, + imap->cb[mbit].data); + spin_unlock(&imap->lock); + } + } + + if (rescan) + goto out; + + /* scan the skipped bits */ + if (start > 0) { + sbit = 0; + max = start; + start = 0; + goto scan; + } + + /* enable interrupts again */ + sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + + /* check again to not lose initiative */ + rmb(); + max = aisb_max; + sbit = find_first_bit_left(bucket->aisb, max); + if (sbit != max) { + atomic_inc(&irq_retries); + rescan++; + goto scan; + } +out: + /* store next device bit to scan */ + __get_cpu_var(next_sbit) = (++last >= aisb_max) ? 0 : last; +} + +/* msi_vecs - number of requested interrupts, 0 place function to error state */ +static int zpci_setup_msi(struct pci_dev *pdev, int msi_vecs) +{ + struct zpci_dev *zdev = get_zdev(pdev); + unsigned int aisb, msi_nr; + struct msi_desc *msi; + int rc; + + /* store the number of used MSI vectors */ + zdev->irq_map->msi_vecs = min(msi_vecs, ZPCI_NR_MSI_VECS); + + spin_lock(&bucket->lock); + aisb = find_first_zero_bit(bucket->alloc, PAGE_SIZE); + /* alloc map exhausted? */ + if (aisb == PAGE_SIZE) { + spin_unlock(&bucket->lock); + return -EIO; + } + set_bit(aisb, bucket->alloc); + spin_unlock(&bucket->lock); + + zdev->aisb = aisb; + if (aisb + 1 > aisb_max) + aisb_max = aisb + 1; + + /* wire up IRQ shortcut pointer */ + bucket->imap[zdev->aisb] = zdev->irq_map; + pr_debug("%s: imap[%u] linked to %p\n", __func__, zdev->aisb, zdev->irq_map); + + /* TODO: irq number 0 wont be found if we return less than requested MSIs. + * ignore it for now and fix in common code. + */ + msi_nr = aisb << ZPCI_MSI_VEC_BITS; + + list_for_each_entry(msi, &pdev->msi_list, list) { + rc = zpci_setup_msi_irq(zdev, msi, msi_nr, + aisb << ZPCI_MSI_VEC_BITS); + if (rc) + return rc; + msi_nr++; + } + + rc = zpci_register_airq(zdev, aisb, (u64) &zdev->irq_map->aibv); + if (rc) { + clear_bit(aisb, bucket->alloc); + dev_err(&pdev->dev, "register MSI failed with: %d\n", rc); + return rc; + } + return (zdev->irq_map->msi_vecs == msi_vecs) ? + 0 : zdev->irq_map->msi_vecs; +} + +static void zpci_teardown_msi(struct pci_dev *pdev) +{ + struct zpci_dev *zdev = get_zdev(pdev); + struct msi_desc *msi; + int aisb, rc; + + rc = zpci_unregister_airq(zdev); + if (rc) { + dev_err(&pdev->dev, "deregister MSI failed with: %d\n", rc); + return; + } + + msi = list_first_entry(&pdev->msi_list, struct msi_desc, list); + aisb = irq_to_dev_nr(msi->irq); + + list_for_each_entry(msi, &pdev->msi_list, list) + zpci_teardown_msi_irq(zdev, msi); + + clear_bit(aisb, bucket->alloc); + if (aisb + 1 == aisb_max) + aisb_max--; +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + pr_debug("%s: requesting %d MSI-X interrupts...", __func__, nvec); + if (type != PCI_CAP_ID_MSIX && type != PCI_CAP_ID_MSI) + return -EINVAL; + return zpci_setup_msi(pdev, nvec); +} + +void arch_teardown_msi_irqs(struct pci_dev *pdev) +{ + pr_info("%s: on pdev: %p\n", __func__, pdev); + zpci_teardown_msi(pdev); +} + static void zpci_map_resources(struct zpci_dev *zdev) { struct pci_dev *pdev = zdev->pdev; @@ -257,11 +575,23 @@ struct zpci_dev *zpci_alloc_device(void) zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); if (!zdev) return ERR_PTR(-ENOMEM); + + /* Alloc aibv & callback space */ + zdev->irq_map = kmem_cache_alloc(zdev_irq_cache, GFP_KERNEL); + if (!zdev->irq_map) + goto error; + memset(zdev->irq_map, 0, sizeof(*zdev->irq_map)); + WARN_ON((u64) zdev->irq_map & 0xff); return zdev; + +error: + kfree(zdev); + return ERR_PTR(-ENOMEM); } void zpci_free_device(struct zpci_dev *zdev) { + kmem_cache_free(zdev_irq_cache, zdev->irq_map); kfree(zdev); } @@ -320,6 +650,118 @@ void pcibios_disable_device(struct pci_dev *pdev) pdev->sysdata = NULL; } +int zpci_request_irq(unsigned int irq, irq_handler_t handler, void *data) +{ + int msi_nr = irq_to_msi_nr(irq); + struct zdev_irq_map *imap; + struct msi_desc *msi; + + msi = irq_get_msi_desc(irq); + if (!msi) + return -EIO; + + imap = get_imap(irq); + spin_lock_init(&imap->lock); + + pr_debug("%s: register handler for IRQ:MSI %d:%d\n", __func__, irq >> 6, msi_nr); + imap->cb[msi_nr].handler = handler; + imap->cb[msi_nr].data = data; + + /* + * The generic MSI code returns with the interrupt disabled on the + * card, using the MSI mask bits. Firmware doesn't appear to unmask + * at that level, so we do it here by hand. + */ + zpci_msi_set_mask_bits(msi, 1, 0); + return 0; +} + +void zpci_free_irq(unsigned int irq) +{ + struct zdev_irq_map *imap = get_imap(irq); + int msi_nr = irq_to_msi_nr(irq); + unsigned long flags; + + pr_debug("%s: for irq: %d\n", __func__, irq); + + spin_lock_irqsave(&imap->lock, flags); + imap->cb[msi_nr].handler = NULL; + imap->cb[msi_nr].data = NULL; + spin_unlock_irqrestore(&imap->lock, flags); +} + +int request_irq(unsigned int irq, irq_handler_t handler, + unsigned long irqflags, const char *devname, void *dev_id) +{ + pr_debug("%s: irq: %d handler: %p flags: %lx dev: %s\n", + __func__, irq, handler, irqflags, devname); + + return zpci_request_irq(irq, handler, dev_id); +} +EXPORT_SYMBOL_GPL(request_irq); + +void free_irq(unsigned int irq, void *dev_id) +{ + zpci_free_irq(irq); +} +EXPORT_SYMBOL_GPL(free_irq); + +static int __init zpci_irq_init(void) +{ + int cpu, rc; + + bucket = kzalloc(sizeof(*bucket), GFP_KERNEL); + if (!bucket) + return -ENOMEM; + + bucket->aisb = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!bucket->aisb) { + rc = -ENOMEM; + goto out_aisb; + } + + bucket->alloc = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!bucket->alloc) { + rc = -ENOMEM; + goto out_alloc; + } + + isc_register(PCI_ISC); + zpci_irq_si = s390_register_adapter_interrupt(&zpci_irq_handler, NULL, PCI_ISC); + if (IS_ERR(zpci_irq_si)) { + rc = PTR_ERR(zpci_irq_si); + zpci_irq_si = NULL; + goto out_ai; + } + + for_each_online_cpu(cpu) + per_cpu(next_sbit, cpu) = 0; + + spin_lock_init(&bucket->lock); + /* set summary to 1 to be called every time for the ISC */ + *zpci_irq_si = 1; + sic_instr(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC); + return 0; + +out_ai: + isc_unregister(PCI_ISC); + free_page((unsigned long) bucket->alloc); +out_alloc: + free_page((unsigned long) bucket->aisb); +out_aisb: + kfree(bucket); + return rc; +} + +static void zpci_irq_exit(void) +{ + free_page((unsigned long) bucket->alloc); + free_page((unsigned long) bucket->aisb); + s390_unregister_adapter_interrupt(zpci_irq_si, PCI_ISC); + isc_unregister(PCI_ISC); + kfree(bucket); +} + static struct resource *zpci_alloc_bus_resource(unsigned long start, unsigned long size, unsigned long flags, int domain) { @@ -523,13 +965,20 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + zdev_irq_cache = kmem_cache_create("PCI_IRQ_cache", sizeof(struct zdev_irq_map), + L1_CACHE_BYTES, SLAB_HWCACHE_ALIGN, NULL); + if (!zdev_irq_cache) + goto error_zdev; + /* TODO: use realloc */ zpci_iomap_start = kzalloc(ZPCI_IOMAP_MAX_ENTRIES * sizeof(*zpci_iomap_start), GFP_KERNEL); if (!zpci_iomap_start) - goto error_zdev; + goto error_iomap; return 0; +error_iomap: + kmem_cache_destroy(zdev_irq_cache); error_zdev: return -ENOMEM; } @@ -537,6 +986,7 @@ error_zdev: static void zpci_mem_exit(void) { kfree(zpci_iomap_start); + kmem_cache_destroy(zdev_irq_cache); } unsigned int pci_probe = 1; @@ -570,6 +1020,14 @@ static int __init pci_base_init(void) if (rc) goto out_mem; + rc = zpci_msihash_init(); + if (rc) + goto out_hash; + + rc = zpci_irq_init(); + if (rc) + goto out_irq; + rc = clp_find_pci_devices(); if (rc) goto out_find; @@ -578,6 +1036,10 @@ static int __init pci_base_init(void) return 0; out_find: + zpci_irq_exit(); +out_irq: + zpci_msihash_exit(); +out_hash: zpci_mem_exit(); out_mem: return rc; |