From 674c6479b7bdc78528ea83dd43897e3161558b8b Mon Sep 17 00:00:00 2001 From: Colin Ngam Date: Wed, 3 Aug 2005 13:35:00 -0700 Subject: [IA64-SGI] Altix only: Add PCI Domain number support. This patch enables PCI Domain numbering on Altix. Signed-off-by: Colin Ngam Signed-off-by: Tony Luck --- arch/ia64/sn/include/xtalk/hubdev.h | 6 ++++-- arch/ia64/sn/kernel/io_init.c | 33 +++++++++++++++++++++++++++------ arch/ia64/sn/pci/pcibr/pcibr_provider.c | 11 +++++++---- arch/ia64/sn/pci/tioca_provider.c | 5 +++-- 4 files changed, 41 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index 580a1c0403a7..ca4e250cdf27 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -34,7 +34,8 @@ struct sn_flush_device_list { unsigned long sfdl_force_int_addr; unsigned long sfdl_flush_value; volatile unsigned long *sfdl_flush_addr; - uint64_t sfdl_persistent_busnum; + uint32_t sfdl_persistent_busnum; + uint32_t sfdl_persistent_segment; struct pcibus_info *sfdl_pcibus_info; spinlock_t sfdl_flush_lock; }; @@ -58,7 +59,8 @@ struct hubdev_info { void *hdi_nodepda; void *hdi_node_vertex; - void *hdi_xtalk_vertex; + uint32_t max_segment_number; + uint32_t max_pcibus_number; }; extern void hubdev_init_node(nodepda_t *, cnodeid_t); diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index a6649baf629a..829ea7985017 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -44,6 +44,9 @@ int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */ struct sn_pcibus_provider *sn_pci_provider[PCIIO_ASIC_MAX_TYPES]; /* indexed by asic type */ +static int max_segment_number = 0; /* Default highest segment number */ +static int max_pcibus_number = 255; /* Default highest pci bus number */ + /* * Hooks and struct for unsupported pci providers */ @@ -157,13 +160,28 @@ static void sn_fixup_ionodes(void) uint64_t nasid; int i, widget; + /* + * Get SGI Specific HUB chipset information. + * Inform Prom that this kernel can support domain bus numbering. + */ for (i = 0; i < numionodes; i++) { hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo); nasid = cnodeid_to_nasid(i); + hubdev->max_segment_number = 0xffffffff; + hubdev->max_pcibus_number = 0xff; status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev)); if (status) continue; + /* Save the largest Domain and pcibus numbers found. */ + if (hubdev->max_segment_number) { + /* + * Dealing with a Prom that supports segments. + */ + max_segment_number = hubdev->max_segment_number; + max_pcibus_number = hubdev->max_pcibus_number; + } + /* Attach the error interrupt handlers */ if (nasid & 1) ice_error_init(hubdev); @@ -229,7 +247,7 @@ void sn_pci_unfixup_slot(struct pci_dev *dev) void sn_pci_fixup_slot(struct pci_dev *dev) { int idx; - int segment = 0; + int segment = pci_domain_nr(dev->bus); int status = 0; struct pcibus_bussoft *bs; struct pci_bus *host_pci_bus; @@ -282,9 +300,9 @@ void sn_pci_fixup_slot(struct pci_dev *dev) * PCI host_pci_dev struct and set up host bus linkages */ - bus_no = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32; + bus_no = (SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32) & 0xff; devfn = SN_PCIDEV_INFO(dev)->pdi_slot_host_handle & 0xffffffff; - host_pci_bus = pci_find_bus(pci_domain_nr(dev->bus), bus_no); + host_pci_bus = pci_find_bus(segment, bus_no); host_pci_dev = pci_get_slot(host_pci_bus, devfn); SN_PCIDEV_INFO(dev)->host_pci_dev = host_pci_dev; @@ -332,6 +350,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) prom_bussoft_ptr = __va(prom_bussoft_ptr); controller = kcalloc(1,sizeof(struct pci_controller), GFP_KERNEL); + controller->segment = segment; if (!controller) BUG(); @@ -387,7 +406,7 @@ void sn_pci_controller_fixup(int segment, int busnum, struct pci_bus *bus) if (controller->node >= num_online_nodes()) { struct pcibus_bussoft *b = SN_PCIBUS_BUSSOFT(bus); - printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%lu" + printk(KERN_WARNING "Device ASIC=%u XID=%u PBUSNUM=%u" "L_IO=%lx L_MEM=%lx BASE=%lx\n", b->bs_asic_type, b->bs_xid, b->bs_persist_busnum, b->bs_legacy_io, b->bs_legacy_mem, b->bs_base); @@ -442,6 +461,7 @@ sn_sysdata_free_start: static int __init sn_pci_init(void) { int i = 0; + int j = 0; struct pci_dev *pci_dev = NULL; extern void sn_init_cpei_timer(void); #ifdef CONFIG_PROC_FS @@ -476,8 +496,9 @@ static int __init sn_pci_init(void) #endif /* busses are not known yet ... */ - for (i = 0; i < PCI_BUSES_TO_SCAN; i++) - sn_pci_controller_fixup(0, i, NULL); + for (i = 0; i <= max_segment_number; i++) + for (j = 0; j <= max_pcibus_number; j++) + sn_pci_controller_fixup(i, j, NULL); /* * Generic Linux PCI Layer has created the pci_bus and pci_dev diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index b95e928636a1..dfaf01e5be80 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -60,7 +60,7 @@ static int sal_pcibr_error_interrupt(struct pcibus_info *soft) ret_stuff.status = 0; ret_stuff.v0 = 0; - segment = 0; + segment = soft->pbi_buscommon.bs_persist_segment; busnum = soft->pbi_buscommon.bs_persist_busnum; SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, @@ -142,9 +142,12 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont j++, sn_flush_device_list++) { if (sn_flush_device_list->sfdl_slot == -1) continue; - if (sn_flush_device_list-> - sfdl_persistent_busnum == - soft->pbi_buscommon.bs_persist_busnum) + if ((sn_flush_device_list-> + sfdl_persistent_segment == + soft->pbi_buscommon.bs_persist_segment) && + (sn_flush_device_list-> + sfdl_persistent_busnum == + soft->pbi_buscommon.bs_persist_busnum)) sn_flush_device_list->sfdl_pcibus_info = soft; } diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 5d76a7581465..2fef7c10d6e5 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -559,7 +559,7 @@ tioca_error_intr_handler(int irq, void *arg, struct pt_regs *pt) ret_stuff.status = 0; ret_stuff.v0 = 0; - segment = 0; + segment = soft->ca_common.bs_persist_segment; busnum = soft->ca_common.bs_persist_busnum; SAL_CALL_NOLOCK(ret_stuff, @@ -622,7 +622,8 @@ tioca_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont nasid_to_cnodeid(tioca_common->ca_closest_nasid); tioca_common->ca_kernel_private = (uint64_t) tioca_kern; - bus = pci_find_bus(0, tioca_common->ca_common.bs_persist_busnum); + bus = pci_find_bus(tioca_common->ca_common.bs_persist_segment, + tioca_common->ca_common.bs_persist_busnum); BUG_ON(!bus); tioca_kern->ca_devices = &bus->devices; -- cgit v1.2.3 From 89963d16dc50a5d91ed09914a1232d59e6461fd6 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] altix: cosmetic rename of SGI_PCIBR_ERROR Cosmetic altix patch to rename SGI_PCIBR_ERROR to something more generic and remove a duplicate #define. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index dfaf01e5be80..f8cfe8acdc3c 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -115,7 +115,7 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont /* * register the bridge's error interrupt handler */ - if (request_irq(SGI_PCIBR_ERROR, (void *)pcibr_error_intr_handler, + if (request_irq(SGI_PCIASIC_ERROR, (void *)pcibr_error_intr_handler, SA_SHIRQ, "PCIBR error", (void *)(soft))) { printk(KERN_WARNING "pcibr cannot allocate interrupt for error handler\n"); -- cgit v1.2.3 From 735e60f4c67823a3e01655c990296e2e56574885 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] abstract force_interrupt() mechanism Altix patch to abstract the force_interrupt() mechanism away from the pcibr provider. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 50 ++++++++++++++++++--------------- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 4 +++ arch/ia64/sn/pci/tioca_provider.c | 1 + 3 files changed, 33 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 84d276a14ecb..af061dba246f 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -317,6 +317,16 @@ void sn_irq_unfixup(struct pci_dev *pci_dev) pci_dev_put(pci_dev); } +static inline void +sn_call_force_intr_provider(struct sn_irq_info *sn_irq_info) +{ + struct sn_pcibus_provider *pci_provider; + + pci_provider = sn_pci_provider[sn_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->force_interrupt) + (*pci_provider->force_interrupt)(sn_irq_info); +} + static void force_interrupt(int irq) { struct sn_irq_info *sn_irq_info; @@ -325,11 +335,9 @@ static void force_interrupt(int irq) return; rcu_read_lock(); - list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) { - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - pcibr_force_interrupt(sn_irq_info); - } + list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[irq], list) + sn_call_force_intr_provider(sn_irq_info); + rcu_read_unlock(); } @@ -351,6 +359,14 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; + /* + * Bridge types attached to TIO (anything but PIC) do not need this WAR + * since they do not target Shub II interrupt registers. If that + * ever changes, this check needs to accomodate. + */ + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_PIC) + return; + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; if (!pcidev_info) return; @@ -377,16 +393,12 @@ static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info) break; } if (!test_bit(irr_bit, &irr_reg)) { - if (!test_bit(irq, pda->sn_soft_irr)) { - if (!test_bit(irq, pda->sn_in_service_ivecs)) { - regval &= 0xff; - if (sn_irq_info->irq_int_bit & regval & - sn_irq_info->irq_last_intr) { - regval &= - ~(sn_irq_info-> - irq_int_bit & regval); - pcibr_force_interrupt(sn_irq_info); - } + if (!test_bit(irq, pda->sn_in_service_ivecs)) { + regval &= 0xff; + if (sn_irq_info->irq_int_bit & regval & + sn_irq_info->irq_last_intr) { + regval &= ~(sn_irq_info->irq_int_bit & regval); + sn_call_force_intr_provider(sn_irq_info); } } } @@ -404,13 +416,7 @@ void sn_lb_int_war_check(void) rcu_read_lock(); for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) { list_for_each_entry_rcu(sn_irq_info, sn_irq_lh[i], list) { - /* - * Only call for PCI bridges that are fully - * initialized. - */ - if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) && - (sn_irq_info->irq_bridge != NULL)) - sn_check_intr(i, sn_irq_info); + sn_check_intr(i, sn_irq_info); } } rcu_read_unlock(); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index f8cfe8acdc3c..ff9c7de925d1 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -178,6 +178,9 @@ void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) struct pcibus_info *pcibus_info; int bit = sn_irq_info->irq_int_bit; + if (! sn_irq_info->irq_bridge) + return; + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; if (pcidev_info) { pcibus_info = @@ -222,6 +225,7 @@ struct sn_pcibus_provider pcibr_provider = { .dma_map_consistent = pcibr_dma_map_consistent, .dma_unmap = pcibr_dma_unmap, .bus_fixup = pcibr_bus_fixup, + .force_interrupt = pcibr_force_interrupt }; int diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 2fef7c10d6e5..4ea04cfa30ff 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -657,6 +657,7 @@ static struct sn_pcibus_provider tioca_pci_interfaces = { .dma_map_consistent = tioca_dma_map, .dma_unmap = tioca_dma_unmap, .bus_fixup = tioca_bus_fixup, + .force_interrupt = NULL }; /** -- cgit v1.2.3 From 5b53ed1f2ed6c85e2b1c39d97cc112ea32004609 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:06:00 -0700 Subject: [IA64-SGI] add support for TIO huge-window Altix patch to add TIO "huge-window" address support to sn_dma_flush(). Update copyright in affected files. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/include/tio.h | 6 +++- arch/ia64/sn/include/xtalk/hubdev.h | 5 +++- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 55 ++++++++++++++++++++++--------------- 3 files changed, 42 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h index 0139124dd54a..6b2e7b75eb19 100644 --- a/arch/ia64/sn/include/tio.h +++ b/arch/ia64/sn/include/tio.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_TIO_H @@ -26,6 +26,10 @@ #define TIO_ITTE_VALID_MASK 0x1 #define TIO_ITTE_VALID_SHIFT 16 +#define TIO_ITTE_WIDGET(itte) \ + (((itte) >> TIO_ITTE_WIDGET_SHIFT) & TIO_ITTE_WIDGET_MASK) +#define TIO_ITTE_VALID(itte) \ + (((itte) >> TIO_ITTE_VALID_SHIFT) & TIO_ITTE_VALID_MASK) #define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \ REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \ diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h index ca4e250cdf27..71c2b271b4c6 100644 --- a/arch/ia64/sn/include/xtalk/hubdev.h +++ b/arch/ia64/sn/include/xtalk/hubdev.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 1992 - 1997, 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #ifndef _ASM_IA64_SN_XTALK_HUBDEV_H #define _ASM_IA64_SN_XTALK_HUBDEV_H @@ -16,6 +16,9 @@ #define IIO_ITTE_WIDGET_MASK ((1<> IIO_ITTE_WIDGET_SHIFT) & IIO_ITTE_WIDGET_MASK) + /* * Use the top big window as a surrogate for the first small window */ diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index b058dc2a0b9d..ae455b6b1897 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2001-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -215,8 +215,8 @@ void sn_dma_flush(uint64_t addr) int is_tio; int wid_num; int i, j; - int bwin; uint64_t flags; + uint64_t itte; struct hubdev_info *hubinfo; volatile struct sn_flush_device_list *p; struct sn_flush_nasid_entry *flush_nasid_list; @@ -233,31 +233,36 @@ void sn_dma_flush(uint64_t addr) if (!hubinfo) { BUG(); } - is_tio = (nasid & 1); - if (is_tio) { - wid_num = TIO_SWIN_WIDGETNUM(addr); - bwin = TIO_BWIN_WINDOWNUM(addr); - } else { - wid_num = SWIN_WIDGETNUM(addr); - bwin = BWIN_WINDOWNUM(addr); - } flush_nasid_list = &hubinfo->hdi_flush_nasid_list; if (flush_nasid_list->widget_p == NULL) return; - if (bwin > 0) { - uint64_t itte = flush_nasid_list->iio_itte[bwin]; - if (is_tio) { - wid_num = (itte >> TIO_ITTE_WIDGET_SHIFT) & - TIO_ITTE_WIDGET_MASK; - } else { - wid_num = (itte >> IIO_ITTE_WIDGET_SHIFT) & - IIO_ITTE_WIDGET_MASK; - } + is_tio = (nasid & 1); + if (is_tio) { + int itte_index; + + if (TIO_HWIN(addr)) + itte_index = 0; + else if (TIO_BWIN_WINDOWNUM(addr)) + itte_index = TIO_BWIN_WINDOWNUM(addr); + else + itte_index = -1; + + if (itte_index >= 0) { + itte = flush_nasid_list->iio_itte[itte_index]; + if (! TIO_ITTE_VALID(itte)) + return; + wid_num = TIO_ITTE_WIDGET(itte); + } else + wid_num = TIO_SWIN_WIDGETNUM(addr); + } else { + if (BWIN_WINDOWNUM(addr)) { + itte = flush_nasid_list->iio_itte[BWIN_WINDOWNUM(addr)]; + wid_num = IIO_ITTE_WIDGET(itte); + } else + wid_num = SWIN_WIDGETNUM(addr); } - if (flush_nasid_list->widget_p == NULL) - return; if (flush_nasid_list->widget_p[wid_num] == NULL) return; p = &flush_nasid_list->widget_p[wid_num][0]; @@ -283,9 +288,15 @@ void sn_dma_flush(uint64_t addr) /* * For TIOCP use the Device(x) Write Request Buffer Flush Bridge * register since it ensures the data has entered the coherence - * domain, unlike PIC + * domain, unlike PIC. */ if (is_tio) { + /* + * Note: devices behind TIOCE should never be matched in the + * above code, and so the following code is PIC/CP centric. + * If CE ever needs the sn_dma_flush mechanism, we will have + * to account for that here and in tioce_bus_fixup(). + */ uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID); uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id); -- cgit v1.2.3 From c9221da9f2796f082642c3498edb2c8783ad4774 Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Wed, 3 Aug 2005 14:07:00 -0700 Subject: [IA64-SGI] sn pci provider for TIOCE (pci Altix patch to add an SN pci provider for TIOCE, which is SGI's PCI Express implementation. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/io_init.c | 2 + arch/ia64/sn/pci/Makefile | 2 +- arch/ia64/sn/pci/tioce_provider.c | 733 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 736 insertions(+), 1 deletion(-) create mode 100644 arch/ia64/sn/pci/tioce_provider.c (limited to 'arch') diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c index 829ea7985017..d1fc09b1d518 100644 --- a/arch/ia64/sn/kernel/io_init.c +++ b/arch/ia64/sn/kernel/io_init.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "xtalk/hubdev.h" #include "xtalk/xwidgetdev.h" @@ -481,6 +482,7 @@ static int __init sn_pci_init(void) pcibr_init_provider(); tioca_init_provider(); + tioce_init_provider(); /* * This is needed to avoid bounce limit checks in the blk layer diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile index 2f915bce25f9..321576b1b425 100644 --- a/arch/ia64/sn/pci/Makefile +++ b/arch/ia64/sn/pci/Makefile @@ -7,4 +7,4 @@ # # Makefile for the sn pci general routines. -obj-y := pci_dma.o tioca_provider.o pcibr/ +obj-y := pci_dma.o tioca_provider.o tioce_provider.o pcibr/ diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c new file mode 100644 index 000000000000..d9081369cf96 --- /dev/null +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -0,0 +1,733 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2003-2005 Silicon Graphics, Inc. All Rights Reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/** + * Bus address ranges for the 5 flavors of TIOCE DMA + */ + +#define TIOCE_D64_MIN 0x8000000000000000UL +#define TIOCE_D64_MAX 0xffffffffffffffffUL +#define TIOCE_D64_ADDR(a) ((a) >= TIOCE_D64_MIN) + +#define TIOCE_D32_MIN 0x0000000080000000UL +#define TIOCE_D32_MAX 0x00000000ffffffffUL +#define TIOCE_D32_ADDR(a) ((a) >= TIOCE_D32_MIN && (a) <= TIOCE_D32_MAX) + +#define TIOCE_M32_MIN 0x0000000000000000UL +#define TIOCE_M32_MAX 0x000000007fffffffUL +#define TIOCE_M32_ADDR(a) ((a) >= TIOCE_M32_MIN && (a) <= TIOCE_M32_MAX) + +#define TIOCE_M40_MIN 0x0000004000000000UL +#define TIOCE_M40_MAX 0x0000007fffffffffUL +#define TIOCE_M40_ADDR(a) ((a) >= TIOCE_M40_MIN && (a) <= TIOCE_M40_MAX) + +#define TIOCE_M40S_MIN 0x0000008000000000UL +#define TIOCE_M40S_MAX 0x000000ffffffffffUL +#define TIOCE_M40S_ADDR(a) ((a) >= TIOCE_M40S_MIN && (a) <= TIOCE_M40S_MAX) + +/* + * ATE manipulation macros. + */ + +#define ATE_PAGESHIFT(ps) (__ffs(ps)) +#define ATE_PAGEMASK(ps) ((ps)-1) + +#define ATE_PAGE(x, ps) ((x) >> ATE_PAGESHIFT(ps)) +#define ATE_NPAGES(start, len, pagesize) \ + (ATE_PAGE((start)+(len)-1, pagesize) - ATE_PAGE(start, pagesize) + 1) + +#define ATE_VALID(ate) ((ate) & (1UL << 63)) +#define ATE_MAKE(addr, ps) (((addr) & ~ATE_PAGEMASK(ps)) | (1UL << 63)) + +/* + * Flavors of ate-based mapping supported by tioce_alloc_map() + */ + +#define TIOCE_ATE_M32 1 +#define TIOCE_ATE_M40 2 +#define TIOCE_ATE_M40S 3 + +#define KB(x) ((x) << 10) +#define MB(x) ((x) << 20) +#define GB(x) ((x) << 30) + +/** + * tioce_dma_d64 - create a DMA mapping using 64-bit direct mode + * @ct_addr: system coretalk address + * + * Map @ct_addr into 64-bit CE bus space. No device context is necessary + * and no CE mapping are consumed. + * + * Bits 53:0 come from the coretalk address. The remaining bits are set as + * follows: + * + * 63 - must be 1 to indicate d64 mode to CE hardware + * 62 - barrier bit ... controlled with tioce_dma_barrier() + * 61 - 0 since this is not an MSI transaction + * 60:54 - reserved, MBZ + */ +static uint64_t +tioce_dma_d64(unsigned long ct_addr) +{ + uint64_t bus_addr; + + bus_addr = ct_addr | (1UL << 63); + + return bus_addr; +} + +/** + * pcidev_to_tioce - return misc ce related pointers given a pci_dev + * @pci_dev: pci device context + * @base: ptr to store struct tioce_mmr * for the CE holding this device + * @kernel: ptr to store struct tioce_kernel * for the CE holding this device + * @port: ptr to store the CE port number that this device is on + * + * Return pointers to various CE-related structures for the CE upstream of + * @pci_dev. + */ +static inline void +pcidev_to_tioce(struct pci_dev *pdev, struct tioce **base, + struct tioce_kernel **kernel, int *port) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce_kernel *ce_kernel; + + pcidev_info = SN_PCIDEV_INFO(pdev); + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_kernel = (struct tioce_kernel *)ce_common->ce_kernel_private; + + if (base) + *base = (struct tioce *)ce_common->ce_pcibus.bs_base; + if (kernel) + *kernel = ce_kernel; + + /* + * we use port as a zero-based value internally, even though the + * documentation is 1-based. + */ + if (port) + *port = + (pdev->bus->number < ce_kernel->ce_port1_secondary) ? 0 : 1; +} + +/** + * tioce_alloc_map - Given a coretalk address, map it to pcie bus address + * space using one of the various ATE-based address modes. + * @ce_kern: tioce context + * @type: map mode to use + * @port: 0-based port that the requesting device is downstream of + * @ct_addr: the coretalk address to map + * @len: number of bytes to map + * + * Given the addressing type, set up various paramaters that define the + * ATE pool to use. Search for a contiguous block of entries to cover the + * length, and if enough resources exist, fill in the ATE's and construct a + * tioce_dmamap struct to track the mapping. + */ +static uint64_t +tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port, + uint64_t ct_addr, int len) +{ + int i; + int j; + int first; + int last; + int entries; + int nates; + int pagesize; + uint64_t *ate_shadow; + uint64_t *ate_reg; + uint64_t addr; + struct tioce *ce_mmr; + uint64_t bus_base; + struct tioce_dmamap *map; + + ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base; + + switch (type) { + case TIOCE_ATE_M32: + /* + * The first 64 entries of the ate3240 pool are dedicated to + * super-page (TIOCE_ATE_M40S) mode. + */ + first = 64; + entries = TIOCE_NUM_M3240_ATES - 64; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = ce_kern->ce_ate3240_pagesize; + bus_base = TIOCE_M32_MIN; + break; + case TIOCE_ATE_M40: + first = 0; + entries = TIOCE_NUM_M40_ATES; + ate_shadow = ce_kern->ce_ate40_shadow; + ate_reg = ce_mmr->ce_ure_ate40; + pagesize = MB(64); + bus_base = TIOCE_M40_MIN; + break; + case TIOCE_ATE_M40S: + /* + * ate3240 entries 0-31 are dedicated to port1 super-page + * mappings. ate3240 entries 32-63 are dedicated to port2. + */ + first = port * 32; + entries = 32; + ate_shadow = ce_kern->ce_ate3240_shadow; + ate_reg = ce_mmr->ce_ure_ate3240; + pagesize = GB(16); + bus_base = TIOCE_M40S_MIN; + break; + default: + return 0; + } + + nates = ATE_NPAGES(ct_addr, len, pagesize); + if (nates > entries) + return 0; + + last = first + entries - nates; + for (i = first; i <= last; i++) { + if (ATE_VALID(ate_shadow[i])) + continue; + + for (j = i; j < i + nates; j++) + if (ATE_VALID(ate_shadow[j])) + break; + + if (j >= i + nates) + break; + } + + if (i > last) + return 0; + + map = kcalloc(1, sizeof(struct tioce_dmamap), GFP_ATOMIC); + if (!map) + return 0; + + addr = ct_addr; + for (j = 0; j < nates; j++) { + uint64_t ate; + + ate = ATE_MAKE(addr, pagesize); + ate_shadow[i + j] = ate; + ate_reg[i + j] = ate; + addr += pagesize; + } + + map->refcnt = 1; + map->nbytes = nates * pagesize; + map->ct_start = ct_addr & ~ATE_PAGEMASK(pagesize); + map->pci_start = bus_base + (i * pagesize); + map->ate_hw = &ate_reg[i]; + map->ate_shadow = &ate_shadow[i]; + map->ate_count = nates; + + list_add(&map->ce_dmamap_list, &ce_kern->ce_dmamap_list); + + return (map->pci_start + (ct_addr - map->ct_start)); +} + +/** + * tioce_dma_d32 - create a DMA mapping using 32-bit direct mode + * @pdev: linux pci_dev representing the function + * @paddr: system physical address + * + * Map @paddr into 32-bit bus space of the CE associated with @pcidev_info. + */ +static uint64_t +tioce_dma_d32(struct pci_dev *pdev, uint64_t ct_addr) +{ + int dma_ok; + int port; + struct tioce *ce_mmr; + struct tioce_kernel *ce_kern; + uint64_t ct_upper; + uint64_t ct_lower; + dma_addr_t bus_addr; + + ct_upper = ct_addr & ~0x3fffffffUL; + ct_lower = ct_addr & 0x3fffffffUL; + + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + if (ce_kern->ce_port[port].dirmap_refcnt == 0) { + volatile uint64_t tmp; + + ce_kern->ce_port[port].dirmap_shadow = ct_upper; + ce_mmr->ce_ure_dir_map[port] = ct_upper; + tmp = ce_mmr->ce_ure_dir_map[port]; + dma_ok = 1; + } else + dma_ok = (ce_kern->ce_port[port].dirmap_shadow == ct_upper); + + if (dma_ok) { + ce_kern->ce_port[port].dirmap_refcnt++; + bus_addr = TIOCE_D32_MIN + ct_lower; + } else + bus_addr = 0; + + return bus_addr; +} + +/** + * tioce_dma_barrier - swizzle a TIOCE bus address to include or exclude + * the barrier bit. + * @bus_addr: bus address to swizzle + * + * Given a TIOCE bus address, set the appropriate bit to indicate barrier + * attributes. + */ +static uint64_t +tioce_dma_barrier(uint64_t bus_addr, int on) +{ + uint64_t barrier_bit; + + /* barrier not supported in M40/M40S mode */ + if (TIOCE_M40_ADDR(bus_addr) || TIOCE_M40S_ADDR(bus_addr)) + return bus_addr; + + if (TIOCE_D64_ADDR(bus_addr)) + barrier_bit = (1UL << 62); + else /* must be m32 or d32 */ + barrier_bit = (1UL << 30); + + return (on) ? (bus_addr | barrier_bit) : (bus_addr & ~barrier_bit); +} + +/** + * tioce_dma_unmap - release CE mapping resources + * @pdev: linux pci_dev representing the function + * @bus_addr: bus address returned by an earlier tioce_dma_map + * @dir: mapping direction (unused) + * + * Locate mapping resources associated with @bus_addr and release them. + * For mappings created using the direct modes there are no resources + * to release. + */ +void +tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir) +{ + int i; + int port; + struct tioce_kernel *ce_kern; + struct tioce *ce_mmr; + unsigned long flags; + + bus_addr = tioce_dma_barrier(bus_addr, 0); + pcidev_to_tioce(pdev, &ce_mmr, &ce_kern, &port); + + /* nothing to do for D64 */ + + if (TIOCE_D64_ADDR(bus_addr)) + return; + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + if (TIOCE_D32_ADDR(bus_addr)) { + if (--ce_kern->ce_port[port].dirmap_refcnt == 0) { + ce_kern->ce_port[port].dirmap_shadow = 0; + ce_mmr->ce_ure_dir_map[port] = 0; + } + } else { + struct tioce_dmamap *map; + + list_for_each_entry(map, &ce_kern->ce_dmamap_list, + ce_dmamap_list) { + uint64_t last; + + last = map->pci_start + map->nbytes - 1; + if (bus_addr >= map->pci_start && bus_addr <= last) + break; + } + + if (&map->ce_dmamap_list == &ce_kern->ce_dmamap_list) { + printk(KERN_WARNING + "%s: %s - no map found for bus_addr 0x%lx\n", + __FUNCTION__, pci_name(pdev), bus_addr); + } else if (--map->refcnt == 0) { + for (i = 0; i < map->ate_count; i++) { + map->ate_shadow[i] = 0; + map->ate_hw[i] = 0; + } + + list_del(&map->ce_dmamap_list); + kfree(map); + } + } + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); +} + +/** + * tioce_do_dma_map - map pages for PCI DMA + * @pdev: linux pci_dev representing the function + * @paddr: host physical address to map + * @byte_count: bytes to map + * + * This is the main wrapper for mapping host physical pages to CE PCI space. + * The mapping mode used is based on the device's dma_mask. + */ +static uint64_t +tioce_do_dma_map(struct pci_dev *pdev, uint64_t paddr, size_t byte_count, + int barrier) +{ + unsigned long flags; + uint64_t ct_addr; + uint64_t mapaddr = 0; + struct tioce_kernel *ce_kern; + struct tioce_dmamap *map; + int port; + uint64_t dma_mask; + + dma_mask = (barrier) ? pdev->dev.coherent_dma_mask : pdev->dma_mask; + + /* cards must be able to address at least 31 bits */ + if (dma_mask < 0x7fffffffUL) + return 0; + + ct_addr = PHYS_TO_TIODMA(paddr); + + /* + * If the device can generate 64 bit addresses, create a D64 map. + * Since this should never fail, bypass the rest of the checks. + */ + if (dma_mask == ~0UL) { + mapaddr = tioce_dma_d64(ct_addr); + goto dma_map_done; + } + + pcidev_to_tioce(pdev, NULL, &ce_kern, &port); + + spin_lock_irqsave(&ce_kern->ce_lock, flags); + + /* + * D64 didn't work ... See if we have an existing map that covers + * this address range. Must account for devices dma_mask here since + * an existing map might have been done in a mode using more pci + * address bits than this device can support. + */ + list_for_each_entry(map, &ce_kern->ce_dmamap_list, ce_dmamap_list) { + uint64_t last; + + last = map->ct_start + map->nbytes - 1; + if (ct_addr >= map->ct_start && + ct_addr + byte_count - 1 <= last && + map->pci_start <= dma_mask) { + map->refcnt++; + mapaddr = map->pci_start + (ct_addr - map->ct_start); + break; + } + } + + /* + * If we don't have a map yet, and the card can generate 40 + * bit addresses, try the M40/M40S modes. Note these modes do not + * support a barrier bit, so if we need a consistent map these + * won't work. + */ + if (!mapaddr && !barrier && dma_mask >= 0xffffffffffUL) { + /* + * We have two options for 40-bit mappings: 16GB "super" ATE's + * and 64MB "regular" ATE's. We'll try both if needed for a + * given mapping but which one we try first depends on the + * size. For requests >64MB, prefer to use a super page with + * regular as the fallback. Otherwise, try in the reverse order. + */ + + if (byte_count > MB(64)) { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count); + } else { + mapaddr = tioce_alloc_map(ce_kern, TIOCE_ATE_M40, -1, + ct_addr, byte_count); + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M40S, + port, ct_addr, byte_count); + } + } + + /* + * 32-bit direct is the next mode to try + */ + if (!mapaddr && dma_mask >= 0xffffffffUL) + mapaddr = tioce_dma_d32(pdev, ct_addr); + + /* + * Last resort, try 32-bit ATE-based map. + */ + if (!mapaddr) + mapaddr = + tioce_alloc_map(ce_kern, TIOCE_ATE_M32, -1, ct_addr, + byte_count); + + spin_unlock_irqrestore(&ce_kern->ce_lock, flags); + +dma_map_done: + if (mapaddr & barrier) + mapaddr = tioce_dma_barrier(mapaddr, 1); + + return mapaddr; +} + +/** + * tioce_dma - standard pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit clear + * in the address. + */ +static uint64_t +tioce_dma(struct pci_dev *pdev, uint64_t paddr, size_t byte_count) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 0); +} + +/** + * tioce_dma_consistent - consistent pci dma map interface + * @pdev: pci device requesting the map + * @paddr: system physical address to map into pci space + * @byte_count: # bytes to map + * + * Simply call tioce_do_dma_map() to create a map with the barrier bit set + * in the address. + */ static uint64_t +tioce_dma_consistent(struct pci_dev *pdev, uint64_t paddr, size_t byte_count) +{ + return tioce_do_dma_map(pdev, paddr, byte_count, 1); +} + +/** + * tioce_error_intr_handler - SGI TIO CE error interrupt handler + * @irq: unused + * @arg: pointer to tioce_common struct for the given CE + * @pt: unused + * + * Handle a CE error interrupt. Simply a wrapper around a SAL call which + * defers processing to the SGI prom. + */ static irqreturn_t +tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt) +{ + struct tioce_common *soft = arg; + struct ia64_sal_retval ret_stuff; + ret_stuff.status = 0; + ret_stuff.v0 = 0; + + SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_ERROR_INTERRUPT, + soft->ce_pcibus.bs_persist_segment, + soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0); + + return IRQ_HANDLED; +} + +/** + * tioce_kern_init - init kernel structures related to a given TIOCE + * @tioce_common: ptr to a cached tioce_common struct that originated in prom + */ static struct tioce_kernel * +tioce_kern_init(struct tioce_common *tioce_common) +{ + int i; + uint32_t tmp; + struct tioce *tioce_mmr; + struct tioce_kernel *tioce_kern; + + tioce_kern = kcalloc(1, sizeof(struct tioce_kernel), GFP_KERNEL); + if (!tioce_kern) { + return NULL; + } + + tioce_kern->ce_common = tioce_common; + spin_lock_init(&tioce_kern->ce_lock); + INIT_LIST_HEAD(&tioce_kern->ce_dmamap_list); + tioce_common->ce_kernel_private = (uint64_t) tioce_kern; + + /* + * Determine the secondary bus number of the port2 logical PPB. + * This is used to decide whether a given pci device resides on + * port1 or port2. Note: We don't have enough plumbing set up + * here to use pci_read_config_xxx() so use the raw_pci_ops vector. + */ + + raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum, + PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp); + tioce_kern->ce_port1_secondary = (uint8_t) tmp; + + /* + * Set PMU pagesize to the largest size available, and zero out + * the ate's. + */ + + tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base; + tioce_mmr->ce_ure_page_map &= ~CE_URE_PAGESIZE_MASK; + tioce_mmr->ce_ure_page_map |= CE_URE_256K_PAGESIZE; + tioce_kern->ce_ate3240_pagesize = KB(256); + + for (i = 0; i < TIOCE_NUM_M40_ATES; i++) { + tioce_kern->ce_ate40_shadow[i] = 0; + tioce_mmr->ce_ure_ate40[i] = 0; + } + + for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) { + tioce_kern->ce_ate3240_shadow[i] = 0; + tioce_mmr->ce_ure_ate3240[i] = 0; + } + + return tioce_kern; +} + +/** + * tioce_force_interrupt - implement altix force_interrupt() backend for CE + * @sn_irq_info: sn asic irq that we need an interrupt generated for + * + * Given an sn_irq_info struct, set the proper bit in ce_adm_force_int to + * force a secondary interrupt to be generated. This is to work around an + * asic issue where there is a small window of opportunity for a legacy device + * interrupt to be lost. + */ +static void +tioce_force_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce *ce_mmr; + uint64_t force_int_val; + + if (!sn_irq_info->irq_bridge) + return; + + if (sn_irq_info->irq_bridge_type != PCIIO_ASIC_TYPE_TIOCE) + return; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + + /* + * irq_int_bit is originally set up by prom, and holds the interrupt + * bit shift (not mask) as defined by the bit definitions in the + * ce_adm_int mmr. These shifts are not the same for the + * ce_adm_force_int register, so do an explicit mapping here to make + * things clearer. + */ + + switch (sn_irq_info->irq_int_bit) { + case CE_ADM_INT_PCIE_PORT1_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT1_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT1_DEV_D_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_A_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_A_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_B_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_B_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_C_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_C_SHFT; + break; + case CE_ADM_INT_PCIE_PORT2_DEV_D_SHFT: + force_int_val = 1UL << CE_ADM_FORCE_INT_PCIE_PORT2_DEV_D_SHFT; + break; + default: + return; + } + ce_mmr->ce_adm_force_int = force_int_val; +} + +/** + * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus + * @prom_bussoft: Common prom/kernel struct representing the bus + * + * Replicates the tioce_common pointed to by @prom_bussoft in kernel + * space. Allocates and initializes a kernel-only area for a given CE, + * and sets up an irq for handling CE error interrupts. + * + * On successful setup, returns the kernel version of tioce_common back to + * the caller. + */ +static void * +tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) +{ + struct tioce_common *tioce_common; + + /* + * Allocate kernel bus soft and copy from prom. + */ + + tioce_common = kcalloc(1, sizeof(struct tioce_common), GFP_KERNEL); + if (!tioce_common) + return NULL; + + memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common)); + tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET; + + if (tioce_kern_init(tioce_common) == NULL) { + kfree(tioce_common); + return NULL; + } + + if (request_irq(SGI_PCIASIC_ERROR, + tioce_error_intr_handler, + SA_SHIRQ, "TIOCE error", (void *)tioce_common)) + printk(KERN_WARNING + "%s: Unable to get irq %d. " + "Error interrupts won't be routed for " + "TIOCE bus %04x:%02x\n", + __FUNCTION__, SGI_PCIASIC_ERROR, + tioce_common->ce_pcibus.bs_persist_segment, + tioce_common->ce_pcibus.bs_persist_busnum); + + return tioce_common; +} + +static struct sn_pcibus_provider tioce_pci_interfaces = { + .dma_map = tioce_dma, + .dma_map_consistent = tioce_dma_consistent, + .dma_unmap = tioce_dma_unmap, + .bus_fixup = tioce_bus_fixup, + .force_interrupt = tioce_force_interrupt +}; + +/** + * tioce_init_provider - init SN PCI provider ops for TIO CE + */ +int +tioce_init_provider(void) +{ + sn_pci_provider[PCIIO_ASIC_TYPE_TIOCE] = &tioce_pci_interfaces; + return 0; +} -- cgit v1.2.3 From 0aa2c72e59cf1d09a0b321e4e6292af78a51b8b3 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - use_alias_space Use local SHUB alias space when referencing MMRs that are known to be node local. There is a slight performance benefit & code simplification. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 19 +++++++------------ arch/ia64/sn/pci/pcibr/pcibr_dma.c | 2 +- 2 files changed, 8 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index af061dba246f..607938c288bb 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2000-2005 Silicon Graphics, Inc. All Rights Reserved. */ #include @@ -76,16 +76,14 @@ static void sn_enable_irq(unsigned int irq) static void sn_ack_irq(unsigned int irq) { - uint64_t event_occurred, mask = 0; - int nasid; + u64 event_occurred, mask = 0; irq = irq & 0xff; - nasid = get_nasid(); event_occurred = - HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED)); + HUB_L((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED)); mask = event_occurred & SH_ALL_INT_MASK; - HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS), - mask); + HUB_S((u64*)LOCAL_MMR_ADDR(SH_EVENT_OCCURRED_ALIAS), + mask); __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs); move_irq(irq); @@ -93,15 +91,12 @@ static void sn_ack_irq(unsigned int irq) static void sn_end_irq(unsigned int irq) { - int nasid; int ivec; - uint64_t event_occurred; + u64 event_occurred; ivec = irq & 0xff; if (ivec == SGI_UART_VECTOR) { - nasid = get_nasid(); - event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR - (nasid, SH_EVENT_OCCURRED)); + event_occurred = HUB_L((u64*)LOCAL_MMR_ADDR (SH_EVENT_OCCURRED)); /* If the UART bit is set here, we may have received an * interrupt from the UART that the driver missed. To * make sure, we IPI ourselves to force us to look again. diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index ae455b6b1897..0f254255f6a1 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -297,7 +297,7 @@ void sn_dma_flush(uint64_t addr) * If CE ever needs the sn_dma_flush mechanism, we will have * to account for that here and in tioce_bus_fixup(). */ - uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID); + uint32_t tio_id = HUB_L(TIO_IOSPACE_ADDR(nasid, TIO_NODE_ID)); uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id); /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */ -- cgit v1.2.3 From 2fdbb590e4f9b346e5d06cf7f85dcb7a9f2e0a48 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:26:00 -0700 Subject: [IA64-SGI] - New SN hardware support - boot_init_shub2 Update the addresses of the pio_write_status_addr so that they are correct for newer processors. Shub2 did not number the threads in the order that I had expected. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 7c7fe441d623..ed77715393ef 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -532,8 +532,8 @@ void __init sn_cpu_init(void) */ { u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0}; - u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1, - SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3}; + u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_2, + SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3}; u64 *pio; pio = is_shub1() ? pio1 : pio2; pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]); -- cgit v1.2.3 From 1007d02160974a46cc4fd3b1737c183c0471b88f Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - no_wars Disable some shub1-specific code when running on systems with shub2. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c index cde7375390b0..adf5db2e2afe 100644 --- a/arch/ia64/sn/kernel/sn2/timer_interrupt.c +++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c @@ -1,7 +1,7 @@ /* * * - * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved. + * Copyright (c) 2005 Silicon Graphics, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of version 2 of the GNU General Public License @@ -50,14 +50,16 @@ void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT); } - if (enable_shub_wars_1_1()) { - /* Bugfix code for SHUB 1.1 */ - if (pda->pio_shub_war_cam_addr) - *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + if (is_shub1()) { + if (enable_shub_wars_1_1()) { + /* Bugfix code for SHUB 1.1 */ + if (pda->pio_shub_war_cam_addr) + *pda->pio_shub_war_cam_addr = 0x8000000000000010UL; + } + if (pda->sn_lb_int_war_ticks == 0) + sn_lb_int_war_check(); + pda->sn_lb_int_war_ticks++; + if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) + pda->sn_lb_int_war_ticks = 0; } - if (pda->sn_lb_int_war_ticks == 0) - sn_lb_int_war_check(); - pda->sn_lb_int_war_ticks++; - if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL) - pda->sn_lb_int_war_ticks = 0; } -- cgit v1.2.3 From 7e95b9d6e21eda23bac1ff024d465d2072c8996d Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:27:00 -0700 Subject: [IA64-SGI] - New SN hardware support - bte_fixes Change the BTE driver so that it works for both shub1 and shub2. Most of the changes are related to the number of cores that use the BTE engine, to the MMR addresses of various shub registers, and to using the correct processor or network physical address. Signed-off-by: Russ Anderson Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 82 +++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 647deae9bfcd..7adef84190db 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -29,16 +29,30 @@ /* two interfaces on two btes */ #define MAX_INTERFACES_TO_TRY 4 +#define MAX_NODES_TO_TRY 2 static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface) { nodepda_t *tmp_nodepda; + if (nasid_to_cnodeid(nasid) == -1) + return (struct bteinfo_s *)NULL;; + tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid)); return &tmp_nodepda->bte_if[interface]; } +static inline void bte_start_transfer(struct bteinfo_s *bte, u64 len, u64 mode) +{ + if (is_shub2()) { + BTE_CTRL_STORE(bte, (IBLS_BUSY | ((len) | (mode) << 24))); + } else { + BTE_LNSTAT_STORE(bte, len); + BTE_CTRL_STORE(bte, mode); + } +} + /************************************************************************ * Block Transfer Engine copy related functions. * @@ -67,13 +81,15 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) { u64 transfer_size; u64 transfer_stat; + u64 notif_phys_addr; struct bteinfo_s *bte; bte_result_t bte_status; unsigned long irq_flags; unsigned long itc_end = 0; - struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY]; - int bte_if_index; - int bte_pri, bte_sec; + int nasid_to_try[MAX_NODES_TO_TRY]; + int my_nasid = get_nasid(); + int bte_if_index, nasid_index; + int bte_first, btes_per_node = BTES_PER_NODE; BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n", src, dest, len, mode, notification)); @@ -86,36 +102,26 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK)); BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT))); - /* CPU 0 (per node) tries bte0 first, CPU 1 try bte1 first */ - if (cpuid_to_subnode(smp_processor_id()) == 0) { - bte_pri = 0; - bte_sec = 1; - } else { - bte_pri = 1; - bte_sec = 0; - } + /* + * Start with interface corresponding to cpu number + */ + bte_first = get_cpu() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ - btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[0] = NASID_GET(dest); if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[1] = my_nasid; } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } else { /* try local then remote */ - btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri); - btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec); + nasid_to_try[0] = my_nasid; if (mode & BTE_USE_ANY) { - btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri); - btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec); + nasid_to_try[1] = NASID_GET(dest); } else { - btes_to_try[2] = NULL; - btes_to_try[3] = NULL; + nasid_to_try[1] = (int)NULL; } } @@ -123,11 +129,12 @@ retry_bteop: do { local_irq_save(irq_flags); - bte_if_index = 0; + bte_if_index = bte_first; + nasid_index = 0; /* Attempt to lock one of the BTE interfaces. */ - while (bte_if_index < MAX_INTERFACES_TO_TRY) { - bte = btes_to_try[bte_if_index++]; + while (nasid_index < MAX_NODES_TO_TRY) { + bte = bte_if_on_node(nasid_to_try[nasid_index],bte_if_index); if (bte == NULL) { continue; @@ -143,6 +150,15 @@ retry_bteop: break; } } + + bte_if_index = (bte_if_index + 1) % btes_per_node; /* Next interface */ + if (bte_if_index == bte_first) { + /* + * We've tried all interfaces on this node + */ + nasid_index++; + } + bte = NULL; } @@ -169,7 +185,13 @@ retry_bteop: /* Initialize the notification to a known value. */ *bte->most_rcnt_na = BTE_WORD_BUSY; + notif_phys_addr = TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)); + if (is_shub2()) { + src = SH2_TIO_PHYS_TO_DMA(src); + dest = SH2_TIO_PHYS_TO_DMA(dest); + notif_phys_addr = SH2_TIO_PHYS_TO_DMA(notif_phys_addr); + } /* Set the source and destination registers */ BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src)))); BTE_SRC_STORE(bte, TO_PHYS(src)); @@ -177,14 +199,12 @@ retry_bteop: BTE_DEST_STORE(bte, TO_PHYS(dest)); /* Set the notification register */ - BTE_PRINTKV(("IBNA = 0x%lx)\n", - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)))); - BTE_NOTIF_STORE(bte, - TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))); + BTE_PRINTKV(("IBNA = 0x%lx)\n", notif_phys_addr)); + BTE_NOTIF_STORE(bte, notif_phys_addr); /* Initiate the transfer */ BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode))); - BTE_START_TRANSFER(bte, transfer_size, BTE_VALID_MODE(mode)); + bte_start_transfer(bte, transfer_size, BTE_VALID_MODE(mode)); itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec); -- cgit v1.2.3 From 68b9753f47953930cb94de0223c163f289399091 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - cpu_relax Add a few missing calls to "hint @pause". Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 1 + arch/ia64/sn/kernel/huberror.c | 2 +- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index 7adef84190db..b75814efadba 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -215,6 +215,7 @@ retry_bteop: } while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) { + cpu_relax(); if (ia64_get_itc() > itc_end) { BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n", NASID_GET(bte->bte_base_addr), bte->bte_num, diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c index 5c39b43ba3c0..5c5eb01c50f0 100644 --- a/arch/ia64/sn/kernel/huberror.c +++ b/arch/ia64/sn/kernel/huberror.c @@ -76,7 +76,7 @@ void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum) */ REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum)); while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND) - udelay(1); + cpu_relax(); } diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c index 0f254255f6a1..34093476e965 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_dma.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c @@ -317,7 +317,8 @@ void sn_dma_flush(uint64_t addr) *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1; /* wait for the interrupt to come back. */ - while (*(p->sfdl_flush_addr) != 0x10f) ; + while (*(p->sfdl_flush_addr) != 0x10f) + cpu_relax(); /* okay, everything is synched up. */ spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags); -- cgit v1.2.3 From 470ceb05d9a2b4d61c19fac615a79e56e8401565 Mon Sep 17 00:00:00 2001 From: Jack Steiner Date: Thu, 11 Aug 2005 10:28:00 -0700 Subject: [IA64-SGI] - New SN hardware support - ptc_fixes Shub2 provides a much improved mechanism for issuing internode TLB purges. Add code to support the newer mechanism. There is also some debug code (disabled) that is useful for testing. Collect statistics on the number, type & duration of TLB purges. This data will be useful for making future improvements in the algorithms. Signed-off-by: Jack Steiner Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 1 + arch/ia64/sn/kernel/sn2/ptc_deadlock.S | 13 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 256 ++++++++++++++++++++++++++++++--- 3 files changed, 243 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index ed77715393ef..6648deb778a6 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -403,6 +403,7 @@ static void __init sn_init_pdas(char **cmdline_p) memset(nodepdaindr[cnode], 0, sizeof(nodepda_t)); memset(nodepdaindr[cnode]->phys_cpuid, -1, sizeof(nodepdaindr[cnode]->phys_cpuid)); + spin_lock_init(&nodepdaindr[cnode]->ptc_lock); } /* diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S index 96cb71d15682..3fa95065a446 100644 --- a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S +++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -11,7 +11,7 @@ #define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT #define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK -#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0) +#define ALIAS_OFFSET 8 .global sn2_ptc_deadlock_recovery_core @@ -36,13 +36,15 @@ sn2_ptc_deadlock_recovery_core: extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address dep piowcphy=-1,piowcphy,63,1 movl mask=WRITECOUNTMASK + mov r8=r0 1: add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register - mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR - st8.rel [scr2]=scr1;; + ;; + ld8.acq scr1=[scr2];; 5: ld8.acq scr1=[piowc];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -57,6 +59,7 @@ sn2_ptc_deadlock_recovery_core: st8.rel [ptc0]=data0 // Write PTC0 & wait for completion. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b;; @@ -67,6 +70,7 @@ sn2_ptc_deadlock_recovery_core: (p7) st8.rel [ptc1]=data1;; // Now write PTC1. 5: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete. + hint @pause and scr2=scr1,mask;; // mask of writecount bits cmp.ne p6,p0=zeroval,scr2 (p6) br.cond.sptk 5b @@ -77,6 +81,7 @@ sn2_ptc_deadlock_recovery_core: srlz.i;; ////////////// END PHYSICAL MODE //////////////////// +(p8) add r8=1,r8 (p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred. br.ret.sptk rp diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index 7af05a7ac743..0a4ee50c302f 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -5,7 +5,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include @@ -39,12 +41,120 @@ #include #include -void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0, - volatile unsigned long *, unsigned long data1); +DEFINE_PER_CPU(struct ptc_stats, ptcstats); +DECLARE_PER_CPU(struct ptc_stats, ptcstats); static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock); -static unsigned long sn2_ptc_deadlock_count; +void sn2_ptc_deadlock_recovery(short *, short, int, volatile unsigned long *, unsigned long data0, + volatile unsigned long *, unsigned long data1); + +#ifdef DEBUG_PTC +/* + * ptctest: + * + * xyz - 3 digit hex number: + * x - Force PTC purges to use shub: + * 0 - no force + * 1 - force + * y - interupt enable + * 0 - disable interrupts + * 1 - leave interuupts enabled + * z - type of lock: + * 0 - global lock + * 1 - node local lock + * 2 - no lock + * + * Note: on shub1, only ptctest == 0 is supported. Don't try other values! + */ + +static unsigned int sn2_ptctest = 0; + +static int __init ptc_test(char *str) +{ + get_option(&str, &sn2_ptctest); + return 1; +} +__setup("ptctest=", ptc_test); + +static inline int ptc_lock(unsigned long *flagp) +{ + unsigned long opt = sn2_ptctest & 255; + + switch (opt) { + case 0x00: + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + break; + case 0x01: + spin_lock_irqsave(&sn_nodepda->ptc_lock, *flagp); + break; + case 0x02: + local_irq_save(*flagp); + break; + case 0x10: + spin_lock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_lock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } + return opt; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + switch (opt) { + case 0x00: + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + break; + case 0x01: + spin_unlock_irqrestore(&sn_nodepda->ptc_lock, flags); + break; + case 0x02: + local_irq_restore(flags); + break; + case 0x10: + spin_unlock(&sn2_global_ptc_lock); + break; + case 0x11: + spin_unlock(&sn_nodepda->ptc_lock); + break; + case 0x12: + break; + default: + BUG(); + } +} +#else + +#define sn2_ptctest 0 + +static inline int ptc_lock(unsigned long *flagp) +{ + spin_lock_irqsave(&sn2_global_ptc_lock, *flagp); + return 0; +} + +static inline void ptc_unlock(unsigned long flags, int opt) +{ + spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); +} +#endif + +struct ptc_stats { + unsigned long ptc_l; + unsigned long change_rid; + unsigned long shub_ptc_flushes; + unsigned long nodes_flushed; + unsigned long deadlocks; + unsigned long lock_itc_clocks; + unsigned long shub_itc_clocks; + unsigned long shub_itc_clocks_max; +}; static inline unsigned long wait_piowc(void) { @@ -89,9 +199,9 @@ void sn2_global_tlb_purge(unsigned long start, unsigned long end, unsigned long nbits) { - int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; + int i, opt, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0; volatile unsigned long *ptc0, *ptc1; - unsigned long flags = 0, data0 = 0, data1 = 0; + unsigned long itc, itc2, flags, data0 = 0, data1 = 0; struct mm_struct *mm = current->active_mm; short nasids[MAX_NUMNODES], nix; nodemask_t nodes_flushed; @@ -114,16 +224,19 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, start += (1UL << nbits); } while (start < end); ia64_srlz_i(); + __get_cpu_var(ptcstats).ptc_l++; preempt_enable(); return; } if (atomic_read(&mm->mm_users) == 1) { flush_tlb_mm(mm); + __get_cpu_var(ptcstats).change_rid++; preempt_enable(); return; } + itc = ia64_get_itc(); nix = 0; for_each_node_mask(cnode, nodes_flushed) nasids[nix++] = cnodeid_to_nasid(cnode); @@ -148,7 +261,12 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, mynasid = get_nasid(); - spin_lock_irqsave(&sn2_global_ptc_lock, flags); + itc = ia64_get_itc(); + opt = ptc_lock(&flags); + itc2 = ia64_get_itc(); + __get_cpu_var(ptcstats).lock_itc_clocks += itc2 - itc; + __get_cpu_var(ptcstats).shub_ptc_flushes++; + __get_cpu_var(ptcstats).nodes_flushed += nix; do { if (shub1) @@ -157,7 +275,7 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK); for (i = 0; i < nix; i++) { nasid = nasids[i]; - if (unlikely(nasid == mynasid)) { + if ((!(sn2_ptctest & 3)) && unlikely(nasid == mynasid)) { ia64_ptcga(start, nbits << 2); ia64_srlz_i(); } else { @@ -169,18 +287,22 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, flushed = 1; } } - if (flushed && (wait_piowc() & - SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) { - sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1); + (SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK))) { + sn2_ptc_deadlock_recovery(nasids, nix, mynasid, ptc0, data0, ptc1, data1); } start += (1UL << nbits); } while (start < end); - spin_unlock_irqrestore(&sn2_global_ptc_lock, flags); + itc2 = ia64_get_itc() - itc2; + __get_cpu_var(ptcstats).shub_itc_clocks += itc2; + if (itc2 > __get_cpu_var(ptcstats).shub_itc_clocks_max) + __get_cpu_var(ptcstats).shub_itc_clocks_max = itc2; + + ptc_unlock(flags, opt); preempt_enable(); } @@ -192,31 +314,29 @@ sn2_global_tlb_purge(unsigned long start, unsigned long end, * TLB flush transaction. The recovery sequence is somewhat tricky & is * coded in assembly language. */ -void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0, +void sn2_ptc_deadlock_recovery(short *nasids, short nix, int mynasid, volatile unsigned long *ptc0, unsigned long data0, volatile unsigned long *ptc1, unsigned long data1) { extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long); - int cnode, mycnode, nasid; - volatile unsigned long *piows; - volatile unsigned long zeroval; + short nasid, i; + unsigned long *piows, zeroval; - sn2_ptc_deadlock_count++; + __get_cpu_var(ptcstats).deadlocks++; - piows = pda->pio_write_status_addr; + piows = (unsigned long *) pda->pio_write_status_addr; zeroval = pda->pio_write_status_val; - mycnode = numa_node_id(); - - for_each_online_node(cnode) { - if (is_headless_node(cnode) || cnode == mycnode) + for (i=0; i < nix; i++) { + nasid = nasids[i]; + if (!(sn2_ptctest & 3) && nasid == mynasid) continue; - nasid = cnodeid_to_nasid(cnode); ptc0 = CHANGE_NASID(nasid, ptc0); if (ptc1) ptc1 = CHANGE_NASID(nasid, ptc1); sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval); } + } /** @@ -293,3 +413,93 @@ void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect) sn_send_IPI_phys(nasid, physid, vector, delivery_mode); } + +#ifdef CONFIG_PROC_FS + +#define PTC_BASENAME "sgi_sn/ptc_statistics" + +static void *sn2_ptc_seq_start(struct seq_file *file, loff_t * offset) +{ + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void *sn2_ptc_seq_next(struct seq_file *file, void *data, loff_t * offset) +{ + (*offset)++; + if (*offset < NR_CPUS) + return offset; + return NULL; +} + +static void sn2_ptc_seq_stop(struct seq_file *file, void *data) +{ +} + +static int sn2_ptc_seq_show(struct seq_file *file, void *data) +{ + struct ptc_stats *stat; + int cpu; + + cpu = *(loff_t *) data; + + if (!cpu) { + seq_printf(file, "# ptc_l change_rid shub_ptc_flushes shub_nodes_flushed deadlocks lock_nsec shub_nsec shub_nsec_max\n"); + seq_printf(file, "# ptctest %d\n", sn2_ptctest); + } + + if (cpu < NR_CPUS && cpu_online(cpu)) { + stat = &per_cpu(ptcstats, cpu); + seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld\n", cpu, stat->ptc_l, + stat->change_rid, stat->shub_ptc_flushes, stat->nodes_flushed, + stat->deadlocks, + 1000 * stat->lock_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks / per_cpu(cpu_info, cpu).cyc_per_usec, + 1000 * stat->shub_itc_clocks_max / per_cpu(cpu_info, cpu).cyc_per_usec); + } + + return 0; +} + +static struct seq_operations sn2_ptc_seq_ops = { + .start = sn2_ptc_seq_start, + .next = sn2_ptc_seq_next, + .stop = sn2_ptc_seq_stop, + .show = sn2_ptc_seq_show +}; + +int sn2_ptc_proc_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &sn2_ptc_seq_ops); +} + +static struct file_operations proc_sn2_ptc_operations = { + .open = sn2_ptc_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static struct proc_dir_entry *proc_sn2_ptc; + +static int __init sn2_ptc_init(void) +{ + if (!(proc_sn2_ptc = create_proc_entry(PTC_BASENAME, 0444, NULL))) { + printk(KERN_ERR "unable to create %s proc entry", PTC_BASENAME); + return -EINVAL; + } + proc_sn2_ptc->proc_fops = &proc_sn2_ptc_operations; + spin_lock_init(&sn2_global_ptc_lock); + return 0; +} + +static void __exit sn2_ptc_exit(void) +{ + remove_proc_entry(PTC_BASENAME, NULL); +} + +module_init(sn2_ptc_init); +module_exit(sn2_ptc_exit); +#endif /* CONFIG_PROC_FS */ + -- cgit v1.2.3 From ecc3c30ae39c4d3cbf249a1ebd599465e0e25708 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Tue, 16 Aug 2005 00:50:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - export_pci_topology Bugfix to export PCI topology information in /proc/sgi_sn/sn_topology. Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 50 ++++++++++++------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 833e700fdac9..58e48fdf5b47 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -174,29 +174,22 @@ static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj, return slabname; } -static void print_pci_topology(struct seq_file *s, - struct sn_hwperf_object_info *obj, int *ordinal, - u64 rack, u64 bay, u64 slot, u64 slab) +static void print_pci_topology(struct seq_file *s) { - char *p1; - char *p2; - char *pg; - - if (!(pg = (char *)get_zeroed_page(GFP_KERNEL))) - return; /* ignore */ - if (ia64_sn_ioif_get_pci_topology(rack, bay, slot, slab, - __pa(pg), PAGE_SIZE) == SN_HWPERF_OP_OK) { - for (p1=pg; *p1 && p1 < pg + PAGE_SIZE;) { - if (!(p2 = strchr(p1, '\n'))) - break; - *p2 = '\0'; - seq_printf(s, "pcibus %d %s-%s\n", - *ordinal, obj->location, p1); - (*ordinal)++; - p1 = p2 + 1; - } + char *p; + size_t sz; + int e; + + for (sz = PAGE_SIZE; sz < 16 * PAGE_SIZE; sz += PAGE_SIZE) { + if (!(p = (char *)kmalloc(sz, GFP_KERNEL))) + break; + e = ia64_sn_ioif_get_pci_topology(__pa(p), sz); + if (e == SALRET_OK) + seq_puts(s, p); + kfree(p); + if (e == SALRET_OK || e == SALRET_NOT_IMPLEMENTED) + break; } - free_page((unsigned long)pg); } static int sn_topology_show(struct seq_file *s, void *d) @@ -215,7 +208,6 @@ static int sn_topology_show(struct seq_file *s, void *d) struct sn_hwperf_object_info *p; struct sn_hwperf_object_info *obj = d; /* this object */ struct sn_hwperf_object_info *objs = s->private; /* all objects */ - int rack, bay, slot, slab; u8 shubtype; u8 system_size; u8 sharing_size; @@ -225,7 +217,6 @@ static int sn_topology_show(struct seq_file *s, void *d) u8 region_size; u16 nasid_mask; int nasid_msb; - int pci_bus_ordinal = 0; if (obj == objs) { seq_printf(s, "# sn_topology version 2\n"); @@ -253,6 +244,8 @@ static int sn_topology_show(struct seq_file *s, void *d) shubtype ? "shub2" : "shub1", (u64)nasid_mask << nasid_shift, nasid_msb, nasid_shift, system_size, sharing_size, coher, region_size); + + print_pci_topology(s); } if (SN_HWPERF_FOREIGN(obj)) { @@ -300,17 +293,6 @@ static int sn_topology_show(struct seq_file *s, void *d) seq_putc(s, '\n'); } } - - /* - * PCI busses attached to this node, if any - */ - if (sn_hwperf_location_to_bpos(obj->location, - &rack, &bay, &slot, &slab)) { - /* export pci bus info */ - print_pci_topology(s, obj, &pci_bus_ordinal, - rack, bay, slot, slab); - - } } if (obj->ports) { -- cgit v1.2.3 From 60a3ba0bb45995ecf9cfe208527d7cfd6128d053 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Wed, 17 Aug 2005 15:17:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - Add a new exported function for determining the nearest node with CPUs for I/O nodes and fix a bug where the hwperf dynamic misc device was being registered before misc_init(). Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 266 +++++++++++++++++++++++++++++++++--- 1 file changed, 246 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c index 58e48fdf5b47..0261452d08df 100644 --- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c +++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c @@ -59,7 +59,7 @@ static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret) struct sn_hwperf_object_info *objbuf = NULL; if ((e = sn_hwperf_init()) < 0) { - printk("sn_hwperf_init failed: err %d\n", e); + printk(KERN_ERR "sn_hwperf_init failed: err %d\n", e); goto out; } @@ -111,7 +111,7 @@ static int sn_hwperf_geoid_to_cnode(char *location) if (sn_hwperf_location_to_bpos(location, &rack, &bay, &slot, &slab)) return -1; - for (cnode = 0; cnode < numionodes; cnode++) { + for_each_node(cnode) { geoid = cnodeid_get_geoid(cnode); module_id = geo_module(geoid); this_rack = MODULE_GET_RACK(module_id); @@ -124,11 +124,13 @@ static int sn_hwperf_geoid_to_cnode(char *location) } } - return cnode < numionodes ? cnode : -1; + return node_possible(cnode) ? cnode : -1; } static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj) { + if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) + BUG(); if (!obj->sn_hwp_this_part) return -1; return sn_hwperf_geoid_to_cnode(obj->location); @@ -192,6 +194,181 @@ static void print_pci_topology(struct seq_file *s) } } +static inline int sn_hwperf_has_cpus(cnodeid_t node) +{ + return node_online(node) && nr_cpus_node(node); +} + +static inline int sn_hwperf_has_mem(cnodeid_t node) +{ + return node_online(node) && NODE_DATA(node)->node_present_pages; +} + +static struct sn_hwperf_object_info * +sn_hwperf_findobj_id(struct sn_hwperf_object_info *objbuf, + int nobj, int id) +{ + int i; + struct sn_hwperf_object_info *p = objbuf; + + for (i=0; i < nobj; i++, p++) { + if (p->id == id) + return p; + } + + return NULL; + +} + +static int sn_hwperf_get_nearest_node_objdata(struct sn_hwperf_object_info *objbuf, + int nobj, cnodeid_t node, cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + struct sn_hwperf_object_info *nodeobj = NULL; + struct sn_hwperf_object_info *op; + struct sn_hwperf_object_info *dest; + struct sn_hwperf_object_info *router; + struct sn_hwperf_port_info ptdata[16]; + int sz, i, j; + cnodeid_t c; + int found_mem = 0; + int found_cpu = 0; + + if (!node_possible(node)) + return -EINVAL; + + if (sn_hwperf_has_cpus(node)) { + if (near_cpu_node) + *near_cpu_node = node; + found_cpu++; + } + + if (sn_hwperf_has_mem(node)) { + if (near_mem_node) + *near_mem_node = node; + found_mem++; + } + + if (found_cpu && found_mem) + return 0; /* trivially successful */ + + /* find the argument node object */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (!SN_HWPERF_IS_NODE(op) && !SN_HWPERF_IS_IONODE(op)) + continue; + if (node == sn_hwperf_obj_to_cnode(op)) { + nodeobj = op; + break; + } + } + if (!nodeobj) { + e = -ENOENT; + goto err; + } + + /* get it's interconnect topology */ + sz = op->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, nodeobj->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + + /* find nearest node with cpus and nearest memory */ + for (router=NULL, j=0; j < op->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, ptdata[j].conn_id); + if (!dest || SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (SN_HWPERF_IS_ROUTER(dest)) + router = dest; + } + + if (router && (!found_cpu || !found_mem)) { + /* search for a node connected to the same router */ + sz = router->ports * sizeof(struct sn_hwperf_port_info); + if (sz > sizeof(ptdata)) + BUG(); + e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, + SN_HWPERF_ENUM_PORTS, router->id, sz, + (u64)&ptdata, 0, 0, NULL); + if (e != SN_HWPERF_OP_OK) { + e = -EINVAL; + goto err; + } + for (j=0; j < router->ports; j++) { + dest = sn_hwperf_findobj_id(objbuf, nobj, + ptdata[j].conn_id); + if (!dest || dest->id == node || + SN_HWPERF_FOREIGN(dest) || + !SN_HWPERF_IS_NODE(dest) || + SN_HWPERF_IS_IONODE(dest)) { + continue; + } + c = sn_hwperf_obj_to_cnode(dest); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) { + /* resort to _any_ node with CPUs and memory */ + for (i=0, op=objbuf; i < nobj; i++, op++) { + if (SN_HWPERF_FOREIGN(op) || + SN_HWPERF_IS_IONODE(op) || + !SN_HWPERF_IS_NODE(op)) { + continue; + } + c = sn_hwperf_obj_to_cnode(op); + if (!found_cpu && sn_hwperf_has_cpus(c)) { + if (near_cpu_node) + *near_cpu_node = c; + found_cpu++; + } + if (!found_mem && sn_hwperf_has_mem(c)) { + if (near_mem_node) + *near_mem_node = c; + found_mem++; + } + if (found_cpu && found_mem) + break; + } + } + + if (!found_cpu || !found_mem) + e = -ENODATA; + +err: + return e; +} + + static int sn_topology_show(struct seq_file *s, void *d) { int sz; @@ -265,11 +442,24 @@ static int sn_topology_show(struct seq_file *s, void *d) if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj)) seq_putc(s, '\n'); else { + cnodeid_t near_mem = -1; + cnodeid_t near_cpu = -1; + seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal)); - for (i=0; i < numionodes; i++) { - seq_printf(s, i ? ":%d" : ", dist %d", - node_distance(ordinal, i)); + + if (sn_hwperf_get_nearest_node_objdata(objs, sn_hwperf_obj_cnt, + ordinal, &near_mem, &near_cpu) == 0) { + seq_printf(s, ", near_mem_nodeid %d, near_cpu_nodeid %d", + near_mem, near_cpu); + } + + if (!SN_HWPERF_IS_IONODE(obj)) { + for_each_online_node(i) { + seq_printf(s, i ? ":%d" : ", dist %d", + node_distance(ordinal, i)); + } } + seq_putc(s, '\n'); /* @@ -554,6 +744,8 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) { memset(p, 0, a.sz); for (i = 0; i < nobj; i++) { + if (!SN_HWPERF_IS_NODE(objs + i)) + continue; node = sn_hwperf_obj_to_cnode(objs + i); for_each_online_cpu(j) { if (node != cpu_to_node(j)) @@ -580,7 +772,7 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) case SN_HWPERF_GET_NODE_NASID: if (a.sz != sizeof(u64) || - (node = a.arg) < 0 || node >= numionodes) { + (node = a.arg) < 0 || !node_possible(node)) { r = -EINVAL; goto error; } @@ -609,6 +801,14 @@ sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg) vfree(objs); goto error; } + + if (!SN_HWPERF_IS_NODE(objs + i) && + !SN_HWPERF_IS_IONODE(objs + i)) { + r = -ENOENT; + vfree(objs); + goto error; + } + *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i); vfree(objs); } @@ -674,6 +874,7 @@ static int sn_hwperf_init(void) /* single threaded, once-only initialization */ down(&sn_hwperf_init_mutex); + if (sn_hwperf_salheap) { up(&sn_hwperf_init_mutex); return e; @@ -724,19 +925,6 @@ out: sn_hwperf_salheap = NULL; sn_hwperf_obj_cnt = 0; } - - if (!e) { - /* - * Register a dynamic misc device for ioctl. Platforms - * supporting hotplug will create /dev/sn_hwperf, else - * user can to look up the minor number in /proc/misc. - */ - if ((e = misc_register(&sn_hwperf_dev)) != 0) { - printk(KERN_ERR "sn_hwperf_init: misc register " - "for \"sn_hwperf\" failed, err %d\n", e); - } - } - up(&sn_hwperf_init_mutex); return e; } @@ -764,3 +952,41 @@ int sn_topology_release(struct inode *inode, struct file *file) vfree(seq->private); return seq_release(inode, file); } + +int sn_hwperf_get_nearest_node(cnodeid_t node, + cnodeid_t *near_mem_node, cnodeid_t *near_cpu_node) +{ + int e; + int nobj; + struct sn_hwperf_object_info *objbuf; + + if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) { + e = sn_hwperf_get_nearest_node_objdata(objbuf, nobj, + node, near_mem_node, near_cpu_node); + vfree(objbuf); + } + + return e; +} + +static int __devinit sn_hwperf_misc_register_init(void) +{ + int e; + + sn_hwperf_init(); + + /* + * Register a dynamic misc device for hwperf ioctls. Platforms + * supporting hotplug will create /dev/sn_hwperf, else user + * can to look up the minor number in /proc/misc. + */ + if ((e = misc_register(&sn_hwperf_dev)) != 0) { + printk(KERN_ERR "sn_hwperf_misc_register_init: failed to " + "register misc device for \"%s\"\n", sn_hwperf_dev.name); + } + + return e; +} + +device_initcall(sn_hwperf_misc_register_init); /* after misc_init() */ +EXPORT_SYMBOL(sn_hwperf_get_nearest_node); -- cgit v1.2.3 From 5390970d1c11b6d5d6a8253a718ed100e2178e14 Mon Sep 17 00:00:00 2001 From: Mark Goodwin Date: Tue, 16 Aug 2005 00:51:00 -0700 Subject: [IA64] - SGI SN hwperf enhancements - Update the SN pci device info to use the nearest node function to allocate driver memory on the nearest node (rather than defaulting to node 0). Signed-off-by: Mark Goodwin Signed-off-by: Tony Luck --- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index ff9c7de925d1..5862a709adf5 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "xtalk/xwidgetdev.h" #include "xtalk/hubdev.h" @@ -88,6 +89,7 @@ void * pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller) { int nasid, cnode, j; + cnodeid_t near_cnode; struct hubdev_info *hubdev_info; struct pcibus_info *soft; struct sn_flush_device_list *sn_flush_device_list; @@ -161,12 +163,18 @@ pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont memset(soft->pbi_int_ate_resource.ate, 0, (soft->pbi_int_ate_size * sizeof(uint64_t))); - if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) - /* - * TIO PCI Bridge with no closest node information. - * FIXME: Find another way to determine the closest node - */ - controller->node = -1; + if (prom_bussoft->bs_asic_type == PCIIO_ASIC_TYPE_TIOCP) { + /* TIO PCI Bridge: find nearest node with CPUs */ + int e = sn_hwperf_get_nearest_node(cnode, NULL, &near_cnode); + + if (e < 0) { + near_cnode = (cnodeid_t)-1; /* use any node */ + printk(KERN_WARNING "pcibr_bus_fixup: failed to find " + "near node with CPUs to TIO node %d, err=%d\n", + cnode, e); + } + controller->node = near_cnode; + } else controller->node = cnode; return soft; -- cgit v1.2.3 From 5b9021bc5800796e23e4994f8cf2dc61536be0a7 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Wed, 17 Aug 2005 10:00:00 -0700 Subject: [IA64] SGI SN remove redundant partition SAL call Clean up of SGI SN partitioning related code. The SN_SAL_GET_SN_INFO SAL call returns the partition ID, making the SN_SAL_SYSCTL_PARTITION_GET SAL call redundant. Remove sn_partid and use sn_partition_id. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/setup.c | 2 -- arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c index 6648deb778a6..a594aca959e6 100644 --- a/arch/ia64/sn/kernel/setup.c +++ b/arch/ia64/sn/kernel/setup.c @@ -80,8 +80,6 @@ EXPORT_PER_CPU_SYMBOL(__sn_cnodeid_to_nasid); DEFINE_PER_CPU(struct nodepda_s *, __sn_nodepda); EXPORT_PER_CPU_SYMBOL(__sn_nodepda); -partid_t sn_partid = -1; -EXPORT_SYMBOL(sn_partid); char sn_system_serial_number_string[128]; EXPORT_SYMBOL(sn_system_serial_number_string); u64 sn_partition_serial_number; diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 6a80fca807b9..51bf82720d99 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved. + * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved. */ #include #include @@ -15,7 +15,7 @@ static int partition_id_show(struct seq_file *s, void *p) { - seq_printf(s, "%d\n", sn_local_partid()); + seq_printf(s, "%d\n", sn_partition_id); return 0; } -- cgit v1.2.3 From 8409668b561fbe464f7a392e8dc77eca225d27ac Mon Sep 17 00:00:00 2001 From: Mark Maule Date: Thu, 25 Aug 2005 11:45:00 -0700 Subject: [IA64] altix: Abstract irq_affinity at the sn pci provider Altix patch to abstract irq_affinity down to the pci provider level since different SGI hardware implements this in different ways. Signed-off-by: Mark Maule Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/irq.c | 6 +++-- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 5 +++-- arch/ia64/sn/pci/tioca_provider.c | 3 ++- arch/ia64/sn/pci/tioce_provider.c | 40 ++++++++++++++++++++++++++++++++- 4 files changed, 48 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c index 607938c288bb..9fc74631ba8a 100644 --- a/arch/ia64/sn/kernel/irq.c +++ b/arch/ia64/sn/kernel/irq.c @@ -127,6 +127,7 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) int local_widget, status; nasid_t local_nasid; struct sn_irq_info *new_irq_info; + struct sn_pcibus_provider *pci_provider; new_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC); if (new_irq_info == NULL) @@ -166,8 +167,9 @@ static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask) new_irq_info->irq_cpuid = cpuid; register_intr_pda(new_irq_info); - if (IS_PCI_BRIDGE_ASIC(new_irq_info->irq_bridge_type)) - pcibr_change_devices_irq(new_irq_info); + pci_provider = sn_pci_provider[new_irq_info->irq_bridge_type]; + if (pci_provider && pci_provider->target_interrupt) + (pci_provider->target_interrupt)(new_irq_info); spin_lock(&sn_irq_info_lock); list_replace_rcu(&sn_irq_info->list, &new_irq_info->list); diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c index 5862a709adf5..7b03b8084ffc 100644 --- a/arch/ia64/sn/pci/pcibr/pcibr_provider.c +++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c @@ -198,7 +198,7 @@ void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info) } } -void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info) +void pcibr_target_interrupt(struct sn_irq_info *sn_irq_info) { struct pcidev_info *pcidev_info; struct pcibus_info *pcibus_info; @@ -233,7 +233,8 @@ struct sn_pcibus_provider pcibr_provider = { .dma_map_consistent = pcibr_dma_map_consistent, .dma_unmap = pcibr_dma_unmap, .bus_fixup = pcibr_bus_fixup, - .force_interrupt = pcibr_force_interrupt + .force_interrupt = pcibr_force_interrupt, + .target_interrupt = pcibr_target_interrupt }; int diff --git a/arch/ia64/sn/pci/tioca_provider.c b/arch/ia64/sn/pci/tioca_provider.c index 4ea04cfa30ff..ea09c12f0258 100644 --- a/arch/ia64/sn/pci/tioca_provider.c +++ b/arch/ia64/sn/pci/tioca_provider.c @@ -657,7 +657,8 @@ static struct sn_pcibus_provider tioca_pci_interfaces = { .dma_map_consistent = tioca_dma_map, .dma_unmap = tioca_dma_unmap, .bus_fixup = tioca_bus_fixup, - .force_interrupt = NULL + .force_interrupt = NULL, + .target_interrupt = NULL }; /** diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c index d9081369cf96..8e75db2b825d 100644 --- a/arch/ia64/sn/pci/tioce_provider.c +++ b/arch/ia64/sn/pci/tioce_provider.c @@ -668,6 +668,43 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info) ce_mmr->ce_adm_force_int = force_int_val; } +/** + * tioce_target_interrupt - implement set_irq_affinity for tioce resident + * functions. Note: only applies to line interrupts, not MSI's. + * + * @sn_irq_info: SN IRQ context + * + * Given an sn_irq_info, set the associated CE device's interrupt destination + * register. Since the interrupt destination registers are on a per-ce-slot + * basis, this will retarget line interrupts for all functions downstream of + * the slot. + */ +static void +tioce_target_interrupt(struct sn_irq_info *sn_irq_info) +{ + struct pcidev_info *pcidev_info; + struct tioce_common *ce_common; + struct tioce *ce_mmr; + int bit; + + pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; + if (!pcidev_info) + return; + + ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info; + ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base; + + bit = sn_irq_info->irq_int_bit; + + ce_mmr->ce_adm_int_mask |= (1UL << bit); + ce_mmr->ce_adm_int_dest[bit] = + ((uint64_t)sn_irq_info->irq_irq << INTR_VECTOR_SHFT) | + sn_irq_info->irq_xtalkaddr; + ce_mmr->ce_adm_int_mask &= ~(1UL << bit); + + tioce_force_interrupt(sn_irq_info); +} + /** * tioce_bus_fixup - perform final PCI fixup for a TIO CE bus * @prom_bussoft: Common prom/kernel struct representing the bus @@ -719,7 +756,8 @@ static struct sn_pcibus_provider tioce_pci_interfaces = { .dma_map_consistent = tioce_dma_consistent, .dma_unmap = tioce_dma_unmap, .bus_fixup = tioce_bus_fixup, - .force_interrupt = tioce_force_interrupt + .force_interrupt = tioce_force_interrupt, + .target_interrupt = tioce_target_interrupt }; /** -- cgit v1.2.3 From d1e079b3fc90c7c114f46771e983a72ac8740882 Mon Sep 17 00:00:00 2001 From: Russ Anderson <(rja@sgi.com)> Date: Mon, 15 Aug 2005 14:46:00 -0700 Subject: [IA64-SGI] fix bte_copy() calling smp_processor_id() while preemptible bte_copy() calls calls smp_processor_id(), which will get flagged if preemption if enabled. raw_smp_processor_id() is used instead because we are just using it to pick a BTE interface and are not tied to a specific cpu. Signed-off-by: Russ Anderson (rja@sgi.com) Signed-off-by: Tony Luck --- arch/ia64/sn/kernel/bte.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c index b75814efadba..45854c637e9c 100644 --- a/arch/ia64/sn/kernel/bte.c +++ b/arch/ia64/sn/kernel/bte.c @@ -105,7 +105,7 @@ bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification) /* * Start with interface corresponding to cpu number */ - bte_first = get_cpu() % btes_per_node; + bte_first = raw_smp_processor_id() % btes_per_node; if (mode & BTE_USE_DEST) { /* try remote then local */ -- cgit v1.2.3