diff options
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- | arch/powerpc/platforms/powernv/Makefile | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 1149 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-powernv.c | 1300 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci-ioda.c | 10 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.c | 69 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/pci.h | 29 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh_pseries.c | 98 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/msi.c | 6 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/pci_dlpar.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/pseries/setup.c | 2 |
10 files changed, 1230 insertions, 1437 deletions
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 6f3c5d33c3af..33e44f37212f 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -5,7 +5,7 @@ obj-y += opal-msglog.o opal-hmi.o opal-power.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o -obj-$(CONFIG_EEH) += eeh-ioda.o eeh-powernv.o +obj-$(CONFIG_EEH) += eeh-powernv.o obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c deleted file mode 100644 index 2809c9895288..000000000000 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ /dev/null @@ -1,1149 +0,0 @@ -/* - * The file intends to implement the functions needed by EEH, which is - * built on IODA compliant chip. Actually, lots of functions related - * to EEH would be built based on the OPAL APIs. - * - * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include <linux/debugfs.h> -#include <linux/delay.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/kernel.h> -#include <linux/msi.h> -#include <linux/notifier.h> -#include <linux/pci.h> -#include <linux/string.h> - -#include <asm/eeh.h> -#include <asm/eeh_event.h> -#include <asm/io.h> -#include <asm/iommu.h> -#include <asm/msi_bitmap.h> -#include <asm/opal.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> -#include <asm/tce.h> - -#include "powernv.h" -#include "pci.h" - -static int ioda_eeh_nb_init = 0; - -static int ioda_eeh_event(struct notifier_block *nb, - unsigned long events, void *change) -{ - uint64_t changed_evts = (uint64_t)change; - - /* - * We simply send special EEH event if EEH has - * been enabled, or clear pending events in - * case that we enable EEH soon - */ - if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || - !(events & OPAL_EVENT_PCI_ERROR)) - return 0; - - if (eeh_enabled()) - eeh_send_failure_event(NULL); - else - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - - return 0; -} - -static struct notifier_block ioda_eeh_nb = { - .notifier_call = ioda_eeh_event, - .next = NULL, - .priority = 0 -}; - -#ifdef CONFIG_DEBUG_FS -static ssize_t ioda_eeh_ei_write(struct file *filp, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct pci_controller *hose = filp->private_data; - struct pnv_phb *phb = hose->private_data; - struct eeh_dev *edev; - struct eeh_pe *pe; - int pe_no, type, func; - unsigned long addr, mask; - char buf[50]; - int ret; - - if (!phb->eeh_ops || !phb->eeh_ops->err_inject) - return -ENXIO; - - ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); - if (!ret) - return -EFAULT; - - /* Retrieve parameters */ - ret = sscanf(buf, "%x:%x:%x:%lx:%lx", - &pe_no, &type, &func, &addr, &mask); - if (ret != 5) - return -EINVAL; - - /* Retrieve PE */ - edev = kzalloc(sizeof(*edev), GFP_KERNEL); - if (!edev) - return -ENOMEM; - edev->phb = hose; - edev->pe_config_addr = pe_no; - pe = eeh_pe_get(edev); - kfree(edev); - if (!pe) - return -ENODEV; - - /* Do error injection */ - ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); - return ret < 0 ? ret : count; -} - -static const struct file_operations ioda_eeh_ei_fops = { - .open = simple_open, - .llseek = no_llseek, - .write = ioda_eeh_ei_write, -}; - -static int ioda_eeh_dbgfs_set(void *data, int offset, u64 val) -{ - struct pci_controller *hose = data; - struct pnv_phb *phb = hose->private_data; - - out_be64(phb->regs + offset, val); - return 0; -} - -static int ioda_eeh_dbgfs_get(void *data, int offset, u64 *val) -{ - struct pci_controller *hose = data; - struct pnv_phb *phb = hose->private_data; - - *val = in_be64(phb->regs + offset); - return 0; -} - -static int ioda_eeh_outb_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xD10, val); -} - -static int ioda_eeh_outb_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xD10, val); -} - -static int ioda_eeh_inbA_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xD90, val); -} - -static int ioda_eeh_inbA_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xD90, val); -} - -static int ioda_eeh_inbB_dbgfs_set(void *data, u64 val) -{ - return ioda_eeh_dbgfs_set(data, 0xE10, val); -} - -static int ioda_eeh_inbB_dbgfs_get(void *data, u64 *val) -{ - return ioda_eeh_dbgfs_get(data, 0xE10, val); -} - -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_outb_dbgfs_ops, ioda_eeh_outb_dbgfs_get, - ioda_eeh_outb_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbA_dbgfs_ops, ioda_eeh_inbA_dbgfs_get, - ioda_eeh_inbA_dbgfs_set, "0x%llx\n"); -DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get, - ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); -#endif /* CONFIG_DEBUG_FS */ - - -/** - * ioda_eeh_post_init - Chip dependent post initialization - * @hose: PCI controller - * - * The function will be called after eeh PEs and devices - * have been built. That means the EEH is ready to supply - * service with I/O cache. - */ -static int ioda_eeh_post_init(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - int ret; - - /* Register OPAL event notifier */ - if (!ioda_eeh_nb_init) { - ret = opal_notifier_register(&ioda_eeh_nb); - if (ret) { - pr_err("%s: Can't register OPAL event notifier (%d)\n", - __func__, ret); - return ret; - } - - ioda_eeh_nb_init = 1; - } - -#ifdef CONFIG_DEBUG_FS - if (!phb->has_dbgfs && phb->dbgfs) { - phb->has_dbgfs = 1; - - debugfs_create_file("err_injct", 0200, - phb->dbgfs, hose, - &ioda_eeh_ei_fops); - - debugfs_create_file("err_injct_outbound", 0600, - phb->dbgfs, hose, - &ioda_eeh_outb_dbgfs_ops); - debugfs_create_file("err_injct_inboundA", 0600, - phb->dbgfs, hose, - &ioda_eeh_inbA_dbgfs_ops); - debugfs_create_file("err_injct_inboundB", 0600, - phb->dbgfs, hose, - &ioda_eeh_inbB_dbgfs_ops); - } -#endif - - /* If EEH is enabled, we're going to rely on that. - * Otherwise, we restore to conventional mechanism - * to clear frozen PE during PCI config access. - */ - if (eeh_enabled()) - phb->flags |= PNV_PHB_FLAG_EEH; - else - phb->flags &= ~PNV_PHB_FLAG_EEH; - - return 0; -} - -/** - * ioda_eeh_set_option - Set EEH operation or I/O setting - * @pe: EEH PE - * @option: options - * - * Enable or disable EEH option for the indicated PE. The - * function also can be used to enable I/O or DMA for the - * PE. - */ -static int ioda_eeh_set_option(struct eeh_pe *pe, int option) -{ - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - bool freeze_pe = false; - int enable, ret = 0; - s64 rc; - - /* Check on PE number */ - if (pe->addr < 0 || pe->addr >= phb->ioda.total_pe) { - pr_err("%s: PE address %x out of range [0, %x] " - "on PHB#%x\n", - __func__, pe->addr, phb->ioda.total_pe, - hose->global_number); - return -EINVAL; - } - - switch (option) { - case EEH_OPT_DISABLE: - return -EPERM; - case EEH_OPT_ENABLE: - return 0; - case EEH_OPT_THAW_MMIO: - enable = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; - break; - case EEH_OPT_THAW_DMA: - enable = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; - break; - case EEH_OPT_FREEZE_PE: - freeze_pe = true; - enable = OPAL_EEH_ACTION_SET_FREEZE_ALL; - break; - default: - pr_warn("%s: Invalid option %d\n", - __func__, option); - return -EINVAL; - } - - /* If PHB supports compound PE, to handle it */ - if (freeze_pe) { - if (phb->freeze_pe) { - phb->freeze_pe(phb, pe->addr); - } else { - rc = opal_pci_eeh_freeze_set(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld freezing " - "PHB#%x-PE#%x\n", - __func__, rc, - phb->hose->global_number, pe->addr); - ret = -EIO; - } - } - } else { - if (phb->unfreeze_pe) { - ret = phb->unfreeze_pe(phb, pe->addr, enable); - } else { - rc = opal_pci_eeh_freeze_clear(phb->opal_id, - pe->addr, - enable); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld enable %d " - "for PHB#%x-PE#%x\n", - __func__, rc, option, - phb->hose->global_number, pe->addr); - ret = -EIO; - } - } - } - - return ret; -} - -static void ioda_eeh_phb_diag(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - long rc; - - rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, - PNV_PCI_DIAG_BUF_SIZE); - if (rc != OPAL_SUCCESS) - pr_warn("%s: Failed to get diag-data for PHB#%x (%ld)\n", - __func__, pe->phb->global_number, rc); -} - -static int ioda_eeh_get_phb_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; - s64 rc; - int result = 0; - - rc = opal_pci_eeh_freeze_status(phb->opal_id, - pe->addr, - &fstate, - &pcierr, - NULL); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld getting PHB#%x state\n", - __func__, rc, phb->hose->global_number); - return EEH_STATE_NOT_SUPPORT; - } - - /* - * Check PHB state. If the PHB is frozen for the - * first time, to dump the PHB diag-data. - */ - if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - } else if (!(pe->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - } - - return result; -} - -static int ioda_eeh_get_pe_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - u8 fstate; - __be16 pcierr; - s64 rc; - int result; - - /* - * We don't clobber hardware frozen state until PE - * reset is completed. In order to keep EEH core - * moving forward, we have to return operational - * state during PE reset. - */ - if (pe->state & EEH_PE_RESET) { - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - return result; - } - - /* - * Fetch PE state from hardware. If the PHB - * supports compound PE, let it handle that. - */ - if (phb->get_pe_state) { - fstate = phb->get_pe_state(phb, pe->addr); - } else { - rc = opal_pci_eeh_freeze_status(phb->opal_id, - pe->addr, - &fstate, - &pcierr, - NULL); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", - __func__, rc, phb->hose->global_number, pe->addr); - return EEH_STATE_NOT_SUPPORT; - } - } - - /* Figure out state */ - switch (fstate) { - case OPAL_EEH_STOPPED_NOT_FROZEN: - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE | - EEH_STATE_MMIO_ENABLED | - EEH_STATE_DMA_ENABLED); - break; - case OPAL_EEH_STOPPED_MMIO_FREEZE: - result = (EEH_STATE_DMA_ACTIVE | - EEH_STATE_DMA_ENABLED); - break; - case OPAL_EEH_STOPPED_DMA_FREEZE: - result = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_MMIO_ENABLED); - break; - case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: - result = 0; - break; - case OPAL_EEH_STOPPED_RESET: - result = EEH_STATE_RESET_ACTIVE; - break; - case OPAL_EEH_STOPPED_TEMP_UNAVAIL: - result = EEH_STATE_UNAVAILABLE; - break; - case OPAL_EEH_STOPPED_PERM_UNAVAIL: - result = EEH_STATE_NOT_SUPPORT; - break; - default: - result = EEH_STATE_NOT_SUPPORT; - pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", - __func__, phb->hose->global_number, - pe->addr, fstate); - } - - /* - * If PHB supports compound PE, to freeze all - * slave PEs for consistency. - * - * If the PE is switching to frozen state for the - * first time, to dump the PHB diag-data. - */ - if (!(result & EEH_STATE_NOT_SUPPORT) && - !(result & EEH_STATE_UNAVAILABLE) && - !(result & EEH_STATE_MMIO_ACTIVE) && - !(result & EEH_STATE_DMA_ACTIVE) && - !(pe->state & EEH_PE_ISOLATED)) { - if (phb->freeze_pe) - phb->freeze_pe(phb, pe->addr); - - eeh_pe_state_mark(pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - } - - return result; -} - -/** - * ioda_eeh_get_state - Retrieve the state of PE - * @pe: EEH PE - * - * The PE's state should be retrieved from the PEEV, PEST - * IODA tables. Since the OPAL has exported the function - * to do it, it'd better to use that. - */ -static int ioda_eeh_get_state(struct eeh_pe *pe) -{ - struct pnv_phb *phb = pe->phb->private_data; - - /* Sanity check on PE number. PHB PE should have 0 */ - if (pe->addr < 0 || - pe->addr >= phb->ioda.total_pe) { - pr_warn("%s: PHB#%x-PE#%x out of range [0, %x]\n", - __func__, phb->hose->global_number, - pe->addr, phb->ioda.total_pe); - return EEH_STATE_NOT_SUPPORT; - } - - if (pe->type & EEH_PE_PHB) - return ioda_eeh_get_phb_state(pe); - - return ioda_eeh_get_pe_state(pe); -} - -static s64 ioda_eeh_phb_poll(struct pnv_phb *phb) -{ - s64 rc = OPAL_HARDWARE; - - while (1) { - rc = opal_pci_poll(phb->opal_id); - if (rc <= 0) - break; - - if (system_state < SYSTEM_RUNNING) - udelay(1000 * rc); - else - msleep(rc); - } - - return rc; -} - -int ioda_eeh_phb_reset(struct pci_controller *hose, int option) -{ - struct pnv_phb *phb = hose->private_data; - s64 rc = OPAL_HARDWARE; - - pr_debug("%s: Reset PHB#%x, option=%d\n", - __func__, hose->global_number, option); - - /* Issue PHB complete reset request */ - if (option == EEH_RESET_FUNDAMENTAL || - option == EEH_RESET_HOT) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_COMPLETE, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_DEACTIVATE) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_COMPLETE, - OPAL_DEASSERT_RESET); - if (rc < 0) - goto out; - - /* - * Poll state of the PHB until the request is done - * successfully. The PHB reset is usually PHB complete - * reset followed by hot reset on root bus. So we also - * need the PCI bus settlement delay. - */ - rc = ioda_eeh_phb_poll(phb); - if (option == EEH_RESET_DEACTIVATE) { - if (system_state < SYSTEM_RUNNING) - udelay(1000 * EEH_PE_RST_SETTLE_TIME); - else - msleep(EEH_PE_RST_SETTLE_TIME); - } -out: - if (rc != OPAL_SUCCESS) - return -EIO; - - return 0; -} - -static int ioda_eeh_root_reset(struct pci_controller *hose, int option) -{ - struct pnv_phb *phb = hose->private_data; - s64 rc = OPAL_SUCCESS; - - pr_debug("%s: Reset PHB#%x, option=%d\n", - __func__, hose->global_number, option); - - /* - * During the reset deassert time, we needn't care - * the reset scope because the firmware does nothing - * for fundamental or hot reset during deassert phase. - */ - if (option == EEH_RESET_FUNDAMENTAL) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_FUNDAMENTAL, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_HOT) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_HOT, - OPAL_ASSERT_RESET); - else if (option == EEH_RESET_DEACTIVATE) - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PCI_HOT, - OPAL_DEASSERT_RESET); - if (rc < 0) - goto out; - - /* Poll state of the PHB until the request is done */ - rc = ioda_eeh_phb_poll(phb); - if (option == EEH_RESET_DEACTIVATE) - msleep(EEH_PE_RST_SETTLE_TIME); -out: - if (rc != OPAL_SUCCESS) - return -EIO; - - return 0; -} - -static int ioda_eeh_bridge_reset(struct pci_dev *dev, int option) - -{ - struct device_node *dn = pci_device_to_OF_node(dev); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); - int aer = edev ? edev->aer_cap : 0; - u32 ctrl; - - pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", - __func__, pci_domain_nr(dev->bus), - dev->bus->number, option); - - switch (option) { - case EEH_RESET_FUNDAMENTAL: - case EEH_RESET_HOT: - /* Don't report linkDown event */ - if (aer) { - eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl |= PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); - } - - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl |= PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_HOLD_TIME); - - break; - case EEH_RESET_DEACTIVATE: - eeh_ops->read_config(dn, PCI_BRIDGE_CONTROL, 2, &ctrl); - ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; - eeh_ops->write_config(dn, PCI_BRIDGE_CONTROL, 2, ctrl); - msleep(EEH_PE_RST_SETTLE_TIME); - - /* Continue reporting linkDown event */ - if (aer) { - eeh_ops->read_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, &ctrl); - ctrl &= ~PCI_ERR_UNC_SURPDN; - eeh_ops->write_config(dn, aer + PCI_ERR_UNCOR_MASK, - 4, ctrl); - } - - break; - } - - return 0; -} - -void pnv_pci_reset_secondary_bus(struct pci_dev *dev) -{ - struct pci_controller *hose; - - if (pci_is_root_bus(dev->bus)) { - hose = pci_bus_to_host(dev->bus); - ioda_eeh_root_reset(hose, EEH_RESET_HOT); - ioda_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); - } else { - ioda_eeh_bridge_reset(dev, EEH_RESET_HOT); - ioda_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); - } -} - -/** - * ioda_eeh_reset - Reset the indicated PE - * @pe: EEH PE - * @option: reset option - * - * Do reset on the indicated PE. For PCI bus sensitive PE, - * we need to reset the parent p2p bridge. The PHB has to - * be reinitialized if the p2p bridge is root bridge. For - * PCI device sensitive PE, we will try to reset the device - * through FLR. For now, we don't have OPAL APIs to do HARD - * reset yet, so all reset would be SOFT (HOT) reset. - */ -static int ioda_eeh_reset(struct eeh_pe *pe, int option) -{ - struct pci_controller *hose = pe->phb; - struct pci_bus *bus; - int ret; - - /* - * For PHB reset, we always have complete reset. For those PEs whose - * primary bus derived from root complex (root bus) or root port - * (usually bus#1), we apply hot or fundamental reset on the root port. - * For other PEs, we always have hot reset on the PE primary bus. - * - * Here, we have different design to pHyp, which always clear the - * frozen state during PE reset. However, the good idea here from - * benh is to keep frozen state before we get PE reset done completely - * (until BAR restore). With the frozen state, HW drops illegal IO - * or MMIO access, which can incur recrusive frozen PE during PE - * reset. The side effect is that EEH core has to clear the frozen - * state explicitly after BAR restore. - */ - if (pe->type & EEH_PE_PHB) { - ret = ioda_eeh_phb_reset(hose, option); - } else { - struct pnv_phb *phb; - s64 rc; - - /* - * The frozen PE might be caused by PAPR error injection - * registers, which are expected to be cleared after hitting - * frozen PE as stated in the hardware spec. Unfortunately, - * that's not true on P7IOC. So we have to clear it manually - * to avoid recursive EEH errors during recovery. - */ - phb = hose->private_data; - if (phb->model == PNV_PHB_MODEL_P7IOC && - (option == EEH_RESET_HOT || - option == EEH_RESET_FUNDAMENTAL)) { - rc = opal_pci_reset(phb->opal_id, - OPAL_RESET_PHB_ERROR, - OPAL_ASSERT_RESET); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld clearing " - "error injection registers\n", - __func__, rc); - return -EIO; - } - } - - bus = eeh_pe_bus_get(pe); - if (pci_is_root_bus(bus) || - pci_is_root_bus(bus->parent)) - ret = ioda_eeh_root_reset(hose, option); - else - ret = ioda_eeh_bridge_reset(bus->self, option); - } - - return ret; -} - -/** - * ioda_eeh_get_log - Retrieve error log - * @pe: frozen PE - * @severity: permanent or temporary error - * @drv_log: device driver log - * @len: length of device driver log - * - * Retrieve error log, which contains log from device driver - * and firmware. - */ -static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) -{ - if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - - return 0; -} - -/** - * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE - * @pe: EEH PE - * - * For particular PE, it might have included PCI bridges. In order - * to make the PE work properly, those PCI bridges should be configured - * correctly. However, we need do nothing on P7IOC since the reset - * function will do everything that should be covered by the function. - */ -static int ioda_eeh_configure_bridge(struct eeh_pe *pe) -{ - return 0; -} - -static int ioda_eeh_err_inject(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask) -{ - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - s64 ret; - - /* Sanity check on error type */ - if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && - type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { - pr_warn("%s: Invalid error type %d\n", - __func__, type); - return -ERANGE; - } - - if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || - func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { - pr_warn("%s: Invalid error function %d\n", - __func__, func); - return -ERANGE; - } - - /* Firmware supports error injection ? */ - if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { - pr_warn("%s: Firmware doesn't support error injection\n", - __func__); - return -ENXIO; - } - - /* Do error injection */ - ret = opal_pci_err_inject(phb->opal_id, pe->addr, - type, func, addr, mask); - if (ret != OPAL_SUCCESS) { - pr_warn("%s: Failure %lld injecting error " - "%d-%d to PHB#%x-PE#%x\n", - __func__, ret, type, func, - hose->global_number, pe->addr); - return -EIO; - } - - return 0; -} - -static void ioda_eeh_hub_diag_common(struct OpalIoP7IOCErrorData *data) -{ - /* GEM */ - if (data->gemXfir || data->gemRfir || - data->gemRirqfir || data->gemMask || data->gemRwof) - pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->gemXfir), - be64_to_cpu(data->gemRfir), - be64_to_cpu(data->gemRirqfir), - be64_to_cpu(data->gemMask), - be64_to_cpu(data->gemRwof)); - - /* LEM */ - if (data->lemFir || data->lemErrMask || - data->lemAction0 || data->lemAction1 || data->lemWof) - pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->lemFir), - be64_to_cpu(data->lemErrMask), - be64_to_cpu(data->lemAction0), - be64_to_cpu(data->lemAction1), - be64_to_cpu(data->lemWof)); -} - -static void ioda_eeh_hub_diag(struct pci_controller *hose) -{ - struct pnv_phb *phb = hose->private_data; - struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; - long rc; - - rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); - if (rc != OPAL_SUCCESS) { - pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", - __func__, phb->hub_id, rc); - return; - } - - switch (data->type) { - case OPAL_P7IOC_DIAG_TYPE_RGC: - pr_info("P7IOC diag-data for RGC\n\n"); - ioda_eeh_hub_diag_common(data); - if (data->rgc.rgcStatus || data->rgc.rgcLdcp) - pr_info(" RGC: %016llx %016llx\n", - be64_to_cpu(data->rgc.rgcStatus), - be64_to_cpu(data->rgc.rgcLdcp)); - break; - case OPAL_P7IOC_DIAG_TYPE_BI: - pr_info("P7IOC diag-data for BI %s\n\n", - data->bi.biDownbound ? "Downbound" : "Upbound"); - ioda_eeh_hub_diag_common(data); - if (data->bi.biLdcp0 || data->bi.biLdcp1 || - data->bi.biLdcp2 || data->bi.biFenceStatus) - pr_info(" BI: %016llx %016llx %016llx %016llx\n", - be64_to_cpu(data->bi.biLdcp0), - be64_to_cpu(data->bi.biLdcp1), - be64_to_cpu(data->bi.biLdcp2), - be64_to_cpu(data->bi.biFenceStatus)); - break; - case OPAL_P7IOC_DIAG_TYPE_CI: - pr_info("P7IOC diag-data for CI Port %d\n\n", - data->ci.ciPort); - ioda_eeh_hub_diag_common(data); - if (data->ci.ciPortStatus || data->ci.ciPortLdcp) - pr_info(" CI: %016llx %016llx\n", - be64_to_cpu(data->ci.ciPortStatus), - be64_to_cpu(data->ci.ciPortLdcp)); - break; - case OPAL_P7IOC_DIAG_TYPE_MISC: - pr_info("P7IOC diag-data for MISC\n\n"); - ioda_eeh_hub_diag_common(data); - break; - case OPAL_P7IOC_DIAG_TYPE_I2C: - pr_info("P7IOC diag-data for I2C\n\n"); - ioda_eeh_hub_diag_common(data); - break; - default: - pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", - __func__, phb->hub_id, data->type); - } -} - -static int ioda_eeh_get_pe(struct pci_controller *hose, - u16 pe_no, struct eeh_pe **pe) -{ - struct pnv_phb *phb = hose->private_data; - struct pnv_ioda_pe *pnv_pe; - struct eeh_pe *dev_pe; - struct eeh_dev edev; - - /* - * If PHB supports compound PE, to fetch - * the master PE because slave PE is invisible - * to EEH core. - */ - pnv_pe = &phb->ioda.pe_array[pe_no]; - if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { - pnv_pe = pnv_pe->master; - WARN_ON(!pnv_pe || - !(pnv_pe->flags & PNV_IODA_PE_MASTER)); - pe_no = pnv_pe->pe_number; - } - - /* Find the PE according to PE# */ - memset(&edev, 0, sizeof(struct eeh_dev)); - edev.phb = hose; - edev.pe_config_addr = pe_no; - dev_pe = eeh_pe_get(&edev); - if (!dev_pe) - return -EEXIST; - - /* Freeze the (compound) PE */ - *pe = dev_pe; - if (!(dev_pe->state & EEH_PE_ISOLATED)) - phb->freeze_pe(phb, pe_no); - - /* - * At this point, we're sure the (compound) PE should - * have been frozen. However, we still need poke until - * hitting the frozen PE on top level. - */ - dev_pe = dev_pe->parent; - while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { - int ret; - int active_flags = (EEH_STATE_MMIO_ACTIVE | - EEH_STATE_DMA_ACTIVE); - - ret = eeh_ops->get_state(dev_pe, NULL); - if (ret <= 0 || (ret & active_flags) == active_flags) { - dev_pe = dev_pe->parent; - continue; - } - - /* Frozen parent PE */ - *pe = dev_pe; - if (!(dev_pe->state & EEH_PE_ISOLATED)) - phb->freeze_pe(phb, dev_pe->addr); - - /* Next one */ - dev_pe = dev_pe->parent; - } - - return 0; -} - -/** - * ioda_eeh_next_error - Retrieve next error for EEH core to handle - * @pe: The affected PE - * - * The function is expected to be called by EEH core while it gets - * special EEH event (without binding PE). The function calls to - * OPAL APIs for next error to handle. The informational error is - * handled internally by platform. However, the dead IOC, dead PHB, - * fenced PHB and frozen PE should be handled by EEH core eventually. - */ -static int ioda_eeh_next_error(struct eeh_pe **pe) -{ - struct pci_controller *hose; - struct pnv_phb *phb; - struct eeh_pe *phb_pe, *parent_pe; - __be64 frozen_pe_no; - __be16 err_type, severity; - int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); - long rc; - int state, ret = EEH_NEXT_ERR_NONE; - - /* - * While running here, it's safe to purge the event queue. - * And we should keep the cached OPAL notifier event sychronized - * between the kernel and firmware. - */ - eeh_remove_event(NULL, false); - opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); - - list_for_each_entry(hose, &hose_list, list_node) { - /* - * If the subordinate PCI buses of the PHB has been - * removed or is exactly under error recovery, we - * needn't take care of it any more. - */ - phb = hose->private_data; - phb_pe = eeh_phb_pe_get(hose); - if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) - continue; - - rc = opal_pci_next_error(phb->opal_id, - &frozen_pe_no, &err_type, &severity); - - /* If OPAL API returns error, we needn't proceed */ - if (rc != OPAL_SUCCESS) { - pr_devel("%s: Invalid return value on " - "PHB#%x (0x%lx) from opal_pci_next_error", - __func__, hose->global_number, rc); - continue; - } - - /* If the PHB doesn't have error, stop processing */ - if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || - be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { - pr_devel("%s: No error found on PHB#%x\n", - __func__, hose->global_number); - continue; - } - - /* - * Processing the error. We're expecting the error with - * highest priority reported upon multiple errors on the - * specific PHB. - */ - pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", - __func__, be16_to_cpu(err_type), be16_to_cpu(severity), - be64_to_cpu(frozen_pe_no), hose->global_number); - switch (be16_to_cpu(err_type)) { - case OPAL_EEH_IOC_ERROR: - if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { - pr_err("EEH: dead IOC detected\n"); - ret = EEH_NEXT_ERR_DEAD_IOC; - } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { - pr_info("EEH: IOC informative error " - "detected\n"); - ioda_eeh_hub_diag(hose); - ret = EEH_NEXT_ERR_NONE; - } - - break; - case OPAL_EEH_PHB_ERROR: - if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { - *pe = phb_pe; - pr_err("EEH: dead PHB#%x detected, " - "location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_DEAD_PHB; - } else if (be16_to_cpu(severity) == - OPAL_EEH_SEV_PHB_FENCED) { - *pe = phb_pe; - pr_err("EEH: Fenced PHB#%x detected, " - "location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_FENCED_PHB; - } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { - pr_info("EEH: PHB#%x informative error " - "detected, location: %s\n", - hose->global_number, - eeh_pe_loc_get(phb_pe)); - ioda_eeh_phb_diag(phb_pe); - pnv_pci_dump_phb_diag_data(hose, phb_pe->data); - ret = EEH_NEXT_ERR_NONE; - } - - break; - case OPAL_EEH_PE_ERROR: - /* - * If we can't find the corresponding PE, we - * just try to unfreeze. - */ - if (ioda_eeh_get_pe(hose, - be64_to_cpu(frozen_pe_no), pe)) { - /* Try best to clear it */ - pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", - hose->global_number, frozen_pe_no); - pr_info("EEH: PHB location: %s\n", - eeh_pe_loc_get(phb_pe)); - opal_pci_eeh_freeze_clear(phb->opal_id, frozen_pe_no, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); - ret = EEH_NEXT_ERR_NONE; - } else if ((*pe)->state & EEH_PE_ISOLATED || - eeh_pe_passed(*pe)) { - ret = EEH_NEXT_ERR_NONE; - } else { - pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", - (*pe)->addr, (*pe)->phb->global_number); - pr_err("EEH: PE location: %s, PHB location: %s\n", - eeh_pe_loc_get(*pe), eeh_pe_loc_get(phb_pe)); - ret = EEH_NEXT_ERR_FROZEN_PE; - } - - break; - default: - pr_warn("%s: Unexpected error type %d\n", - __func__, be16_to_cpu(err_type)); - } - - /* - * EEH core will try recover from fenced PHB or - * frozen PE. In the time for frozen PE, EEH core - * enable IO path for that before collecting logs, - * but it ruins the site. So we have to dump the - * log in advance here. - */ - if ((ret == EEH_NEXT_ERR_FROZEN_PE || - ret == EEH_NEXT_ERR_FENCED_PHB) && - !((*pe)->state & EEH_PE_ISOLATED)) { - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - ioda_eeh_phb_diag(*pe); - - if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) - pnv_pci_dump_phb_diag_data((*pe)->phb, - (*pe)->data); - } - - /* - * We probably have the frozen parent PE out there and - * we need have to handle frozen parent PE firstly. - */ - if (ret == EEH_NEXT_ERR_FROZEN_PE) { - parent_pe = (*pe)->parent; - while (parent_pe) { - /* Hit the ceiling ? */ - if (parent_pe->type & EEH_PE_PHB) - break; - - /* Frozen parent PE ? */ - state = ioda_eeh_get_state(parent_pe); - if (state > 0 && - (state & active_flags) != active_flags) - *pe = parent_pe; - - /* Next parent level */ - parent_pe = parent_pe->parent; - } - - /* We possibly migrate to another PE */ - eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); - } - - /* - * If we have no errors on the specific PHB or only - * informative error there, we continue poking it. - * Otherwise, we need actions to be taken by upper - * layer. - */ - if (ret > EEH_NEXT_ERR_INF) - break; - } - - return ret; -} - -struct pnv_eeh_ops ioda_eeh_ops = { - .post_init = ioda_eeh_post_init, - .set_option = ioda_eeh_set_option, - .get_state = ioda_eeh_get_state, - .reset = ioda_eeh_reset, - .get_log = ioda_eeh_get_log, - .configure_bridge = ioda_eeh_configure_bridge, - .err_inject = ioda_eeh_err_inject, - .next_error = ioda_eeh_next_error -}; diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index e261869adc86..ce738ab3d5a9 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -12,6 +12,7 @@ */ #include <linux/atomic.h> +#include <linux/debugfs.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/init.h> @@ -38,12 +39,14 @@ #include "powernv.h" #include "pci.h" +static bool pnv_eeh_nb_init = false; + /** - * powernv_eeh_init - EEH platform dependent initialization + * pnv_eeh_init - EEH platform dependent initialization * * EEH platform dependent initialization on powernv */ -static int powernv_eeh_init(void) +static int pnv_eeh_init(void) { struct pci_controller *hose; struct pnv_phb *phb; @@ -85,37 +88,280 @@ static int powernv_eeh_init(void) return 0; } +static int pnv_eeh_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + uint64_t changed_evts = (uint64_t)change; + + /* + * We simply send special EEH event if EEH has + * been enabled, or clear pending events in + * case that we enable EEH soon + */ + if (!(changed_evts & OPAL_EVENT_PCI_ERROR) || + !(events & OPAL_EVENT_PCI_ERROR)) + return 0; + + if (eeh_enabled()) + eeh_send_failure_event(NULL); + else + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); + + return 0; +} + +static struct notifier_block pnv_eeh_nb = { + .notifier_call = pnv_eeh_event, + .next = NULL, + .priority = 0 +}; + +#ifdef CONFIG_DEBUG_FS +static ssize_t pnv_eeh_ei_write(struct file *filp, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct pci_controller *hose = filp->private_data; + struct eeh_dev *edev; + struct eeh_pe *pe; + int pe_no, type, func; + unsigned long addr, mask; + char buf[50]; + int ret; + + if (!eeh_ops || !eeh_ops->err_inject) + return -ENXIO; + + /* Copy over argument buffer */ + ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); + if (!ret) + return -EFAULT; + + /* Retrieve parameters */ + ret = sscanf(buf, "%x:%x:%x:%lx:%lx", + &pe_no, &type, &func, &addr, &mask); + if (ret != 5) + return -EINVAL; + + /* Retrieve PE */ + edev = kzalloc(sizeof(*edev), GFP_KERNEL); + if (!edev) + return -ENOMEM; + edev->phb = hose; + edev->pe_config_addr = pe_no; + pe = eeh_pe_get(edev); + kfree(edev); + if (!pe) + return -ENODEV; + + /* Do error injection */ + ret = eeh_ops->err_inject(pe, type, func, addr, mask); + return ret < 0 ? ret : count; +} + +static const struct file_operations pnv_eeh_ei_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = pnv_eeh_ei_write, +}; + +static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + out_be64(phb->regs + offset, val); + return 0; +} + +static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) +{ + struct pci_controller *hose = data; + struct pnv_phb *phb = hose->private_data; + + *val = in_be64(phb->regs + offset); + return 0; +} + +static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD10, val); +} + +static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD10, val); +} + +static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xD90, val); +} + +static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xD90, val); +} + +static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) +{ + return pnv_eeh_dbgfs_set(data, 0xE10, val); +} + +static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) +{ + return pnv_eeh_dbgfs_get(data, 0xE10, val); +} + +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, + pnv_eeh_outb_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, + pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); +DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, + pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); +#endif /* CONFIG_DEBUG_FS */ + /** - * powernv_eeh_post_init - EEH platform dependent post initialization + * pnv_eeh_post_init - EEH platform dependent post initialization * * EEH platform dependent post initialization on powernv. When * the function is called, the EEH PEs and devices should have * been built. If the I/O cache staff has been built, EEH is * ready to supply service. */ -static int powernv_eeh_post_init(void) +static int pnv_eeh_post_init(void) { struct pci_controller *hose; struct pnv_phb *phb; int ret = 0; + /* Register OPAL event notifier */ + if (!pnv_eeh_nb_init) { + ret = opal_notifier_register(&pnv_eeh_nb); + if (ret) { + pr_warn("%s: Can't register OPAL event notifier (%d)\n", + __func__, ret); + return ret; + } + + pnv_eeh_nb_init = true; + } + list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; - if (phb->eeh_ops && phb->eeh_ops->post_init) { - ret = phb->eeh_ops->post_init(hose); - if (ret) - break; - } + /* + * If EEH is enabled, we're going to rely on that. + * Otherwise, we restore to conventional mechanism + * to clear frozen PE during PCI config access. + */ + if (eeh_enabled()) + phb->flags |= PNV_PHB_FLAG_EEH; + else + phb->flags &= ~PNV_PHB_FLAG_EEH; + + /* Create debugfs entries */ +#ifdef CONFIG_DEBUG_FS + if (phb->has_dbgfs || !phb->dbgfs) + continue; + + phb->has_dbgfs = 1; + debugfs_create_file("err_injct", 0200, + phb->dbgfs, hose, + &pnv_eeh_ei_fops); + + debugfs_create_file("err_injct_outbound", 0600, + phb->dbgfs, hose, + &pnv_eeh_outb_dbgfs_ops); + debugfs_create_file("err_injct_inboundA", 0600, + phb->dbgfs, hose, + &pnv_eeh_inbA_dbgfs_ops); + debugfs_create_file("err_injct_inboundB", 0600, + phb->dbgfs, hose, + &pnv_eeh_inbB_dbgfs_ops); +#endif /* CONFIG_DEBUG_FS */ } + return ret; } +static int pnv_eeh_cap_start(struct pci_dn *pdn) +{ + u32 status; + + if (!pdn) + return 0; + + pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + + return PCI_CAPABILITY_LIST; +} + +static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) +{ + int pos = pnv_eeh_cap_start(pdn); + int cnt = 48; /* Maximal number of capabilities */ + u32 id; + + if (!pos) + return 0; + + while (cnt--) { + pnv_pci_cfg_read(pdn, pos, 1, &pos); + if (pos < 0x40) + break; + + pos &= ~3; + pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id); + if (id == 0xff) + break; + + /* Found */ + if (id == cap) + return pos; + + /* Next one */ + pos += PCI_CAP_LIST_NEXT; + } + + return 0; +} + +static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) +{ + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + u32 header; + int pos = 256, ttl = (4096 - 256) / 8; + + if (!edev || !edev->pcie_cap) + return 0; + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + return 0; + else if (!header) + return 0; + + while (ttl-- > 0) { + if (PCI_EXT_CAP_ID(header) == cap && pos) + return pos; + + pos = PCI_EXT_CAP_NEXT(header); + if (pos < 256) + break; + + if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) + break; + } + + return 0; +} + /** - * powernv_eeh_dev_probe - Do probe on PCI device - * @dev: PCI device - * @flag: unused + * pnv_eeh_probe - Do probe on PCI device + * @pdn: PCI device node + * @data: unused * * When EEH module is installed during system boot, all PCI devices * are checked one by one to see if it supports EEH. The function @@ -129,12 +375,12 @@ static int powernv_eeh_post_init(void) * was possiblly triggered by EEH core, the binding between EEH device * and the PCI device isn't built yet. */ -static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) +static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) { - struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pci_controller *hose = pdn->phb; struct pnv_phb *phb = hose->private_data; - struct device_node *dn = pci_device_to_OF_node(dev); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + uint32_t pcie_flags; int ret; /* @@ -143,40 +389,42 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * the root bridge. So it's not reasonable to continue * the probing. */ - if (!dn || !edev || edev->pe) - return 0; + if (!edev || edev->pe) + return NULL; /* Skip for PCI-ISA bridge */ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_ISA) - return 0; + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; /* Initialize eeh device */ - edev->class_code = dev->class; + edev->class_code = pdn->class_code; edev->mode &= 0xFFFFFF00; - if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) + edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); + if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; - edev->pcix_cap = pci_find_capability(dev, PCI_CAP_ID_PCIX); - if (pci_is_pcie(dev)) { - edev->pcie_cap = pci_pcie_cap(dev); - - if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) - edev->mode |= EEH_DEV_ROOT_PORT; - else if (pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) - edev->mode |= EEH_DEV_DS_PORT; - - edev->aer_cap = pci_find_ext_capability(dev, - PCI_EXT_CAP_ID_ERR); + if (edev->pcie_cap) { + pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, + 2, &pcie_flags); + pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; + if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) + edev->mode |= EEH_DEV_ROOT_PORT; + else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) + edev->mode |= EEH_DEV_DS_PORT; + } } - edev->config_addr = ((dev->bus->number << 8) | dev->devfn); - edev->pe_config_addr = phb->bdfn_to_pe(phb, dev->bus, dev->devfn & 0xff); + edev->config_addr = (pdn->busno << 8) | (pdn->devfn); + edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr]; /* Create PE */ ret = eeh_add_to_parent_pe(edev); if (ret) { - pr_warn("%s: Can't add PCI dev %s to parent PE (%d)\n", - __func__, pci_name(dev), ret); - return ret; + pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", + __func__, hose->global_number, pdn->busno, + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); + return NULL; } /* @@ -195,8 +443,10 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * Broadcom Austin 4-ports NICs (14e4:1657) * Broadcom Shiner 2-ports 10G NICs (14e4:168e) */ - if ((dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x1657) || - (dev->vendor == PCI_VENDOR_ID_BROADCOM && dev->device == 0x168e)) + if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x1657) || + (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && + pdn->device_id == 0x168e)) edev->pe->state |= EEH_PE_CFG_RESTRICTED; /* @@ -206,7 +456,8 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * to PE reset. */ if (!edev->pe->bus) - edev->pe->bus = dev->bus; + edev->pe->bus = pci_find_bus(hose->global_number, + pdn->busno); /* * Enable EEH explicitly so that we will do EEH check @@ -217,11 +468,11 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) /* Save memory bars */ eeh_save_bars(edev); - return 0; + return NULL; } /** - * powernv_eeh_set_option - Initialize EEH or MMIO/DMA reenable + * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable * @pe: EEH PE * @option: operation to be issued * @@ -229,36 +480,236 @@ static int powernv_eeh_dev_probe(struct pci_dev *dev, void *flag) * Currently, following options are support according to PAPR: * Enable EEH, Disable EEH, Enable MMIO and Enable DMA */ -static int powernv_eeh_set_option(struct eeh_pe *pe, int option) +static int pnv_eeh_set_option(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + bool freeze_pe = false; + int opt, ret = 0; + s64 rc; + + /* Sanity check on option */ + switch (option) { + case EEH_OPT_DISABLE: + return -EPERM; + case EEH_OPT_ENABLE: + return 0; + case EEH_OPT_THAW_MMIO: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; + break; + case EEH_OPT_THAW_DMA: + opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; + break; + case EEH_OPT_FREEZE_PE: + freeze_pe = true; + opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; + break; + default: + pr_warn("%s: Invalid option %d\n", __func__, option); + return -EINVAL; + } - /* - * What we need do is pass it down for hardware - * implementation to handle it. - */ - if (phb->eeh_ops && phb->eeh_ops->set_option) - ret = phb->eeh_ops->set_option(pe, option); + /* If PHB supports compound PE, to handle it */ + if (freeze_pe) { + if (phb->freeze_pe) { + phb->freeze_pe(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_set(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld freezing " + "PHB#%x-PE#%x\n", + __func__, rc, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } else { + if (phb->unfreeze_pe) { + ret = phb->unfreeze_pe(phb, pe->addr, opt); + } else { + rc = opal_pci_eeh_freeze_clear(phb->opal_id, + pe->addr, opt); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld enable %d " + "for PHB#%x-PE#%x\n", + __func__, rc, option, + phb->hose->global_number, pe->addr); + ret = -EIO; + } + } + } return ret; } /** - * powernv_eeh_get_pe_addr - Retrieve PE address + * pnv_eeh_get_pe_addr - Retrieve PE address * @pe: EEH PE * * Retrieve the PE address according to the given tranditional * PCI BDF (Bus/Device/Function) address. */ -static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) +static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) { return pe->addr; } +static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + s64 rc; + + rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, + PNV_PCI_DIAG_BUF_SIZE); + if (rc != OPAL_SUCCESS) + pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", + __func__, rc, pe->phb->global_number); +} + +static int pnv_eeh_get_phb_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result = 0; + + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x state\n", + __func__, rc, phb->hose->global_number); + return EEH_STATE_NOT_SUPPORT; + } + + /* + * Check PHB state. If the PHB is frozen for the + * first time, to dump the PHB diag-data. + */ + if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + } else if (!(pe->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + +static int pnv_eeh_get_pe_state(struct eeh_pe *pe) +{ + struct pnv_phb *phb = pe->phb->private_data; + u8 fstate; + __be16 pcierr; + s64 rc; + int result; + + /* + * We don't clobber hardware frozen state until PE + * reset is completed. In order to keep EEH core + * moving forward, we have to return operational + * state during PE reset. + */ + if (pe->state & EEH_PE_RESET) { + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + return result; + } + + /* + * Fetch PE state from hardware. If the PHB + * supports compound PE, let it handle that. + */ + if (phb->get_pe_state) { + fstate = phb->get_pe_state(phb, pe->addr); + } else { + rc = opal_pci_eeh_freeze_status(phb->opal_id, + pe->addr, + &fstate, + &pcierr, + NULL); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", + __func__, rc, phb->hose->global_number, + pe->addr); + return EEH_STATE_NOT_SUPPORT; + } + } + + /* Figure out state */ + switch (fstate) { + case OPAL_EEH_STOPPED_NOT_FROZEN: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE | + EEH_STATE_MMIO_ENABLED | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_FREEZE: + result = (EEH_STATE_DMA_ACTIVE | + EEH_STATE_DMA_ENABLED); + break; + case OPAL_EEH_STOPPED_DMA_FREEZE: + result = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_MMIO_ENABLED); + break; + case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: + result = 0; + break; + case OPAL_EEH_STOPPED_RESET: + result = EEH_STATE_RESET_ACTIVE; + break; + case OPAL_EEH_STOPPED_TEMP_UNAVAIL: + result = EEH_STATE_UNAVAILABLE; + break; + case OPAL_EEH_STOPPED_PERM_UNAVAIL: + result = EEH_STATE_NOT_SUPPORT; + break; + default: + result = EEH_STATE_NOT_SUPPORT; + pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", + __func__, phb->hose->global_number, + pe->addr, fstate); + } + + /* + * If PHB supports compound PE, to freeze all + * slave PEs for consistency. + * + * If the PE is switching to frozen state for the + * first time, to dump the PHB diag-data. + */ + if (!(result & EEH_STATE_NOT_SUPPORT) && + !(result & EEH_STATE_UNAVAILABLE) && + !(result & EEH_STATE_MMIO_ACTIVE) && + !(result & EEH_STATE_DMA_ACTIVE) && + !(pe->state & EEH_PE_ISOLATED)) { + if (phb->freeze_pe) + phb->freeze_pe(phb, pe->addr); + + eeh_pe_state_mark(pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); + } + + return result; +} + /** - * powernv_eeh_get_state - Retrieve PE state + * pnv_eeh_get_state - Retrieve PE state * @pe: EEH PE * @delay: delay while PE state is temporarily unavailable * @@ -267,64 +718,279 @@ static int powernv_eeh_get_pe_addr(struct eeh_pe *pe) * we prefer passing down to hardware implementation to handle * it. */ -static int powernv_eeh_get_state(struct eeh_pe *pe, int *delay) +static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) +{ + int ret; + + if (pe->type & EEH_PE_PHB) + ret = pnv_eeh_get_phb_state(pe); + else + ret = pnv_eeh_get_pe_state(pe); + + if (!delay) + return ret; + + /* + * If the PE state is temporarily unavailable, + * to inform the EEH core delay for default + * period (1 second) + */ + *delay = 0; + if (ret & EEH_STATE_UNAVAILABLE) + *delay = 1000; + + return ret; +} + +static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) +{ + s64 rc = OPAL_HARDWARE; + + while (1) { + rc = opal_pci_poll(phb->opal_id); + if (rc <= 0) + break; + + if (system_state < SYSTEM_RUNNING) + udelay(1000 * rc); + else + msleep(rc); + } + + return rc; +} + +int pnv_eeh_phb_reset(struct pci_controller *hose, int option) { - struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = EEH_STATE_NOT_SUPPORT; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* Issue PHB complete reset request */ + if (option == EEH_RESET_FUNDAMENTAL || + option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_COMPLETE, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; - if (phb->eeh_ops && phb->eeh_ops->get_state) { - ret = phb->eeh_ops->get_state(pe); + /* + * Poll state of the PHB until the request is done + * successfully. The PHB reset is usually PHB complete + * reset followed by hot reset on root bus. So we also + * need the PCI bus settlement delay. + */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) { + if (system_state < SYSTEM_RUNNING) + udelay(1000 * EEH_PE_RST_SETTLE_TIME); + else + msleep(EEH_PE_RST_SETTLE_TIME); + } +out: + if (rc != OPAL_SUCCESS) + return -EIO; - /* - * If the PE state is temporarily unavailable, - * to inform the EEH core delay for default - * period (1 second) - */ - if (delay) { - *delay = 0; - if (ret & EEH_STATE_UNAVAILABLE) - *delay = 1000; + return 0; +} + +static int pnv_eeh_root_reset(struct pci_controller *hose, int option) +{ + struct pnv_phb *phb = hose->private_data; + s64 rc = OPAL_HARDWARE; + + pr_debug("%s: Reset PHB#%x, option=%d\n", + __func__, hose->global_number, option); + + /* + * During the reset deassert time, we needn't care + * the reset scope because the firmware does nothing + * for fundamental or hot reset during deassert phase. + */ + if (option == EEH_RESET_FUNDAMENTAL) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_FUNDAMENTAL, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_HOT) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_ASSERT_RESET); + else if (option == EEH_RESET_DEACTIVATE) + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PCI_HOT, + OPAL_DEASSERT_RESET); + if (rc < 0) + goto out; + + /* Poll state of the PHB until the request is done */ + rc = pnv_eeh_phb_poll(phb); + if (option == EEH_RESET_DEACTIVATE) + msleep(EEH_PE_RST_SETTLE_TIME); +out: + if (rc != OPAL_SUCCESS) + return -EIO; + + return 0; +} + +static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) +{ + struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); + int aer = edev ? edev->aer_cap : 0; + u32 ctrl; + + pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", + __func__, pci_domain_nr(dev->bus), + dev->bus->number, option); + + switch (option) { + case EEH_RESET_FUNDAMENTAL: + case EEH_RESET_HOT: + /* Don't report linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl |= PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); } + + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl |= PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_HOLD_TIME); + break; + case EEH_RESET_DEACTIVATE: + eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); + ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; + eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); + + msleep(EEH_PE_RST_SETTLE_TIME); + + /* Continue reporting linkDown event */ + if (aer) { + eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, &ctrl); + ctrl &= ~PCI_ERR_UNC_SURPDN; + eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, + 4, ctrl); + } + + break; } - return ret; + return 0; +} + +void pnv_pci_reset_secondary_bus(struct pci_dev *dev) +{ + struct pci_controller *hose; + + if (pci_is_root_bus(dev->bus)) { + hose = pci_bus_to_host(dev->bus); + pnv_eeh_root_reset(hose, EEH_RESET_HOT); + pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); + } else { + pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); + pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); + } } /** - * powernv_eeh_reset - Reset the specified PE + * pnv_eeh_reset - Reset the specified PE * @pe: EEH PE * @option: reset option * - * Reset the specified PE + * Do reset on the indicated PE. For PCI bus sensitive PE, + * we need to reset the parent p2p bridge. The PHB has to + * be reinitialized if the p2p bridge is root bridge. For + * PCI device sensitive PE, we will try to reset the device + * through FLR. For now, we don't have OPAL APIs to do HARD + * reset yet, so all reset would be SOFT (HOT) reset. */ -static int powernv_eeh_reset(struct eeh_pe *pe, int option) +static int pnv_eeh_reset(struct eeh_pe *pe, int option) { struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + struct pci_bus *bus; + int ret; + + /* + * For PHB reset, we always have complete reset. For those PEs whose + * primary bus derived from root complex (root bus) or root port + * (usually bus#1), we apply hot or fundamental reset on the root port. + * For other PEs, we always have hot reset on the PE primary bus. + * + * Here, we have different design to pHyp, which always clear the + * frozen state during PE reset. However, the good idea here from + * benh is to keep frozen state before we get PE reset done completely + * (until BAR restore). With the frozen state, HW drops illegal IO + * or MMIO access, which can incur recrusive frozen PE during PE + * reset. The side effect is that EEH core has to clear the frozen + * state explicitly after BAR restore. + */ + if (pe->type & EEH_PE_PHB) { + ret = pnv_eeh_phb_reset(hose, option); + } else { + struct pnv_phb *phb; + s64 rc; - if (phb->eeh_ops && phb->eeh_ops->reset) - ret = phb->eeh_ops->reset(pe, option); + /* + * The frozen PE might be caused by PAPR error injection + * registers, which are expected to be cleared after hitting + * frozen PE as stated in the hardware spec. Unfortunately, + * that's not true on P7IOC. So we have to clear it manually + * to avoid recursive EEH errors during recovery. + */ + phb = hose->private_data; + if (phb->model == PNV_PHB_MODEL_P7IOC && + (option == EEH_RESET_HOT || + option == EEH_RESET_FUNDAMENTAL)) { + rc = opal_pci_reset(phb->opal_id, + OPAL_RESET_PHB_ERROR, + OPAL_ASSERT_RESET); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld clearing " + "error injection registers\n", + __func__, rc); + return -EIO; + } + } + + bus = eeh_pe_bus_get(pe); + if (pci_is_root_bus(bus) || + pci_is_root_bus(bus->parent)) + ret = pnv_eeh_root_reset(hose, option); + else + ret = pnv_eeh_bridge_reset(bus->self, option); + } return ret; } /** - * powernv_eeh_wait_state - Wait for PE state + * pnv_eeh_wait_state - Wait for PE state * @pe: EEH PE * @max_wait: maximal period in microsecond * * Wait for the state of associated PE. It might take some time * to retrieve the PE's state. */ -static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) +static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) { int ret; int mwait; while (1) { - ret = powernv_eeh_get_state(pe, &mwait); + ret = pnv_eeh_get_state(pe, &mwait); /* * If the PE's state is temporarily unavailable, @@ -348,7 +1014,7 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) } /** - * powernv_eeh_get_log - Retrieve error log + * pnv_eeh_get_log - Retrieve error log * @pe: EEH PE * @severity: temporary or permanent error log * @drv_log: driver log to be combined with retrieved error log @@ -356,41 +1022,30 @@ static int powernv_eeh_wait_state(struct eeh_pe *pe, int max_wait) * * Retrieve the temporary or permanent error from the PE. */ -static int powernv_eeh_get_log(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len) +static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, + char *drv_log, unsigned long len) { - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data(pe->phb, pe->data); - if (phb->eeh_ops && phb->eeh_ops->get_log) - ret = phb->eeh_ops->get_log(pe, severity, drv_log, len); - - return ret; + return 0; } /** - * powernv_eeh_configure_bridge - Configure PCI bridges in the indicated PE + * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE * @pe: EEH PE * * The function will be called to reconfigure the bridges included * in the specified PE so that the mulfunctional PE would be recovered * again. */ -static int powernv_eeh_configure_bridge(struct eeh_pe *pe) +static int pnv_eeh_configure_bridge(struct eeh_pe *pe) { - struct pci_controller *hose = pe->phb; - struct pnv_phb *phb = hose->private_data; - int ret = 0; - - if (phb->eeh_ops && phb->eeh_ops->configure_bridge) - ret = phb->eeh_ops->configure_bridge(pe); - - return ret; + return 0; } /** - * powernv_pe_err_inject - Inject specified error to the indicated PE + * pnv_pe_err_inject - Inject specified error to the indicated PE * @pe: the indicated PE * @type: error type * @func: specific error type @@ -401,22 +1056,52 @@ static int powernv_eeh_configure_bridge(struct eeh_pe *pe) * determined by @type and @func, to the indicated PE for * testing purpose. */ -static int powernv_eeh_err_inject(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask) +static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, + unsigned long addr, unsigned long mask) { struct pci_controller *hose = pe->phb; struct pnv_phb *phb = hose->private_data; - int ret = -EEXIST; + s64 rc; + + /* Sanity check on error type */ + if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && + type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { + pr_warn("%s: Invalid error type %d\n", + __func__, type); + return -ERANGE; + } - if (phb->eeh_ops && phb->eeh_ops->err_inject) - ret = phb->eeh_ops->err_inject(pe, type, func, addr, mask); + if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || + func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { + pr_warn("%s: Invalid error function %d\n", + __func__, func); + return -ERANGE; + } - return ret; + /* Firmware supports error injection ? */ + if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { + pr_warn("%s: Firmware doesn't support error injection\n", + __func__); + return -ENXIO; + } + + /* Do error injection */ + rc = opal_pci_err_inject(phb->opal_id, pe->addr, + type, func, addr, mask); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failure %lld injecting error " + "%d-%d to PHB#%x-PE#%x\n", + __func__, rc, type, func, + hose->global_number, pe->addr); + return -EIO; + } + + return 0; } -static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) +static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) { - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); if (!edev || !edev->pe) return false; @@ -427,51 +1112,377 @@ static inline bool powernv_eeh_cfg_blocked(struct device_node *dn) return false; } -static int powernv_eeh_read_config(struct device_node *dn, - int where, int size, u32 *val) +static int pnv_eeh_read_config(struct pci_dn *pdn, + int where, int size, u32 *val) { - if (powernv_eeh_cfg_blocked(dn)) { + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) { *val = 0xFFFFFFFF; return PCIBIOS_SET_FAILED; } - return pnv_pci_cfg_read(dn, where, size, val); + return pnv_pci_cfg_read(pdn, where, size, val); } -static int powernv_eeh_write_config(struct device_node *dn, - int where, int size, u32 val) +static int pnv_eeh_write_config(struct pci_dn *pdn, + int where, int size, u32 val) { - if (powernv_eeh_cfg_blocked(dn)) + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (pnv_eeh_cfg_blocked(pdn)) return PCIBIOS_SET_FAILED; - return pnv_pci_cfg_write(dn, where, size, val); + return pnv_pci_cfg_write(pdn, where, size, val); +} + +static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) +{ + /* GEM */ + if (data->gemXfir || data->gemRfir || + data->gemRirqfir || data->gemMask || data->gemRwof) + pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->gemXfir), + be64_to_cpu(data->gemRfir), + be64_to_cpu(data->gemRirqfir), + be64_to_cpu(data->gemMask), + be64_to_cpu(data->gemRwof)); + + /* LEM */ + if (data->lemFir || data->lemErrMask || + data->lemAction0 || data->lemAction1 || data->lemWof) + pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->lemFir), + be64_to_cpu(data->lemErrMask), + be64_to_cpu(data->lemAction0), + be64_to_cpu(data->lemAction1), + be64_to_cpu(data->lemWof)); +} + +static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) +{ + struct pnv_phb *phb = hose->private_data; + struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; + long rc; + + rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); + if (rc != OPAL_SUCCESS) { + pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", + __func__, phb->hub_id, rc); + return; + } + + switch (data->type) { + case OPAL_P7IOC_DIAG_TYPE_RGC: + pr_info("P7IOC diag-data for RGC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + if (data->rgc.rgcStatus || data->rgc.rgcLdcp) + pr_info(" RGC: %016llx %016llx\n", + be64_to_cpu(data->rgc.rgcStatus), + be64_to_cpu(data->rgc.rgcLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_BI: + pr_info("P7IOC diag-data for BI %s\n\n", + data->bi.biDownbound ? "Downbound" : "Upbound"); + pnv_eeh_dump_hub_diag_common(data); + if (data->bi.biLdcp0 || data->bi.biLdcp1 || + data->bi.biLdcp2 || data->bi.biFenceStatus) + pr_info(" BI: %016llx %016llx %016llx %016llx\n", + be64_to_cpu(data->bi.biLdcp0), + be64_to_cpu(data->bi.biLdcp1), + be64_to_cpu(data->bi.biLdcp2), + be64_to_cpu(data->bi.biFenceStatus)); + break; + case OPAL_P7IOC_DIAG_TYPE_CI: + pr_info("P7IOC diag-data for CI Port %d\n\n", + data->ci.ciPort); + pnv_eeh_dump_hub_diag_common(data); + if (data->ci.ciPortStatus || data->ci.ciPortLdcp) + pr_info(" CI: %016llx %016llx\n", + be64_to_cpu(data->ci.ciPortStatus), + be64_to_cpu(data->ci.ciPortLdcp)); + break; + case OPAL_P7IOC_DIAG_TYPE_MISC: + pr_info("P7IOC diag-data for MISC\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + case OPAL_P7IOC_DIAG_TYPE_I2C: + pr_info("P7IOC diag-data for I2C\n\n"); + pnv_eeh_dump_hub_diag_common(data); + break; + default: + pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", + __func__, phb->hub_id, data->type); + } +} + +static int pnv_eeh_get_pe(struct pci_controller *hose, + u16 pe_no, struct eeh_pe **pe) +{ + struct pnv_phb *phb = hose->private_data; + struct pnv_ioda_pe *pnv_pe; + struct eeh_pe *dev_pe; + struct eeh_dev edev; + + /* + * If PHB supports compound PE, to fetch + * the master PE because slave PE is invisible + * to EEH core. + */ + pnv_pe = &phb->ioda.pe_array[pe_no]; + if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { + pnv_pe = pnv_pe->master; + WARN_ON(!pnv_pe || + !(pnv_pe->flags & PNV_IODA_PE_MASTER)); + pe_no = pnv_pe->pe_number; + } + + /* Find the PE according to PE# */ + memset(&edev, 0, sizeof(struct eeh_dev)); + edev.phb = hose; + edev.pe_config_addr = pe_no; + dev_pe = eeh_pe_get(&edev); + if (!dev_pe) + return -EEXIST; + + /* Freeze the (compound) PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, pe_no); + + /* + * At this point, we're sure the (compound) PE should + * have been frozen. However, we still need poke until + * hitting the frozen PE on top level. + */ + dev_pe = dev_pe->parent; + while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { + int ret; + int active_flags = (EEH_STATE_MMIO_ACTIVE | + EEH_STATE_DMA_ACTIVE); + + ret = eeh_ops->get_state(dev_pe, NULL); + if (ret <= 0 || (ret & active_flags) == active_flags) { + dev_pe = dev_pe->parent; + continue; + } + + /* Frozen parent PE */ + *pe = dev_pe; + if (!(dev_pe->state & EEH_PE_ISOLATED)) + phb->freeze_pe(phb, dev_pe->addr); + + /* Next one */ + dev_pe = dev_pe->parent; + } + + return 0; } /** - * powernv_eeh_next_error - Retrieve next EEH error to handle + * pnv_eeh_next_error - Retrieve next EEH error to handle * @pe: Affected PE * - * Using OPAL API, to retrieve next EEH error for EEH core to handle + * The function is expected to be called by EEH core while it gets + * special EEH event (without binding PE). The function calls to + * OPAL APIs for next error to handle. The informational error is + * handled internally by platform. However, the dead IOC, dead PHB, + * fenced PHB and frozen PE should be handled by EEH core eventually. */ -static int powernv_eeh_next_error(struct eeh_pe **pe) +static int pnv_eeh_next_error(struct eeh_pe **pe) { struct pci_controller *hose; - struct pnv_phb *phb = NULL; + struct pnv_phb *phb; + struct eeh_pe *phb_pe, *parent_pe; + __be64 frozen_pe_no; + __be16 err_type, severity; + int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); + long rc; + int state, ret = EEH_NEXT_ERR_NONE; + + /* + * While running here, it's safe to purge the event queue. + * And we should keep the cached OPAL notifier event sychronized + * between the kernel and firmware. + */ + eeh_remove_event(NULL, false); + opal_notifier_update_evt(OPAL_EVENT_PCI_ERROR, 0x0ul); list_for_each_entry(hose, &hose_list, list_node) { + /* + * If the subordinate PCI buses of the PHB has been + * removed or is exactly under error recovery, we + * needn't take care of it any more. + */ phb = hose->private_data; - break; - } + phb_pe = eeh_phb_pe_get(hose); + if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) + continue; + + rc = opal_pci_next_error(phb->opal_id, + &frozen_pe_no, &err_type, &severity); + if (rc != OPAL_SUCCESS) { + pr_devel("%s: Invalid return value on " + "PHB#%x (0x%lx) from opal_pci_next_error", + __func__, hose->global_number, rc); + continue; + } + + /* If the PHB doesn't have error, stop processing */ + if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || + be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { + pr_devel("%s: No error found on PHB#%x\n", + __func__, hose->global_number); + continue; + } + + /* + * Processing the error. We're expecting the error with + * highest priority reported upon multiple errors on the + * specific PHB. + */ + pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", + __func__, be16_to_cpu(err_type), + be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), + hose->global_number); + switch (be16_to_cpu(err_type)) { + case OPAL_EEH_IOC_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { + pr_err("EEH: dead IOC detected\n"); + ret = EEH_NEXT_ERR_DEAD_IOC; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: IOC informative error " + "detected\n"); + pnv_eeh_get_and_dump_hub_diag(hose); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PHB_ERROR: + if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { + *pe = phb_pe; + pr_err("EEH: dead PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_DEAD_PHB; + } else if (be16_to_cpu(severity) == + OPAL_EEH_SEV_PHB_FENCED) { + *pe = phb_pe; + pr_err("EEH: Fenced PHB#%x detected, " + "location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FENCED_PHB; + } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { + pr_info("EEH: PHB#%x informative error " + "detected, location: %s\n", + hose->global_number, + eeh_pe_loc_get(phb_pe)); + pnv_eeh_get_phb_diag(phb_pe); + pnv_pci_dump_phb_diag_data(hose, phb_pe->data); + ret = EEH_NEXT_ERR_NONE; + } + + break; + case OPAL_EEH_PE_ERROR: + /* + * If we can't find the corresponding PE, we + * just try to unfreeze. + */ + if (pnv_eeh_get_pe(hose, + be64_to_cpu(frozen_pe_no), pe)) { + /* Try best to clear it */ + pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", + hose->global_number, frozen_pe_no); + pr_info("EEH: PHB location: %s\n", + eeh_pe_loc_get(phb_pe)); + opal_pci_eeh_freeze_clear(phb->opal_id, + frozen_pe_no, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + ret = EEH_NEXT_ERR_NONE; + } else if ((*pe)->state & EEH_PE_ISOLATED || + eeh_pe_passed(*pe)) { + ret = EEH_NEXT_ERR_NONE; + } else { + pr_err("EEH: Frozen PE#%x " + "on PHB#%x detected\n", + (*pe)->addr, + (*pe)->phb->global_number); + pr_err("EEH: PE location: %s, " + "PHB location: %s\n", + eeh_pe_loc_get(*pe), + eeh_pe_loc_get(phb_pe)); + ret = EEH_NEXT_ERR_FROZEN_PE; + } + + break; + default: + pr_warn("%s: Unexpected error type %d\n", + __func__, be16_to_cpu(err_type)); + } - if (phb && phb->eeh_ops->next_error) - return phb->eeh_ops->next_error(pe); + /* + * EEH core will try recover from fenced PHB or + * frozen PE. In the time for frozen PE, EEH core + * enable IO path for that before collecting logs, + * but it ruins the site. So we have to dump the + * log in advance here. + */ + if ((ret == EEH_NEXT_ERR_FROZEN_PE || + ret == EEH_NEXT_ERR_FENCED_PHB) && + !((*pe)->state & EEH_PE_ISOLATED)) { + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + pnv_eeh_get_phb_diag(*pe); + + if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) + pnv_pci_dump_phb_diag_data((*pe)->phb, + (*pe)->data); + } - return -EEXIST; + /* + * We probably have the frozen parent PE out there and + * we need have to handle frozen parent PE firstly. + */ + if (ret == EEH_NEXT_ERR_FROZEN_PE) { + parent_pe = (*pe)->parent; + while (parent_pe) { + /* Hit the ceiling ? */ + if (parent_pe->type & EEH_PE_PHB) + break; + + /* Frozen parent PE ? */ + state = eeh_ops->get_state(parent_pe, NULL); + if (state > 0 && + (state & active_flags) != active_flags) + *pe = parent_pe; + + /* Next parent level */ + parent_pe = parent_pe->parent; + } + + /* We possibly migrate to another PE */ + eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); + } + + /* + * If we have no errors on the specific PHB or only + * informative error there, we continue poking it. + * Otherwise, we need actions to be taken by upper + * layer. + */ + if (ret > EEH_NEXT_ERR_INF) + break; + } + + return ret; } -static int powernv_eeh_restore_config(struct device_node *dn) +static int pnv_eeh_restore_config(struct pci_dn *pdn) { - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); struct pnv_phb *phb; s64 ret; @@ -490,24 +1501,23 @@ static int powernv_eeh_restore_config(struct device_node *dn) return 0; } -static struct eeh_ops powernv_eeh_ops = { +static struct eeh_ops pnv_eeh_ops = { .name = "powernv", - .init = powernv_eeh_init, - .post_init = powernv_eeh_post_init, - .of_probe = NULL, - .dev_probe = powernv_eeh_dev_probe, - .set_option = powernv_eeh_set_option, - .get_pe_addr = powernv_eeh_get_pe_addr, - .get_state = powernv_eeh_get_state, - .reset = powernv_eeh_reset, - .wait_state = powernv_eeh_wait_state, - .get_log = powernv_eeh_get_log, - .configure_bridge = powernv_eeh_configure_bridge, - .err_inject = powernv_eeh_err_inject, - .read_config = powernv_eeh_read_config, - .write_config = powernv_eeh_write_config, - .next_error = powernv_eeh_next_error, - .restore_config = powernv_eeh_restore_config + .init = pnv_eeh_init, + .post_init = pnv_eeh_post_init, + .probe = pnv_eeh_probe, + .set_option = pnv_eeh_set_option, + .get_pe_addr = pnv_eeh_get_pe_addr, + .get_state = pnv_eeh_get_state, + .reset = pnv_eeh_reset, + .wait_state = pnv_eeh_wait_state, + .get_log = pnv_eeh_get_log, + .configure_bridge = pnv_eeh_configure_bridge, + .err_inject = pnv_eeh_err_inject, + .read_config = pnv_eeh_read_config, + .write_config = pnv_eeh_write_config, + .next_error = pnv_eeh_next_error, + .restore_config = pnv_eeh_restore_config }; /** @@ -521,7 +1531,7 @@ static int __init eeh_powernv_init(void) int ret = -EINVAL; eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); - ret = eeh_ops_register(&powernv_eeh_ops); + ret = eeh_ops_register(&pnv_eeh_ops); if (!ret) pr_info("EEH: PowerNV platform initialized\n"); else diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 6c9ff2b95119..76b344125cef 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1777,7 +1777,8 @@ static void pnv_ioda_setup_pe_seg(struct pci_controller *hose, region.start += phb->ioda.io_segsize; index++; } - } else if (res->flags & IORESOURCE_MEM) { + } else if ((res->flags & IORESOURCE_MEM) && + !pnv_pci_is_mem_pref_64(res->flags)) { region.start = res->start - hose->mem_offset[0] - phb->ioda.m32_pci_base; @@ -2078,9 +2079,6 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, phb->get_pe_state = pnv_ioda_get_pe_state; phb->freeze_pe = pnv_ioda_freeze_pe; phb->unfreeze_pe = pnv_ioda_unfreeze_pe; -#ifdef CONFIG_EEH - phb->eeh_ops = &ioda_eeh_ops; -#endif /* Setup RID -> PE mapping function */ phb->bdfn_to_pe = pnv_ioda_bdfn_to_pe; @@ -2121,8 +2119,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np, */ if (is_kdump_kernel()) { pr_info(" Issue PHB reset ...\n"); - ioda_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); - ioda_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); + pnv_eeh_phb_reset(hose, EEH_RESET_FUNDAMENTAL); + pnv_eeh_phb_reset(hose, EEH_RESET_DEACTIVATE); } /* Remove M64 resource if we can't configure it successfully */ diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 54323d6b5166..946aa3d62c3c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -366,9 +366,9 @@ static void pnv_pci_handle_eeh_config(struct pnv_phb *phb, u32 pe_no) spin_unlock_irqrestore(&phb->lock, flags); } -static void pnv_pci_config_check_eeh(struct pnv_phb *phb, - struct device_node *dn) +static void pnv_pci_config_check_eeh(struct pci_dn *pdn) { + struct pnv_phb *phb = pdn->phb->private_data; u8 fstate; __be16 pcierr; int pe_no; @@ -379,7 +379,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, * setup that yet. So all ER errors should be mapped to * reserved PE. */ - pe_no = PCI_DN(dn)->pe_number; + pe_no = pdn->pe_number; if (pe_no == IODA_INVALID_PE) { if (phb->type == PNV_PHB_P5IOC2) pe_no = 0; @@ -407,8 +407,7 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, } cfg_dbg(" -> EEH check, bdfn=%04x PE#%d fstate=%x\n", - (PCI_DN(dn)->busno << 8) | (PCI_DN(dn)->devfn), - pe_no, fstate); + (pdn->busno << 8) | (pdn->devfn), pe_no, fstate); /* Clear the frozen state if applicable */ if (fstate == OPAL_EEH_STOPPED_MMIO_FREEZE || @@ -425,10 +424,9 @@ static void pnv_pci_config_check_eeh(struct pnv_phb *phb, } } -int pnv_pci_cfg_read(struct device_node *dn, +int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val) { - struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; s64 rc; @@ -462,10 +460,9 @@ int pnv_pci_cfg_read(struct device_node *dn, return PCIBIOS_SUCCESSFUL; } -int pnv_pci_cfg_write(struct device_node *dn, +int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val) { - struct pci_dn *pdn = PCI_DN(dn); struct pnv_phb *phb = pdn->phb->private_data; u32 bdfn = (pdn->busno << 8) | pdn->devfn; @@ -489,18 +486,17 @@ int pnv_pci_cfg_write(struct device_node *dn, } #if CONFIG_EEH -static bool pnv_pci_cfg_check(struct pci_controller *hose, - struct device_node *dn) +static bool pnv_pci_cfg_check(struct pci_dn *pdn) { struct eeh_dev *edev = NULL; - struct pnv_phb *phb = hose->private_data; + struct pnv_phb *phb = pdn->phb->private_data; /* EEH not enabled ? */ if (!(phb->flags & PNV_PHB_FLAG_EEH)) return true; /* PE reset or device removed ? */ - edev = of_node_to_eeh_dev(dn); + edev = pdn->edev; if (edev) { if (edev->pe && (edev->pe->state & EEH_PE_CFG_BLOCKED)) @@ -513,8 +509,7 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose, return true; } #else -static inline pnv_pci_cfg_check(struct pci_controller *hose, - struct device_node *dn) +static inline pnv_pci_cfg_check(struct pci_dn *pdn) { return true; } @@ -524,32 +519,26 @@ static int pnv_pci_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; struct pnv_phb *phb; - bool found = false; int ret; *val = 0xFFFFFFFF; - for (dn = busdn->child; dn; dn = dn->sibling) { - pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) { - phb = pdn->phb->private_data; - found = true; - break; - } - } + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; - if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + if (!pnv_pci_cfg_check(pdn)) return PCIBIOS_DEVICE_NOT_FOUND; - ret = pnv_pci_cfg_read(dn, where, size, val); - if (phb->flags & PNV_PHB_FLAG_EEH) { + ret = pnv_pci_cfg_read(pdn, where, size, val); + phb = pdn->phb->private_data; + if (phb->flags & PNV_PHB_FLAG_EEH && pdn->edev) { if (*val == EEH_IO_ERROR_VALUE(size) && - eeh_dev_check_failure(of_node_to_eeh_dev(dn))) + eeh_dev_check_failure(pdn->edev)) return PCIBIOS_DEVICE_NOT_FOUND; } else { - pnv_pci_config_check_eeh(phb, dn); + pnv_pci_config_check_eeh(pdn); } return ret; @@ -559,27 +548,21 @@ static int pnv_pci_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - struct device_node *dn, *busdn = pci_bus_to_OF_node(bus); struct pci_dn *pdn; struct pnv_phb *phb; - bool found = false; int ret; - for (dn = busdn->child; dn; dn = dn->sibling) { - pdn = PCI_DN(dn); - if (pdn && pdn->devfn == devfn) { - phb = pdn->phb->private_data; - found = true; - break; - } - } + pdn = pci_get_pdn_by_devfn(bus, devfn); + if (!pdn) + return PCIBIOS_DEVICE_NOT_FOUND; - if (!found || !pnv_pci_cfg_check(pdn->phb, dn)) + if (!pnv_pci_cfg_check(pdn)) return PCIBIOS_DEVICE_NOT_FOUND; - ret = pnv_pci_cfg_write(dn, where, size, val); + ret = pnv_pci_cfg_write(pdn, where, size, val); + phb = pdn->phb->private_data; if (!(phb->flags & PNV_PHB_FLAG_EEH)) - pnv_pci_config_check_eeh(phb, dn); + pnv_pci_config_check_eeh(pdn); return ret; } diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index 6c02ff8dd69f..1f0cb66133a1 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -75,22 +75,6 @@ struct pnv_ioda_pe { struct list_head list; }; -/* IOC dependent EEH operations */ -#ifdef CONFIG_EEH -struct pnv_eeh_ops { - int (*post_init)(struct pci_controller *hose); - int (*set_option)(struct eeh_pe *pe, int option); - int (*get_state)(struct eeh_pe *pe); - int (*reset)(struct eeh_pe *pe, int option); - int (*get_log)(struct eeh_pe *pe, int severity, - char *drv_log, unsigned long len); - int (*configure_bridge)(struct eeh_pe *pe); - int (*err_inject)(struct eeh_pe *pe, int type, int func, - unsigned long addr, unsigned long mask); - int (*next_error)(struct eeh_pe **pe); -}; -#endif /* CONFIG_EEH */ - #define PNV_PHB_FLAG_EEH (1 << 0) struct pnv_phb { @@ -104,10 +88,6 @@ struct pnv_phb { int initialized; spinlock_t lock; -#ifdef CONFIG_EEH - struct pnv_eeh_ops *eeh_ops; -#endif - #ifdef CONFIG_DEBUG_FS int has_dbgfs; struct dentry *dbgfs; @@ -213,15 +193,12 @@ struct pnv_phb { }; extern struct pci_ops pnv_pci_ops; -#ifdef CONFIG_EEH -extern struct pnv_eeh_ops ioda_eeh_ops; -#endif void pnv_pci_dump_phb_diag_data(struct pci_controller *hose, unsigned char *log_buff); -int pnv_pci_cfg_read(struct device_node *dn, +int pnv_pci_cfg_read(struct pci_dn *pdn, int where, int size, u32 *val); -int pnv_pci_cfg_write(struct device_node *dn, +int pnv_pci_cfg_write(struct pci_dn *pdn, int where, int size, u32 val); extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl, void *tce_mem, u64 tce_size, @@ -232,6 +209,6 @@ extern void pnv_pci_init_ioda2_phb(struct device_node *np); extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl, __be64 *startp, __be64 *endp, bool rm); extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev); -extern int ioda_eeh_phb_reset(struct pci_controller *hose, int option); +extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option); #endif /* __POWERNV_PCI_H */ diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c index a6c7e19f5eb3..2039397cc75d 100644 --- a/arch/powerpc/platforms/pseries/eeh_pseries.c +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c @@ -118,9 +118,8 @@ static int pseries_eeh_init(void) return 0; } -static int pseries_eeh_cap_start(struct device_node *dn) +static int pseries_eeh_cap_start(struct pci_dn *pdn) { - struct pci_dn *pdn = PCI_DN(dn); u32 status; if (!pdn) @@ -134,10 +133,9 @@ static int pseries_eeh_cap_start(struct device_node *dn) } -static int pseries_eeh_find_cap(struct device_node *dn, int cap) +static int pseries_eeh_find_cap(struct pci_dn *pdn, int cap) { - struct pci_dn *pdn = PCI_DN(dn); - int pos = pseries_eeh_cap_start(dn); + int pos = pseries_eeh_cap_start(pdn); int cnt = 48; /* Maximal number of capabilities */ u32 id; @@ -160,10 +158,9 @@ static int pseries_eeh_find_cap(struct device_node *dn, int cap) return 0; } -static int pseries_eeh_find_ecap(struct device_node *dn, int cap) +static int pseries_eeh_find_ecap(struct pci_dn *pdn, int cap) { - struct pci_dn *pdn = PCI_DN(dn); - struct eeh_dev *edev = of_node_to_eeh_dev(dn); + struct eeh_dev *edev = pdn_to_eeh_dev(pdn); u32 header; int pos = 256; int ttl = (4096 - 256) / 8; @@ -191,53 +188,44 @@ static int pseries_eeh_find_ecap(struct device_node *dn, int cap) } /** - * pseries_eeh_of_probe - EEH probe on the given device - * @dn: OF node - * @flag: Unused + * pseries_eeh_probe - EEH probe on the given device + * @pdn: PCI device node + * @data: Unused * * When EEH module is installed during system boot, all PCI devices * are checked one by one to see if it supports EEH. The function * is introduced for the purpose. */ -static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) +static void *pseries_eeh_probe(struct pci_dn *pdn, void *data) { struct eeh_dev *edev; struct eeh_pe pe; - struct pci_dn *pdn = PCI_DN(dn); - const __be32 *classp, *vendorp, *devicep; - u32 class_code; - const __be32 *regs; u32 pcie_flags; int enable = 0; int ret; /* Retrieve OF node and eeh device */ - edev = of_node_to_eeh_dev(dn); - if (edev->pe || !of_device_is_available(dn)) + edev = pdn_to_eeh_dev(pdn); + if (!edev || edev->pe) return NULL; - /* Retrieve class/vendor/device IDs */ - classp = of_get_property(dn, "class-code", NULL); - vendorp = of_get_property(dn, "vendor-id", NULL); - devicep = of_get_property(dn, "device-id", NULL); - - /* Skip for bad OF node or PCI-ISA bridge */ - if (!classp || !vendorp || !devicep) - return NULL; - if (dn->type && !strcmp(dn->type, "isa")) + /* Check class/vendor/device IDs */ + if (!pdn->vendor_id || !pdn->device_id || !pdn->class_code) return NULL; - class_code = of_read_number(classp, 1); + /* Skip for PCI-ISA bridge */ + if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) + return NULL; /* * Update class code and mode of eeh device. We need * correctly reflects that current device is root port * or PCIe switch downstream port. */ - edev->class_code = class_code; - edev->pcix_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_PCIX); - edev->pcie_cap = pseries_eeh_find_cap(dn, PCI_CAP_ID_EXP); - edev->aer_cap = pseries_eeh_find_ecap(dn, PCI_EXT_CAP_ID_ERR); + edev->class_code = pdn->class_code; + edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); + edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP); + edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); edev->mode &= 0xFFFFFF00; if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { edev->mode |= EEH_DEV_BRIDGE; @@ -252,24 +240,16 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) } } - /* Retrieve the device address */ - regs = of_get_property(dn, "reg", NULL); - if (!regs) { - pr_warn("%s: OF node property %s::reg not found\n", - __func__, dn->full_name); - return NULL; - } - /* Initialize the fake PE */ memset(&pe, 0, sizeof(struct eeh_pe)); pe.phb = edev->phb; - pe.config_addr = of_read_number(regs, 1); + pe.config_addr = (pdn->busno << 16) | (pdn->devfn << 8); /* Enable EEH on the device */ ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); if (!ret) { - edev->config_addr = of_read_number(regs, 1); /* Retrieve PE address */ + edev->config_addr = (pdn->busno << 16) | (pdn->devfn << 8); edev->pe_config_addr = eeh_ops->get_pe_addr(&pe); pe.addr = edev->pe_config_addr; @@ -285,16 +265,17 @@ static void *pseries_eeh_of_probe(struct device_node *dn, void *flag) eeh_add_flag(EEH_ENABLED); eeh_add_to_parent_pe(edev); - pr_debug("%s: EEH enabled on %s PHB#%d-PE#%x, config addr#%x\n", - __func__, dn->full_name, pe.phb->global_number, - pe.addr, pe.config_addr); - } else if (dn->parent && of_node_to_eeh_dev(dn->parent) && - (of_node_to_eeh_dev(dn->parent))->pe) { + pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%d-PE#%x\n", + __func__, pdn->busno, PCI_SLOT(pdn->devfn), + PCI_FUNC(pdn->devfn), pe.phb->global_number, + pe.addr); + } else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) && + (pdn_to_eeh_dev(pdn->parent))->pe) { /* This device doesn't support EEH, but it may have an * EEH parent, in which case we mark it as supported. */ - edev->config_addr = of_node_to_eeh_dev(dn->parent)->config_addr; - edev->pe_config_addr = of_node_to_eeh_dev(dn->parent)->pe_config_addr; + edev->config_addr = pdn_to_eeh_dev(pdn->parent)->config_addr; + edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr; eeh_add_to_parent_pe(edev); } } @@ -670,45 +651,36 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe) /** * pseries_eeh_read_config - Read PCI config space - * @dn: device node + * @pdn: PCI device node * @where: PCI address * @size: size to read * @val: return value * * Read config space from the speicifed device */ -static int pseries_eeh_read_config(struct device_node *dn, int where, int size, u32 *val) +static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val) { - struct pci_dn *pdn; - - pdn = PCI_DN(dn); - return rtas_read_config(pdn, where, size, val); } /** * pseries_eeh_write_config - Write PCI config space - * @dn: device node + * @pdn: PCI device node * @where: PCI address * @size: size to write * @val: value to be written * * Write config space to the specified device */ -static int pseries_eeh_write_config(struct device_node *dn, int where, int size, u32 val) +static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val) { - struct pci_dn *pdn; - - pdn = PCI_DN(dn); - return rtas_write_config(pdn, where, size, val); } static struct eeh_ops pseries_eeh_ops = { .name = "pseries", .init = pseries_eeh_init, - .of_probe = pseries_eeh_of_probe, - .dev_probe = NULL, + .probe = pseries_eeh_probe, .set_option = pseries_eeh_set_option, .get_pe_addr = pseries_eeh_get_pe_addr, .get_state = pseries_eeh_get_state, diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 691a154c286d..c8d24f9a6948 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -195,6 +195,7 @@ static struct device_node *find_pe_total_msi(struct pci_dev *dev, int *total) static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) { struct device_node *dn; + struct pci_dn *pdn; struct eeh_dev *edev; /* Found our PE and assume 8 at that point. */ @@ -204,10 +205,11 @@ static struct device_node *find_pe_dn(struct pci_dev *dev, int *total) return NULL; /* Get the top level device in the PE */ - edev = of_node_to_eeh_dev(dn); + edev = pdn_to_eeh_dev(PCI_DN(dn)); if (edev->pe) edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list); - dn = eeh_dev_to_of_node(edev); + pdn = eeh_dev_to_pdn(edev); + dn = pdn ? pdn->node : NULL; if (!dn) return NULL; diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c index 89e23811199c..f735f4fee48c 100644 --- a/arch/powerpc/platforms/pseries/pci_dlpar.c +++ b/arch/powerpc/platforms/pseries/pci_dlpar.c @@ -82,7 +82,7 @@ struct pci_controller *init_phb_dynamic(struct device_node *dn) eeh_dev_phb_init_dynamic(phb); if (dn->child) - eeh_add_device_tree_early(dn); + eeh_add_device_tree_early(PCI_DN(dn)); pcibios_scan_phb(phb); pcibios_finish_adding_to_bus(phb->bus); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e445b6701f50..70304070a260 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -265,7 +265,7 @@ static int pci_dn_reconfig_notifier(struct notifier_block *nb, unsigned long act update_dn_pci_info(np, pci->phb); /* Create EEH device for the OF node */ - eeh_dev_init(np, pci->phb); + eeh_dev_init(PCI_DN(np), pci->phb); } break; default: |