summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Gavin Shan <gwshan@linux.vnet.ibm.com> 2014-09-30 12:38:59 +1000
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2014-10-30 09:43:15 -0700
commit: 11b477dd03bc4a0194813813744ee6f0cdcb8f79 (patch)
tree: b009a7c381fba7995f92cc08e0076c93124def85
parent: ef409eff9c2021f233d7ac108ffb5257d69df8eb (diff)
download: linux-stable-11b477dd03bc4a0194813813744ee6f0cdcb8f79.tar.gz
linux-stable-11b477dd03bc4a0194813813744ee6f0cdcb8f79.tar.bz2
linux-stable-11b477dd03bc4a0194813813744ee6f0cdcb8f79.zip
powerpc/eeh: Clear frozen device state in time
commit 22fca17924094113fe79c1db5135290e1a84ad4b upstream. The problem was reported by Carol: In the scenario of passing an mlx4 adapter through to a guest, the EEH error could be recovered successfully. However, when returning the device back to the host, the driver (mlx4_core.ko) couldn't be loaded successfully because of error number -5 (-EIO) returned from mlx4_get_ownership(), which hits an offlined PCI device. The root cause is that we failed to put the affected devices back into the normal state when clearing the PE isolated state right after PE reset. The patch fixes the above issue by putting the affected devices into the normal state when clearing the PE isolated state in eeh_pe_state_clear(). Reported-by: Carol L. Soto <clsoto@us.ibm.com> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- arch/powerpc/kernel/eeh_pe.c 21
1 file changed, 18 insertions, 3 deletions
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 00e3844525a6..eef08f0bca73 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
{
struct eeh_pe *pe = (struct eeh_pe *)data;
int state = *((int *)flag);
+ struct eeh_dev *edev, *tmp;
+ struct pci_dev *pdev;
/* Keep the state of permanently removed PE intact */
if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
@@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
pe->state &= ~state;
- /* Clear check count since last isolation */
- if (state & EEH_PE_ISOLATED)
- pe->check_count = 0;
+ /*
+ * Special treatment on clearing isolated state. Clear
+ * check count since last isolation and put all affected
+ * devices to normal state.
+ */
+ if (!(state & EEH_PE_ISOLATED))
+ return NULL;
+
+ pe->check_count = 0;
+ eeh_pe_for_each_dev(pe, edev, tmp) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (!pdev)
+ continue;
+
+ pdev->error_state = pci_channel_io_normal;
+ }
return NULL;
}