diff options
author | Wei Hu <weh@microsoft.com> | 2020-07-27 15:17:31 +0800 |
---|---|---|
committer | Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> | 2020-07-27 12:10:16 +0100 |
commit | d6af2ed29c7c1c311b96dac989dcb991e90ee195 (patch) | |
tree | 2d2bd2aa86b746c90a30bcae85c6622f93c6d5a9 | |
parent | b3a9e3b9622ae10064826dccb4f7a52bd88c7407 (diff) | |
download | linux-stable-d6af2ed29c7c1c311b96dac989dcb991e90ee195.tar.gz linux-stable-d6af2ed29c7c1c311b96dac989dcb991e90ee195.tar.bz2 linux-stable-d6af2ed29c7c1c311b96dac989dcb991e90ee195.zip |
PCI: hv: Fix a timing issue which causes kdump to fail occasionally
Kdump could fail sometime on Hyper-V guest because the retry in
hv_pci_enter_d0() releases child device structures in hv_pci_bus_exit().
Although there is a second asynchronous device relations message sending
from the host, if this message arrives to the guest after
hv_send_resource_allocated() is called, the retry would fail.
Fix the problem by moving retry to hv_pci_probe() and start the retry
from hv_pci_query_relations() call. This will cause a device relations
message to arrive to the guest synchronously; the guest would then be
able to rebuild the child device structures before calling
hv_send_resource_allocated().
Link: https://lore.kernel.org/r/20200727071731.18516-1-weh@microsoft.com
Fixes: c81992e7f4aa ("PCI: hv: Retry PCI bus D0 entry on invalid device state")
Signed-off-by: Wei Hu <weh@microsoft.com>
[lorenzo.pieralisi@arm.com: fixed a comment and commit log]
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
-rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 71 |
1 files changed, 37 insertions, 34 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index bf40ff09c99d..d0033ff6c143 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -2759,10 +2759,8 @@ static int hv_pci_enter_d0(struct hv_device *hdev) struct pci_bus_d0_entry *d0_entry; struct hv_pci_compl comp_pkt; struct pci_packet *pkt; - bool retry = true; int ret; -enter_d0_retry: /* * Tell the host that the bus is ready to use, and moved into the * powered-on state. This includes telling the host which region @@ -2789,38 +2787,6 @@ enter_d0_retry: if (ret) goto exit; - /* - * In certain case (Kdump) the pci device of interest was - * not cleanly shut down and resource is still held on host - * side, the host could return invalid device status. - * We need to explicitly request host to release the resource - * and try to enter D0 again. - */ - if (comp_pkt.completion_status < 0 && retry) { - retry = false; - - dev_err(&hdev->device, "Retrying D0 Entry\n"); - - /* - * Hv_pci_bus_exit() calls hv_send_resource_released() - * to free up resources of its child devices. - * In the kdump kernel we need to set the - * wslot_res_allocated to 255 so it scans all child - * devices to release resources allocated in the - * normal kernel before panic happened. - */ - hbus->wslot_res_allocated = 255; - - ret = hv_pci_bus_exit(hdev, true); - - if (ret == 0) { - kfree(pkt); - goto enter_d0_retry; - } - dev_err(&hdev->device, - "Retrying D0 failed with ret %d\n", ret); - } - if (comp_pkt.completion_status < 0) { dev_err(&hdev->device, "PCI Pass-through VSP failed D0 Entry with status %x\n", @@ -3058,6 +3024,7 @@ static int hv_pci_probe(struct hv_device *hdev, struct hv_pcibus_device *hbus; u16 dom_req, dom; char *name; + bool enter_d0_retry = true; int ret; /* @@ -3178,11 +3145,47 @@ static int hv_pci_probe(struct hv_device *hdev, if (ret) goto free_fwnode; +retry: ret = hv_pci_query_relations(hdev); if (ret) goto free_irq_domain; ret = hv_pci_enter_d0(hdev); + /* + * In certain case (Kdump) the pci device of interest was + * not cleanly shut down and resource is still held on host + * side, the host could return invalid device status. + * We need to explicitly request host to release the resource + * and try to enter D0 again. + * Since the hv_pci_bus_exit() call releases structures + * of all its child devices, we need to start the retry from + * hv_pci_query_relations() call, requesting host to send + * the synchronous child device relations message before this + * information is needed in hv_send_resources_allocated() + * call later. + */ + if (ret == -EPROTO && enter_d0_retry) { + enter_d0_retry = false; + + dev_err(&hdev->device, "Retrying D0 Entry\n"); + + /* + * Hv_pci_bus_exit() calls hv_send_resources_released() + * to free up resources of its child devices. + * In the kdump kernel we need to set the + * wslot_res_allocated to 255 so it scans all child + * devices to release resources allocated in the + * normal kernel before panic happened. + */ + hbus->wslot_res_allocated = 255; + ret = hv_pci_bus_exit(hdev, true); + + if (ret == 0) + goto retry; + + dev_err(&hdev->device, + "Retrying D0 failed with ret %d\n", ret); + } if (ret) goto free_irq_domain; |