From 917e3e65c35459d52f0d0b890aa5df0cad07a051 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Jul 2011 12:18:43 -0400 Subject: xen-pcifront: Update warning comment to use 'e820_host' option. With Xen changeset 23428 "libxl: Add 'e820_host' option to config file" the E820 as seen from the host can now be passed into the guest. This means that a PV guest can now: - Use the correct PCI I/O gap. Before these patches, Linux guest would boot up and would tell: [ 0.000000] Allocating PCI resources starting at 40000000 (gap: 40000000:c0000000) while in actuality the PCI I/O gap should have been: [ 0.000000] Allocating PCI resources starting at b0000000 (gap: b0000000:4c000000) - The PV domain with PCI devices was limited to 3GB. It now can be booted with 4GB, 8GB, or whatever number you want. The PCI devices will now _not_ conflict with System RAM. Meaning the drivers can load. CC: Jesse Barnes CC: linux-pci@vger.kernel.org CC: stable@kernel.org [v2: Made the string less broken up. Suggested by Joe Perches] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/pci/xen-pcifront.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index 492b7d807fe8..d4e7a1052259 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -400,9 +400,8 @@ static int pcifront_claim_resource(struct pci_dev *dev, void *data) dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n", pci_name(dev), i); if (pci_claim_resource(dev, i)) { - dev_err(&pdev->xdev->dev, "Could not claim " - "resource %s/%d! Device offline. Try " - "giving less than 4GB to domain.\n", + dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! " + "Device offline. Try using e820_host=1 in the guest config.\n", pci_name(dev), i); } } -- cgit v1.2.3 From f4b2f07b2ed9b469ead87e06fc2fc3d12663a725 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Jul 2011 12:46:43 -0400 Subject: xen-swiotlb: Retry up three times to allocate Xen-SWIOTLB We can fail seting up Xen-SWIOTLB if: - The host does not have enough contiguous DMA32 memory available (can happen on a machine that has fragmented memory from starting, stopping many guests). - Not enough low memory (almost never happens). We retry allocating and exchanging the swath of contiguous memory up to three times. Each time we decrease the amount we need - the minimum being of 2MB. If we compleltly fail, we will print the reason for failure on the Xen console on top of doing it to earlyprintk=xen console. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 6e8c15a23201..d45cbacaf6ff 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -38,6 +38,7 @@ #include #include #include +#include /* * Used to do a quick range check in swiotlb_tbl_unmap_single and * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this @@ -146,8 +147,10 @@ xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs) void __init xen_swiotlb_init(int verbose) { unsigned long bytes; - int rc; + int rc = -ENOMEM; unsigned long nr_tbl; + char *m = NULL; + unsigned int repeat = 3; nr_tbl = swioltb_nr_tbl(); if (nr_tbl) @@ -156,16 +159,17 @@ void __init xen_swiotlb_init(int verbose) xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT); xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE); } - +retry: bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT; /* * Get IO TLB memory from any location. */ xen_io_tlb_start = alloc_bootmem(bytes); - if (!xen_io_tlb_start) - panic("Cannot allocate SWIOTLB buffer"); - + if (!xen_io_tlb_start) { + m = "Cannot allocate Xen-SWIOTLB buffer!\n"; + goto error; + } xen_io_tlb_end = xen_io_tlb_start + bytes; /* * And replace that memory with pages under 4GB. @@ -173,17 +177,28 @@ void __init xen_swiotlb_init(int verbose) rc = xen_swiotlb_fixup(xen_io_tlb_start, bytes, xen_io_tlb_nslabs); - if (rc) + if (rc) { + free_bootmem(__pa(xen_io_tlb_start), bytes); + m = "Failed to get contiguous memory for DMA from Xen!\n"\ + "You either: don't have the permissions, do not have"\ + " enough free memory under 4GB, or the hypervisor memory"\ + "is too fragmented!"; goto error; - + } start_dma_addr = xen_virt_to_bus(xen_io_tlb_start); swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose); return; error: - panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\ - "We either don't have the permission or you do not have enough"\ - "free memory under 4GB!\n", rc); + if (repeat--) { + xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */ + (xen_io_tlb_nslabs >> 1)); + printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n", + (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); + goto retry; + } + xen_raw_printk("%s (rc:%d)", rc, m); + panic("%s (rc:%d)", rc, m); } void * -- cgit v1.2.3 From ab2a47bd242d6cdcf6b2b64797f271c6f0a6d338 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 22 Jul 2011 12:51:48 -0400 Subject: xen-swiotlb: Fix wrong panic. Propagate the baremetal git commit "swiotlb: fix wrong panic" (fba99fa38b023224680308a482e12a0eca87e4e1) in the Xen-SWIOTLB version. wherein swiotlb's map_page wrongly calls panic() when it can't find a buffer fit for device's dma mask. It should return an error instead. Devices with an odd dma mask (i.e. under 4G) like b44 network card hit this bug (the system crashes): http://marc.info/?l=linux-kernel&m=129648943830106&w=2 If xen-swiotlb returns an error, b44 driver can use the own bouncing mechanism. CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index d45cbacaf6ff..ea8c28950322 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -293,9 +293,10 @@ dma_addr_t xen_swiotlb_map_page(struct device *dev, struct page *page, /* * Ensure that the address returned is DMA'ble */ - if (!dma_capable(dev, dev_addr, size)) - panic("map_single: bounce buffer is not DMA'ble"); - + if (!dma_capable(dev, dev_addr, size)) { + swiotlb_tbl_unmap_single(dev, map, size, dir); + dev_addr = 0; + } return dev_addr; } EXPORT_SYMBOL_GPL(xen_swiotlb_map_page); -- cgit v1.2.3 From 61ca79831ce52c23b3a130f3c2351751e00e0ac9 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 11 Aug 2011 13:57:07 -0700 Subject: xen-swiotlb: fix printk and panic args Fix printk() and panic() args [swap them] to fix build warnings: drivers/xen/swiotlb-xen.c:201: warning: format '%s' expects type 'char *', but argument 2 has type 'int' drivers/xen/swiotlb-xen.c:201: warning: format '%d' expects type 'int', but argument 3 has type 'char *' drivers/xen/swiotlb-xen.c:202: warning: format '%s' expects type 'char *', but argument 2 has type 'int' drivers/xen/swiotlb-xen.c:202: warning: format '%d' expects type 'int', but argument 3 has type 'char *' Signed-off-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index ea8c28950322..0408f3225722 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -197,8 +197,8 @@ error: (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20); goto retry; } - xen_raw_printk("%s (rc:%d)", rc, m); - panic("%s (rc:%d)", rc, m); + xen_raw_printk("%s (rc:%d)", m, rc); + panic("%s (rc:%d)", m, rc); } void * -- cgit v1.2.3 From 12e13ac84ca70e6641a4750e9317aa2d2c1f6f50 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 17 Aug 2011 09:32:32 +0100 Subject: xen/pci: make bus notifier handler return sane values Notifier functions are expected to return NOTIFY_* codes, not -E... ones. In particular, since the respective hypercalls failing is not fatal to the operation of the Dom0 kernel, it must be avoided to return negative values here as those would make it appear as if NOTIFY_STOP_MASK wa set, suppressing further notification calls to other interested parties (which is also why we don't want to use notifier_from_errno() here). While at it, also notify the user of a failed hypercall. Signed-off-by: Jan Beulich [v1: Added dev_err and the disable MSI/MSI-X call] [v2: Removed the disable MSI/MSI-X call] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/pci.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index cef4bafc07dc..02c402b1ed80 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -96,13 +96,16 @@ static int xen_pci_notifier(struct notifier_block *nb, r = xen_remove_device(dev); break; default: - break; + return NOTIFY_DONE; } - - return r; + if (r) + dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n", + action == BUS_NOTIFY_ADD_DEVICE ? "add" : + (action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?")); + return NOTIFY_OK; } -struct notifier_block device_nb = { +static struct notifier_block device_nb = { .notifier_call = xen_pci_notifier, }; -- cgit v1.2.3 From 6810df88dcfc22de267caf23eb072ffb97b3c411 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 25 Aug 2011 16:13:54 -0400 Subject: xen-swiotlb: When doing coherent alloc/dealloc check before swizzling the MFNs. The process to swizzle a Machine Frame Number (MFN) is not always necessary. Especially if we know that we actually do not have to do it. In this patch we check the MFN against the device's coherent DMA mask and if the requested page(s) are contingous. If it all checks out we will just return the bus addr without doing the memory swizzle. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 0408f3225722..c984768d98ca 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -209,6 +209,8 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, int order = get_order(size); u64 dma_mask = DMA_BIT_MASK(32); unsigned long vstart; + phys_addr_t phys; + dma_addr_t dev_addr; /* * Ignore region specifiers - the kernel's ideas of @@ -224,18 +226,26 @@ xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size, vstart = __get_free_pages(flags, order); ret = (void *)vstart; + if (!ret) + return ret; + if (hwdev && hwdev->coherent_dma_mask) - dma_mask = dma_alloc_coherent_mask(hwdev, flags); + dma_mask = hwdev->coherent_dma_mask; - if (ret) { + phys = virt_to_phys(ret); + dev_addr = xen_phys_to_bus(phys); + if (((dev_addr + size - 1 <= dma_mask)) && + !range_straddles_page_boundary(phys, size)) + *dma_handle = dev_addr; + else { if (xen_create_contiguous_region(vstart, order, fls64(dma_mask)) != 0) { free_pages(vstart, order); return NULL; } - memset(ret, 0, size); *dma_handle = virt_to_machine(ret).maddr; } + memset(ret, 0, size); return ret; } EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent); @@ -245,11 +255,21 @@ xen_swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dev_addr) { int order = get_order(size); + phys_addr_t phys; + u64 dma_mask = DMA_BIT_MASK(32); if (dma_release_from_coherent(hwdev, order, vaddr)) return; - xen_destroy_contiguous_region((unsigned long)vaddr, order); + if (hwdev && hwdev->coherent_dma_mask) + dma_mask = hwdev->coherent_dma_mask; + + phys = virt_to_phys(vaddr); + + if (((dev_addr + size - 1 > dma_mask)) || + range_straddles_page_boundary(phys, size)) + xen_destroy_contiguous_region((unsigned long)vaddr, order); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent); -- cgit v1.2.3 From c4c303c7c5679b4b368e12f41124aee29c325b76 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 25 Aug 2011 18:30:11 +0200 Subject: xen/pv-on-hvm kexec: prevent crash in xenwatch_thread() when stale watch events arrive During repeated kexec boots xenwatch_thread() can crash because xenbus_watch->callback is cleared by xenbus_watch_path() if a node/token combo for a new watch happens to match an already registered watch from an old kernel. In this case xs_watch returns -EEXISTS, then register_xenbus_watch() does not remove the to-be-registered watch from the list of active watches but returns the -EEXISTS to the caller anyway. Because the watch is still active in xenstored it will cause an event which will arrive in the new kernel. process_msg() will find the encapsulated struct xenbus_watch in its list of registered watches and puts the "empty" watch handle in the queue for xenwatch_thread(). xenwatch_thread() then calls ->callback which was cleared earlier by xenbus_watch_path(). To prevent that crash in a guest running on an old xen toolstack remove the special -EEXIST handling. v2: - remove the EEXIST handing in register_xenbus_watch() instead of checking for ->callback in process_msg() Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Olaf Hering --- drivers/xen/xenbus/xenbus_xs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 5534690075af..46347a49c5b8 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -638,8 +638,7 @@ int register_xenbus_watch(struct xenbus_watch *watch) err = xs_watch(watch->node, token); - /* Ignore errors due to multiple registration. */ - if ((err != 0) && (err != -EEXIST)) { + if (err) { spin_lock(&watches_lock); list_del(&watch->list); spin_unlock(&watches_lock); -- cgit v1.2.3 From 62cc5fc7b2e0218144e162afb8191db9b924b5e6 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 25 Aug 2011 18:30:48 +0200 Subject: xen/pv-on-hvm kexec: rebind virqs to existing eventchannel ports During a kexec boot some virqs such as timer and debugirq were already registered by the old kernel. The hypervisor will return -EEXISTS from the new EVTCHNOP_bind_virq request and the BUG in bind_virq_to_irq() triggers. Catch the -EEXISTS error and loop through all possible ports to find what port belongs to the virq/cpu combo. Signed-off-by: Olaf Hering [v2: - use NR_EVENT_CHANNELS instead of private MAX_EVTCHNS] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 30df85d8fca8..31493e906bbd 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -877,11 +877,32 @@ static int bind_interdomain_evtchn_to_irq(unsigned int remote_domain, return err ? : bind_evtchn_to_irq(bind_interdomain.local_port); } +static int find_virq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_status status; + int port, rc = -ENOENT; + + memset(&status, 0, sizeof(status)); + for (port = 0; port <= NR_EVENT_CHANNELS; port++) { + status.dom = DOMID_SELF; + status.port = port; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status); + if (rc < 0) + continue; + if (status.status != EVTCHNSTAT_virq) + continue; + if (status.u.virq == virq && status.vcpu == cpu) { + rc = port; + break; + } + } + return rc; +} int bind_virq_to_irq(unsigned int virq, unsigned int cpu) { struct evtchn_bind_virq bind_virq; - int evtchn, irq; + int evtchn, irq, ret; spin_lock(&irq_mapping_update_lock); @@ -897,10 +918,16 @@ int bind_virq_to_irq(unsigned int virq, unsigned int cpu) bind_virq.virq = virq; bind_virq.vcpu = cpu; - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, - &bind_virq) != 0) - BUG(); - evtchn = bind_virq.port; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, + &bind_virq); + if (ret == 0) + evtchn = bind_virq.port; + else { + if (ret == -EEXIST) + ret = find_virq(virq, cpu); + BUG_ON(ret < 0); + evtchn = ret; + } xen_irq_info_virq_init(cpu, irq, evtchn, virq); -- cgit v1.2.3 From 116df6f004af81925dcaa90d4a3b76da6b009427 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 25 Aug 2011 18:34:45 +0200 Subject: xen/pv-on-hvm kexec+kdump: reset PV devices in kexec or crash kernel After triggering a crash dump in a HVM guest, the PV backend drivers will remain in Connected state. When the kdump kernel starts the PV drivers will skip such devices. As a result, no root device is found and the vmcore cant be saved. A similar situation happens after a kexec boot, here the devices will be in the Closed state. With this change all frontend devices with state XenbusStateConnected or XenbusStateClosed will be reset by changing the state file to Closing -> Closed -> Initializing. This will trigger a disconnect in the backend drivers. Now the frontend drivers will find the backend drivers in state Initwait and can connect. Signed-off-by: Olaf Hering [v2: - add timeout when waiting for backend state change (based on feedback from Ian Campell) - extent printk message to include backend string - add comment to fall-through case in xenbus_reset_frontend] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_comms.c | 4 +- drivers/xen/xenbus/xenbus_probe_frontend.c | 121 +++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c index 090c61ee8fd0..2eff7a6aaa20 100644 --- a/drivers/xen/xenbus/xenbus_comms.c +++ b/drivers/xen/xenbus/xenbus_comms.c @@ -212,7 +212,9 @@ int xb_init_comms(void) printk(KERN_WARNING "XENBUS response ring is not quiescent " "(%08x:%08x): fixing up\n", intf->rsp_cons, intf->rsp_prod); - intf->rsp_cons = intf->rsp_prod; + /* breaks kdump */ + if (!reset_devices) + intf->rsp_cons = intf->rsp_prod; } if (xenbus_irq) { diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index b6a2690c9d49..b521ce43d325 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -252,10 +252,131 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, } EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); +static int backend_state; + +static void xenbus_reset_backend_state_changed(struct xenbus_watch *w, + const char **v, unsigned int l) +{ + xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state); + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + v[XS_WATCH_PATH], xenbus_strstate(backend_state)); + wake_up(&backend_state_wq); +} + +static void xenbus_reset_wait_for_backend(char *be, int expected) +{ + long timeout; + timeout = wait_event_interruptible_timeout(backend_state_wq, + backend_state == expected, 5 * HZ); + if (timeout <= 0) + printk(KERN_INFO "XENBUS: backend %s timed out.\n", be); +} + +/* + * Reset frontend if it is in Connected or Closed state. + * Wait for backend to catch up. + * State Connected happens during kdump, Closed after kexec. + */ +static void xenbus_reset_frontend(char *fe, char *be, int be_state) +{ + struct xenbus_watch be_watch; + + printk(KERN_DEBUG "XENBUS: backend %s %s\n", + be, xenbus_strstate(be_state)); + + memset(&be_watch, 0, sizeof(be_watch)); + be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be); + if (!be_watch.node) + return; + + be_watch.callback = xenbus_reset_backend_state_changed; + backend_state = XenbusStateUnknown; + + printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be); + register_xenbus_watch(&be_watch); + + /* fall through to forward backend to state XenbusStateInitialising */ + switch (be_state) { + case XenbusStateConnected: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing); + xenbus_reset_wait_for_backend(be, XenbusStateClosing); + + case XenbusStateClosing: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed); + xenbus_reset_wait_for_backend(be, XenbusStateClosed); + + case XenbusStateClosed: + xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising); + xenbus_reset_wait_for_backend(be, XenbusStateInitWait); + } + + unregister_xenbus_watch(&be_watch); + printk(KERN_INFO "XENBUS: reconnect done on %s\n", be); + kfree(be_watch.node); +} + +static void xenbus_check_frontend(char *class, char *dev) +{ + int be_state, fe_state, err; + char *backend, *frontend; + + frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev); + if (!frontend) + return; + + err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state); + if (err != 1) + goto out; + + switch (fe_state) { + case XenbusStateConnected: + case XenbusStateClosed: + printk(KERN_DEBUG "XENBUS: frontend %s %s\n", + frontend, xenbus_strstate(fe_state)); + backend = xenbus_read(XBT_NIL, frontend, "backend", NULL); + if (!backend || IS_ERR(backend)) + goto out; + err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state); + if (err == 1) + xenbus_reset_frontend(frontend, backend, be_state); + kfree(backend); + break; + default: + break; + } +out: + kfree(frontend); +} + +static void xenbus_reset_state(void) +{ + char **devclass, **dev; + int devclass_n, dev_n; + int i, j; + + devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n); + if (IS_ERR(devclass)) + return; + + for (i = 0; i < devclass_n; i++) { + dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n); + if (IS_ERR(dev)) + continue; + for (j = 0; j < dev_n; j++) + xenbus_check_frontend(devclass[i], dev[j]); + kfree(dev); + } + kfree(devclass); +} + static int frontend_probe_and_watch(struct notifier_block *notifier, unsigned long event, void *data) { + /* reset devices in Connected or Closed state */ + if (xen_hvm_domain()) + xenbus_reset_state(); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); -- cgit v1.2.3 From 5fa99911a346e1f95c7932ff99a76693037e7927 Mon Sep 17 00:00:00 2001 From: Thomas Meyer Date: Sat, 6 Aug 2011 11:05:35 +0200 Subject: xen/pciback: use resource_size() Use resource_size function on resource object instead of explicit computation. The semantic patch that makes this output is available in scripts/coccinelle/api/resource_size.cocci. More information about semantic patching is available at http://coccinelle.lip6.fr/ Signed-off-by: Thomas Meyer Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/conf_space_header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index da3cbdfcb5dc..f14b30f71464 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -187,7 +187,7 @@ static inline void read_dev_bar(struct pci_dev *dev, bar_info->val = res[pos].start | (res[pos].flags & PCI_REGION_FLAG_MASK); - bar_info->len_val = res[pos].end - res[pos].start + 1; + bar_info->len_val = resource_size(&res[pos]); } static void *bar_init(struct pci_dev *dev, int offset) -- cgit v1.2.3 From 04df355227fa75c015491153cfc93e7ea7a80112 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Sep 2011 17:32:15 +0100 Subject: xen/pciback: use mutex rather than spinlock in passthrough backend To accommodate the call to the callback function from __xen_pcibk_publish_pci_roots(), which so far dropped and the re- acquired the lock without checking that the list didn't actually change, convert the code to use a mutex instead (observing that the code taking the lock won't ever get called from non-sleepable context). As a result, drop the bogus use of list_for_each_entry_safe() and remove the inappropriate dropping of the lock. Signed-off-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/passthrough.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index 1d32a9a42c01..e01e4b6ef267 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -7,13 +7,13 @@ #include #include -#include +#include #include "pciback.h" struct passthrough_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list; - spinlock_t lock; + struct mutex lock; }; static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, @@ -24,9 +24,8 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; struct pci_dev *dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry(dev_entry, &dev_data->dev_list, list) { if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus) @@ -37,7 +36,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); return dev; } @@ -48,7 +47,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, { struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry; - unsigned long flags; unsigned int domain, bus, devfn; int err; @@ -57,9 +55,9 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, return -ENOMEM; dev_entry->dev = dev; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_add_tail(&dev_entry->list, &dev_data->dev_list); - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); /* Publish this device. */ domain = (unsigned int)pci_domain_nr(dev->bus); @@ -76,9 +74,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct passthrough_dev_data *dev_data = pdev->pci_dev_data; struct pci_dev_entry *dev_entry, *t; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&dev_data->lock, flags); + mutex_lock(&dev_data->lock); list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) { if (dev_entry->dev == dev) { @@ -88,7 +85,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&dev_data->lock, flags); + mutex_unlock(&dev_data->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -102,7 +99,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!dev_data) return -ENOMEM; - spin_lock_init(&dev_data->lock); + mutex_init(&dev_data->lock); INIT_LIST_HEAD(&dev_data->dev_list); @@ -116,14 +113,14 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, { int err = 0; struct passthrough_dev_data *dev_data = pdev->pci_dev_data; - struct pci_dev_entry *dev_entry, *e, *tmp; + struct pci_dev_entry *dev_entry, *e; struct pci_dev *dev; int found; unsigned int domain, bus; - spin_lock(&dev_data->lock); + mutex_lock(&dev_data->lock); - list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) { + list_for_each_entry(dev_entry, &dev_data->dev_list, list) { /* Only publish this device as a root if none of its * parent bridges are exported */ @@ -142,16 +139,13 @@ static int __xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, bus = (unsigned int)dev_entry->dev->bus->number; if (!found) { - spin_unlock(&dev_data->lock); err = publish_root_cb(pdev, domain, bus); if (err) break; - spin_lock(&dev_data->lock); } } - if (!err) - spin_unlock(&dev_data->lock); + mutex_unlock(&dev_data->lock); return err; } -- cgit v1.2.3 From 402c5e15b44070461dcc2f41536c16d0cfbca9c3 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Wed, 21 Sep 2011 16:22:11 -0400 Subject: xen/pciback: miscellaneous adjustments This is a minor bugfix and a set of small cleanups; as it is not clear whether this needs splitting into pieces (and if so, at what granularity), it is a single combined patch. - add a missing return statement to an error path in kill_domain_by_device() - use pci_is_enabled() rather than raw atomic_read() - remove a bogus attempt to zero-terminate an already zero-terminated string - #define DRV_NAME once uniformly in the shared local header - make DRIVER_ATTR() variables static - eliminate a pointless use of list_for_each_entry_safe() - add MODULE_ALIAS() - a little bit of constification - adjust a few messages - remove stray semicolons from inline function definitions Signed-off-by: Jan Beulich [v1: Dropped the resource_size fix, altered the description] [v2: Fixed cleanpatch.pl comments] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/conf_space.c | 1 - drivers/xen/xen-pciback/conf_space_header.c | 3 +-- drivers/xen/xen-pciback/conf_space_quirks.c | 3 +-- drivers/xen/xen-pciback/passthrough.c | 2 +- drivers/xen/xen-pciback/pci_stub.c | 34 ++++++++++++++--------------- drivers/xen/xen-pciback/pciback.h | 30 +++++++++++++++---------- drivers/xen/xen-pciback/pciback_ops.c | 1 - drivers/xen/xen-pciback/vpci.c | 9 ++++---- drivers/xen/xen-pciback/xenbus.c | 5 ++--- 9 files changed, 44 insertions(+), 44 deletions(-) diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index a8031445d94e..444345afbd5c 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -15,7 +15,6 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" static int permissive; module_param(permissive, bool, 0644); diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c index f14b30f71464..3daf862d739d 100644 --- a/drivers/xen/xen-pciback/conf_space_header.c +++ b/drivers/xen/xen-pciback/conf_space_header.c @@ -15,7 +15,6 @@ struct pci_bar_info { int which; }; -#define DRV_NAME "xen-pciback" #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO)) #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER) @@ -25,7 +24,7 @@ static int command_read(struct pci_dev *dev, int offset, u16 *value, void *data) int ret; ret = xen_pcibk_read_config_word(dev, offset, value, data); - if (!atomic_read(&dev->enable_cnt)) + if (!pci_is_enabled(dev)) return ret; for (i = 0; i < PCI_ROM_RESOURCE; i++) { diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c index 921a889e65eb..7476791cab40 100644 --- a/drivers/xen/xen-pciback/conf_space_quirks.c +++ b/drivers/xen/xen-pciback/conf_space_quirks.c @@ -12,7 +12,6 @@ #include "conf_space_quirks.h" LIST_HEAD(xen_pcibk_quirks); -#define DRV_NAME "xen-pciback" static inline const struct pci_device_id * match_one_device(const struct pci_device_id *id, const struct pci_dev *dev) { @@ -36,7 +35,7 @@ static struct xen_pcibk_config_quirk *xen_pcibk_find_quirk(struct pci_dev *dev) goto out; tmp_quirk = NULL; printk(KERN_DEBUG DRV_NAME - ":quirk didn't match any device xen_pciback knows about\n"); + ": quirk didn't match any device known\n"); out: return tmp_quirk; } diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c index e01e4b6ef267..828dddc360df 100644 --- a/drivers/xen/xen-pciback/passthrough.c +++ b/drivers/xen/xen-pciback/passthrough.c @@ -176,7 +176,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return 1; } -struct xen_pcibk_backend xen_pcibk_passthrough_backend = { +const struct xen_pcibk_backend xen_pcibk_passthrough_backend = { .name = "passthrough", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index aec214ac0a14..1b474804180a 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -21,8 +21,6 @@ #include "conf_space.h" #include "conf_space_quirks.h" -#define DRV_NAME "xen-pciback" - static char *pci_devs_to_hide; wait_queue_head_t xen_pcibk_aer_wait_queue; /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops, @@ -514,12 +512,14 @@ static void kill_domain_by_device(struct pcistub_device *psdev) int err; char nodename[PCI_NODENAME_MAX]; - if (!psdev) + if (!psdev) { dev_err(&psdev->dev->dev, "device is NULL when do AER recovery/kill_domain\n"); + return; + } + snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", psdev->pdev->xdev->otherend_id); - nodename[strlen(nodename)] = '\0'; again: err = xenbus_transaction_start(&xbt); @@ -605,7 +605,7 @@ static pci_ers_result_t common_process(struct pcistub_device *psdev, if (test_bit(_XEN_PCIF_active, (unsigned long *)&psdev->pdev->sh_info->flags)) { dev_dbg(&psdev->dev->dev, - "schedule pci_conf service in xen_pcibk\n"); + "schedule pci_conf service in " DRV_NAME "\n"); xen_pcibk_test_and_schedule_op(psdev->pdev); } @@ -995,8 +995,7 @@ out: err = count; return err; } - -DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); +static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add); static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf, size_t count) @@ -1015,8 +1014,7 @@ out: err = count; return err; } - -DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); +static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove); static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) { @@ -1039,8 +1037,7 @@ static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf) return count; } - -DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); +static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL); static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) { @@ -1069,8 +1066,7 @@ static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); +static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL); static ssize_t pcistub_irq_handler_switch(struct device_driver *drv, const char *buf, @@ -1106,7 +1102,8 @@ out: err = count; return err; } -DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch); +static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, + pcistub_irq_handler_switch); static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf, size_t count) @@ -1170,8 +1167,8 @@ out: return count; } - -DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add); +static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, + pcistub_quirk_add); static ssize_t permissive_add(struct device_driver *drv, const char *buf, size_t count) @@ -1236,8 +1233,8 @@ static ssize_t permissive_show(struct device_driver *drv, char *buf) spin_unlock_irqrestore(&pcistub_devices_lock, flags); return count; } - -DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add); +static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, + permissive_add); static void pcistub_exit(void) { @@ -1374,3 +1371,4 @@ module_init(xen_pcibk_init); module_exit(xen_pcibk_cleanup); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("xen-backend:pci"); diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index a0e131a81503..d095acdc0e65 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -15,6 +15,8 @@ #include #include +#define DRV_NAME "xen-pciback" + struct pci_dev_entry { struct list_head list; struct pci_dev *dev; @@ -89,7 +91,7 @@ typedef int (*publish_pci_root_cb) (struct xen_pcibk_device *pdev, * passthrough - BDFs are exactly like in the host. */ struct xen_pcibk_backend { - char *name; + const char *name; int (*init)(struct xen_pcibk_device *pdev); void (*free)(struct xen_pcibk_device *pdev); int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev, @@ -104,9 +106,9 @@ struct xen_pcibk_backend { unsigned int devfn); }; -extern struct xen_pcibk_backend xen_pcibk_vpci_backend; -extern struct xen_pcibk_backend xen_pcibk_passthrough_backend; -extern struct xen_pcibk_backend *xen_pcibk_backend; +extern const struct xen_pcibk_backend xen_pcibk_vpci_backend; +extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend; +extern const struct xen_pcibk_backend *xen_pcibk_backend; static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev, @@ -116,13 +118,14 @@ static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, if (xen_pcibk_backend && xen_pcibk_backend->add) return xen_pcibk_backend->add(pdev, dev, devid, publish_cb); return -1; -}; +} + static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev *dev) { if (xen_pcibk_backend && xen_pcibk_backend->free) return xen_pcibk_backend->release(pdev, dev); -}; +} static inline struct pci_dev * xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, @@ -131,7 +134,8 @@ xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain, if (xen_pcibk_backend && xen_pcibk_backend->get) return xen_pcibk_backend->get(pdev, domain, bus, devfn); return NULL; -}; +} + /** * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk * before sending aer request to pcifront, so that guest could identify @@ -148,25 +152,29 @@ static inline int xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return xen_pcibk_backend->find(pcidev, pdev, domain, bus, devfn); return -1; -}; +} + static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->init) return xen_pcibk_backend->init(pdev); return -1; -}; +} + static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev, publish_pci_root_cb cb) { if (xen_pcibk_backend && xen_pcibk_backend->publish) return xen_pcibk_backend->publish(pdev, cb); return -1; -}; +} + static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev) { if (xen_pcibk_backend && xen_pcibk_backend->free) return xen_pcibk_backend->free(pdev); -}; +} + /* Handles events from front-end */ irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id); void xen_pcibk_do_op(struct work_struct *data); diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c index 8c95c3415b75..63616d7453e6 100644 --- a/drivers/xen/xen-pciback/pciback_ops.c +++ b/drivers/xen/xen-pciback/pciback_ops.c @@ -10,7 +10,6 @@ #include #include "pciback.h" -#define DRV_NAME "xen-pciback" int verbose_request; module_param(verbose_request, int, 0644); diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 4a42cfb0959d..a7785525f253 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -12,7 +12,6 @@ #include "pciback.h" #define PCI_SLOT_MAX 32 -#define DRV_NAME "xen-pciback" struct vpci_dev_data { /* Access to dev_list must be protected by lock */ @@ -150,9 +149,9 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, spin_lock_irqsave(&vpci_dev->lock, flags); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { - struct pci_dev_entry *e, *tmp; - list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot], - list) { + struct pci_dev_entry *e; + + list_for_each_entry(e, &vpci_dev->dev_list[slot], list) { if (e->dev == dev) { list_del(&e->list); found_dev = e->dev; @@ -247,7 +246,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, return found; } -struct xen_pcibk_backend xen_pcibk_vpci_backend = { +const struct xen_pcibk_backend xen_pcibk_vpci_backend = { .name = "vpci", .init = __xen_pcibk_init_devices, .free = __xen_pcibk_release_devices, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 978d2c6f5dca..522f2daa96cc 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -13,7 +13,6 @@ #include #include "pciback.h" -#define DRV_NAME "xen-pciback" #define INVALID_EVTCHN_IRQ (-1) struct workqueue_struct *xen_pcibk_wq; @@ -176,7 +175,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) { xenbus_dev_fatal(pdev->xdev, -EFAULT, "version mismatch (%s/%s) with pcifront - " - "halting xen_pcibk", + "halting " DRV_NAME, magic, XEN_PCI_MAGIC); goto out; } @@ -724,7 +723,7 @@ static struct xenbus_driver xenbus_xen_pcibk_driver = { .otherend_changed = xen_pcibk_frontend_changed, }; -struct xen_pcibk_backend *xen_pcibk_backend; +const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; int __init xen_pcibk_xenbus_register(void) { -- cgit v1.2.3 From b1766b62890e3bba1a778a20ef8bf9348d6096c2 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 16 Sep 2011 14:43:14 -0400 Subject: xen/pciback: Use mutexes when working with Xenbus state transitions. The caller that orchestrates the state changes is xenwatch_thread and it takes a mutex. In our processing of Xenbus states we can take the luxery of going to sleep on a mutex, so lets do that and also fix this bug: BUG: sleeping function called from invalid context at /linux/kernel/mutex.c:271 in_atomic(): 1, irqs_disabled(): 0, pid: 32, name: xenwatch 2 locks held by xenwatch/32: #0: (xenwatch_mutex){......}, at: [] xenwatch_thread+0x4b/0x180 #1: (&(&pdev->dev_lock)->rlock){......}, at: [] xen_pcibk_disconnect+0x1b/0x80 Pid: 32, comm: xenwatch Not tainted 3.1.0-rc6-00015-g3ce340d #2 Call Trace: [] __might_sleep+0x102/0x130 [] mutex_lock_nested+0x2f/0x50 [] unbind_from_irq+0x2c/0x1b0 [] ? free_irq+0x56/0xb0 [] unbind_from_irqhandler+0x1c/0x30 [] xen_pcibk_disconnect+0x2b/0x80 [] xen_pcibk_frontend_changed+0xe8/0x140 [] xenbus_otherend_changed+0xd2/0x150 [] ? get_parent_ip+0x11/0x50 [] frontend_changed+0x10/0x20 [] xenwatch_thread+0xb2/0x180 Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pciback.h | 2 +- drivers/xen/xen-pciback/xenbus.c | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h index d095acdc0e65..e9b4011c5f9a 100644 --- a/drivers/xen/xen-pciback/pciback.h +++ b/drivers/xen/xen-pciback/pciback.h @@ -29,7 +29,7 @@ struct pci_dev_entry { struct xen_pcibk_device { void *pci_dev_data; - spinlock_t dev_lock; + struct mutex dev_lock; struct xenbus_device *xdev; struct xenbus_watch be_watch; u8 be_watching; diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 522f2daa96cc..474d52ec3374 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -43,7 +43,7 @@ static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev) pdev->xdev = xdev; dev_set_drvdata(&xdev->dev, pdev); - spin_lock_init(&pdev->dev_lock); + mutex_init(&pdev->dev_lock); pdev->sh_info = NULL; pdev->evtchn_irq = INVALID_EVTCHN_IRQ; @@ -61,14 +61,12 @@ out: static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) { - spin_lock(&pdev->dev_lock); - + mutex_lock(&pdev->dev_lock); /* Ensure the guest can't trigger our handler before removing devices */ if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) { unbind_from_irqhandler(pdev->evtchn_irq, pdev); pdev->evtchn_irq = INVALID_EVTCHN_IRQ; } - spin_unlock(&pdev->dev_lock); /* If the driver domain started an op, make sure we complete it * before releasing the shared memory */ @@ -76,13 +74,11 @@ static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev) /* Note, the workqueue does not use spinlocks at all.*/ flush_workqueue(xen_pcibk_wq); - spin_lock(&pdev->dev_lock); if (pdev->sh_info != NULL) { xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info); pdev->sh_info = NULL; } - spin_unlock(&pdev->dev_lock); - + mutex_unlock(&pdev->dev_lock); } static void free_pdev(struct xen_pcibk_device *pdev) @@ -119,9 +115,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, goto out; } - spin_lock(&pdev->dev_lock); pdev->sh_info = vaddr; - spin_unlock(&pdev->dev_lock); err = bind_interdomain_evtchn_to_irqhandler( pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event, @@ -131,10 +125,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, "Error binding event channel to IRQ"); goto out; } - - spin_lock(&pdev->dev_lock); pdev->evtchn_irq = err; - spin_unlock(&pdev->dev_lock); err = 0; dev_dbg(&pdev->xdev->dev, "Attached!\n"); @@ -149,6 +140,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) char *magic = NULL; + mutex_lock(&pdev->dev_lock); /* Make sure we only do this setup once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateInitialised) @@ -193,6 +185,7 @@ static int xen_pcibk_attach(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err); out: + mutex_unlock(&pdev->dev_lock); kfree(magic); @@ -368,6 +361,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n"); + mutex_lock(&pdev->dev_lock); /* Make sure we only reconfigure once */ if (xenbus_read_driver_state(pdev->xdev->nodename) != XenbusStateReconfiguring) @@ -505,6 +499,7 @@ static int xen_pcibk_reconfigure(struct xen_pcibk_device *pdev) } out: + mutex_unlock(&pdev->dev_lock); return 0; } @@ -561,6 +556,7 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) char dev_str[64]; char state_str[64]; + mutex_lock(&pdev->dev_lock); /* It's possible we could get the call to setup twice, so make sure * we're not already connected. */ @@ -641,10 +637,10 @@ static int xen_pcibk_setup_backend(struct xen_pcibk_device *pdev) "Error switching to initialised state!"); out: + mutex_unlock(&pdev->dev_lock); if (!err) /* see if pcifront is already configured (if not, we'll wait) */ xen_pcibk_attach(pdev); - return err; } -- cgit v1.2.3 From 74d33dedc2fb8d98821bcf7df9800ce59456502e Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 21 Sep 2011 17:04:47 -0400 Subject: xen/pciback: use mutex rather than spinlock in vpci backend Similar to the "xen/pciback: use mutex rather than spinlock in passthrough backend" this patch converts the vpci backend to use a mutex instead of a spinlock. Note that the code taking the lock won't ever get called from non-sleepable context Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/vpci.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index a7785525f253..01222d7dd20d 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include "pciback.h" #define PCI_SLOT_MAX 32 @@ -16,7 +16,7 @@ struct vpci_dev_data { /* Access to dev_list must be protected by lock */ struct list_head dev_list[PCI_SLOT_MAX]; - spinlock_t lock; + struct mutex lock; }; static inline struct list_head *list_first(struct list_head *head) @@ -32,13 +32,12 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if (domain != 0 || bus != 0) return NULL; if (PCI_SLOT(devfn) < PCI_SLOT_MAX) { - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); list_for_each_entry(entry, &vpci_dev->dev_list[PCI_SLOT(devfn)], @@ -49,7 +48,7 @@ static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); } return dev; } @@ -70,7 +69,6 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, int err = 0, slot, func = -1; struct pci_dev_entry *t, *dev_entry; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) { err = -EFAULT; @@ -89,7 +87,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, dev_entry->dev = dev; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); /* Keep multi-function devices together on the virtual PCI bus */ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { @@ -128,7 +126,7 @@ static int __xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev, "No more space on root virtual PCI bus"); unlock: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); /* Publish this device. */ if (!err) @@ -144,9 +142,8 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, int slot; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; struct pci_dev *found_dev = NULL; - unsigned long flags; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { struct pci_dev_entry *e; @@ -162,7 +159,7 @@ static void __xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev, } out: - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_unlock(&vpci_dev->lock); if (found_dev) pcistub_put_pci_dev(found_dev); @@ -177,7 +174,7 @@ static int __xen_pcibk_init_devices(struct xen_pcibk_device *pdev) if (!vpci_dev) return -ENOMEM; - spin_lock_init(&vpci_dev->lock); + mutex_init(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) INIT_LIST_HEAD(&vpci_dev->dev_list[slot]); @@ -221,10 +218,9 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, struct pci_dev_entry *entry; struct pci_dev *dev = NULL; struct vpci_dev_data *vpci_dev = pdev->pci_dev_data; - unsigned long flags; int found = 0, slot; - spin_lock_irqsave(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); for (slot = 0; slot < PCI_SLOT_MAX; slot++) { list_for_each_entry(entry, &vpci_dev->dev_list[slot], @@ -242,7 +238,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, } } } - spin_unlock_irqrestore(&vpci_dev->lock, flags); + mutex_lock(&vpci_dev->lock); return found; } -- cgit v1.2.3 From 55e901fc1f03dd8437f877813c68b6014cdbeefd Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 22 Sep 2011 09:17:57 +0100 Subject: xen/pci: support multi-segment systems Now that the hypercall interface changes are in -unstable, make the kernel side code not ignore the segment (aka domain) number anymore (which results in pretty odd behavior on such systems). Rather, if only the old interfaces are available, don't call them for devices on non-zero segments at all. Signed-off-by: Jan Beulich [v1: Edited git description] Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/pci/xen.c | 22 ++++++++-- drivers/xen/pci.c | 94 ++++++++++++++++++++++++++++++++++++----- include/xen/interface/physdev.h | 34 ++++++++++++++- 3 files changed, 136 insertions(+), 14 deletions(-) diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index f567965c0620..265fa8814ccd 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -185,6 +185,8 @@ static void xen_teardown_msi_irq(unsigned int irq) } #ifdef CONFIG_XEN_DOM0 +static bool __read_mostly pci_seg_supported = true; + static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { int ret = 0; @@ -202,10 +204,11 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) memset(&map_irq, 0, sizeof(map_irq)); map_irq.domid = domid; - map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.type = MAP_PIRQ_TYPE_MSI_SEG; map_irq.index = -1; map_irq.pirq = -1; - map_irq.bus = dev->bus->number; + map_irq.bus = dev->bus->number | + (pci_domain_nr(dev->bus) << 16); map_irq.devfn = dev->devfn; if (type == PCI_CAP_ID_MSIX) { @@ -222,7 +225,20 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) map_irq.entry_nr = msidesc->msi_attrib.entry_nr; } - ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq); + ret = -EINVAL; + if (pci_seg_supported) + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, + &map_irq); + if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { + map_irq.type = MAP_PIRQ_TYPE_MSI; + map_irq.index = -1; + map_irq.pirq = -1; + map_irq.bus = dev->bus->number; + ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, + &map_irq); + if (ret != -EINVAL) + pci_seg_supported = false; + } if (ret) { dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n", ret, domid); diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c index 02c402b1ed80..66057075d6e2 100644 --- a/drivers/xen/pci.c +++ b/drivers/xen/pci.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -26,26 +27,85 @@ #include #include "../pci/pci.h" +static bool __read_mostly pci_seg_supported = true; + static int xen_add_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); +#ifdef CONFIG_PCI_IOV + struct pci_dev *physfn = pci_dev->physfn; +#endif + + if (pci_seg_supported) { + struct physdev_pci_device_add add = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; +#ifdef CONFIG_ACPI + acpi_handle handle; +#endif #ifdef CONFIG_PCI_IOV - if (pci_dev->is_virtfn) { + if (pci_dev->is_virtfn) { + add.flags = XEN_PCI_DEV_VIRTFN; + add.physfn.bus = physfn->bus->number; + add.physfn.devfn = physfn->devfn; + } else +#endif + if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) + add.flags = XEN_PCI_DEV_EXTFN; + +#ifdef CONFIG_ACPI + handle = DEVICE_ACPI_HANDLE(&pci_dev->dev); + if (!handle) + handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge); +#ifdef CONFIG_PCI_IOV + if (!handle && pci_dev->is_virtfn) + handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge); +#endif + if (handle) { + acpi_status status; + + do { + unsigned long long pxm; + + status = acpi_evaluate_integer(handle, "_PXM", + NULL, &pxm); + if (ACPI_SUCCESS(status)) { + add.optarr[0] = pxm; + add.flags |= XEN_PCI_DEV_PXM; + break; + } + status = acpi_get_parent(handle, &handle); + } while (ACPI_SUCCESS(status)); + } +#endif /* CONFIG_ACPI */ + + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add); + if (r != -ENOSYS) + return r; + pci_seg_supported = false; + } + + if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; +#ifdef CONFIG_PCI_IOV + else if (pci_dev->is_virtfn) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, .is_virtfn = 1, - .physfn.bus = pci_dev->physfn->bus->number, - .physfn.devfn = pci_dev->physfn->devfn, + .physfn.bus = physfn->bus->number, + .physfn.devfn = physfn->devfn, }; r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext, &manage_pci_ext); - } else + } #endif - if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { + else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) { struct physdev_manage_pci_ext manage_pci_ext = { .bus = pci_dev->bus->number, .devfn = pci_dev->devfn, @@ -71,13 +131,27 @@ static int xen_remove_device(struct device *dev) { int r; struct pci_dev *pci_dev = to_pci_dev(dev); - struct physdev_manage_pci manage_pci; - manage_pci.bus = pci_dev->bus->number; - manage_pci.devfn = pci_dev->devfn; + if (pci_seg_supported) { + struct physdev_pci_device device = { + .seg = pci_domain_nr(pci_dev->bus), + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; - r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, - &manage_pci); + r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove, + &device); + } else if (pci_domain_nr(pci_dev->bus)) + r = -ENOSYS; + else { + struct physdev_manage_pci manage_pci = { + .bus = pci_dev->bus->number, + .devfn = pci_dev->devfn + }; + + r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove, + &manage_pci); + } return r; } diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h index 534cac89a77d..c1080d9c705d 100644 --- a/include/xen/interface/physdev.h +++ b/include/xen/interface/physdev.h @@ -109,6 +109,7 @@ struct physdev_irq { #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -119,7 +120,7 @@ struct physdev_map_pirq { int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -198,6 +199,37 @@ struct physdev_get_free_pirq { uint32_t pirq; }; +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** -- cgit v1.2.3 From 3b082b25c006f9a4ca82af7bb5bdc289d98cf6b9 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 22 Sep 2011 16:14:48 +0200 Subject: xen/pv-on-hvm kexec: update xs_wire.h:xsd_sockmsg_type from xen-unstable Update include/xen/interface/io/xs_wire.h from xen-unstable. Now entries in xsd_sockmsg_type were added. Signed-off-by: Olaf Hering Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/interface/io/xs_wire.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index 99fcffb372d1..f6f07aa35af5 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -26,7 +26,10 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT }; #define XS_WRITE_NONE "NONE" -- cgit v1.2.3 From ddacf5ef684a655abe2bb50c4b2a5b72ae0d5e05 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 22 Sep 2011 16:14:49 +0200 Subject: xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel Add new xs_reset_watches function to shutdown watches from old kernel after kexec boot. The old kernel does not unregister all watches in the shutdown path. They are still active, the double registration can not be detected by the new kernel. When the watches fire, unexpected events will arrive and the xenwatch thread will crash (jumps to NULL). An orderly reboot of a hvm guest will destroy the entire guest with all its resources (including the watches) before it is rebuilt from scratch, so the missing unregister is not an issue in that case. With this change the xenstored is instructed to wipe all active watches for the guest. However, a patch for xenstored is required so that it accepts the XS_RESET_WATCHES request from a client (see changeset 23839:42a45baf037d in xen-unstable.hg). Without the patch for xenstored the registration of watches will fail and some features of a PVonHVM guest are not available. The guest is still able to boot, but repeated kexec boots will fail. [v5: use xs_single instead of passing a dummy string to xs_talkv] [v4: ignore -EEXIST in xs_reset_watches] [v3: use XS_RESET_WATCHES instead of XS_INTRODUCE] [v2: move all code which deals with XS_INTRODUCE into xs_introduce() (based on feedback from Ian Campbell); remove casts from kvec assignment] Signed-off-by: Olaf Hering [v1: Redid the git description a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 13 +++++++++++++ include/xen/interface/io/xs_wire.h | 3 ++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 46347a49c5b8..d1071de11878 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -620,6 +620,15 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } +static void xs_reset_watches(void) +{ + int err; + + err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); + if (err && err != -EEXIST) + printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); +} + /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -896,5 +905,9 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); + /* shutdown watches for kexec boot */ + if (xen_hvm_domain()) + xs_reset_watches(); + return 0; } diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h index f6f07aa35af5..f0b6890370be 100644 --- a/include/xen/interface/io/xs_wire.h +++ b/include/xen/interface/io/xs_wire.h @@ -29,7 +29,8 @@ enum xsd_sockmsg_type XS_IS_DOMAIN_INTRODUCED, XS_RESUME, XS_SET_TARGET, - XS_RESTRICT + XS_RESTRICT, + XS_RESET_WATCHES }; #define XS_WRITE_NONE "NONE" -- cgit v1.2.3 From 5b25d89e19be2ff2fa7a5c80099e88fa56d66334 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 26 Sep 2011 13:13:42 -0400 Subject: xen/pv-on-hvm:kexec: Fix implicit declaration of function 'xen_hvm_domain' Randy found a compile error when using make randconfig to trigger drivers/xen/xenbus/xenbus_xs.c:909:2: error: implicit declaration of function 'xen_hvm_domain' it is unclear which of the CONFIG options triggered this. This patch fixes the error. Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index d1071de11878..b3b8f2f3ad10 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "xenbus_comms.h" struct xs_stored_msg { -- cgit v1.2.3 From e1db4cef8999c422420d55206beb4154f8a85383 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 27 Sep 2011 10:07:21 +0300 Subject: xen/pciback: double lock typo We called mutex_lock() twice instead of unlocking. Signed-off-by: Dan Carpenter Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/vpci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c index 01222d7dd20d..46d140baebd8 100644 --- a/drivers/xen/xen-pciback/vpci.c +++ b/drivers/xen/xen-pciback/vpci.c @@ -238,7 +238,7 @@ static int __xen_pcibk_get_pcifront_dev(struct pci_dev *pcidev, } } } - mutex_lock(&vpci_dev->lock); + mutex_unlock(&vpci_dev->lock); return found; } -- cgit v1.2.3 From b17d0b5c0824b6a6f143a6587fa7d47abe006ab4 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 29 Sep 2011 12:05:57 +0100 Subject: xen: XEN_PVHVM depends on PCI Xen PV on HVM guests require PCI support because they need the xen-platform-pci driver in order to initialize xenbus. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 5cc821cb2e09..e061f550a28a 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -25,8 +25,7 @@ config XEN_PRIVILEGED_GUEST config XEN_PVHVM def_bool y - depends on XEN - depends on X86_LOCAL_APIC + depends on XEN && PCI && X86_LOCAL_APIC config XEN_MAX_DOMAIN_MEMORY int -- cgit v1.2.3 From 5fbdc10395cd500d6ff844825a918c4e6f38de37 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Thu, 29 Sep 2011 12:05:58 +0100 Subject: xen: remove XEN_PLATFORM_PCI config option Xen PVHVM needs xen-platform-pci, on the other hand xen-platform-pci is useless in any other cases. Therefore remove the XEN_PLATFORM_PCI config option and compile xen-platform-pci built-in if XEN_PVHVM is selected. Changes to v1: - remove xen-platform-pci.o and just use platform-pci.o since it is not externally visible anymore. Signed-off-by: Stefano Stabellini Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 10 ---------- drivers/xen/Makefile | 4 +--- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 5f7ff8e2fc14..8795480c2350 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -137,16 +137,6 @@ config XEN_GRANT_DEV_ALLOC to other domains. This can be used to implement frontend drivers or as part of an inter-domain shared memory channel. -config XEN_PLATFORM_PCI - tristate "xen platform pci device driver" - depends on XEN_PVHVM && PCI - default m - help - Driver for the Xen PCI Platform device: it is responsible for - initializing xenbus and grant_table when running in a Xen HVM - domain. As a consequence this driver is required to run any Xen PV - frontend on Xen HVM. - config SWIOTLB_XEN def_bool y depends on PCI diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 72bbb27d7a68..974fffdf22b2 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -14,7 +14,7 @@ obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o +obj-$(CONFIG_XEN_PVHVM) += platform-pci.o obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o @@ -23,5 +23,3 @@ obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o - -xen-platform-pci-y := platform-pci.o -- cgit v1.2.3 From 77447991b6c9aef83d101aae4a9e5d83c206b9c5 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 13 Oct 2011 16:07:07 -0400 Subject: xenbus: Fix loopback event channel assuming domain 0 The xenbus event channel established in xenbus_init is intended to be a loopback channel, but the remote domain was hardcoded to 0; this will cause the channel to be unusable when xenstore is not being run in domain 0. Signed-off-by: Daniel De Graaf Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 739769551e33..d5347fe15882 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -724,7 +724,7 @@ static int __init xenbus_init(void) /* Next allocate a local port which xenstored can bind to */ alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = 0; + alloc_unbound.remote_dom = DOMID_SELF; err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc_unbound); -- cgit v1.2.3 From e4184aaf3b2c4f2b69306f6cfc4bab8733c6c5f1 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 13 Oct 2011 16:07:08 -0400 Subject: xenbus: don't rely on xen_initial_domain to detect local xenstore The xenstore daemon does not have to run in the xen initial domain; however, Linux currently uses xen_initial_domain to test if a loopback event channel should be used instead of the event channel provided in Xen's start_info structure. Instead, if the event channel passed in the start_info structure is not valid, assume that this domain will run xenstored locally and set up the event channel. Signed-off-by: Daniel De Graaf Reviewed-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_probe.c | 101 ++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 48 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index d5347fe15882..13b0f05bafb7 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -696,64 +696,74 @@ static int __init xenbus_probe_initcall(void) device_initcall(xenbus_probe_initcall); -static int __init xenbus_init(void) +/* Set up event channel for xenstored which is run as a local process + * (this is normally used only in dom0) + */ +static int __init xenstored_local_init(void) { int err = 0; unsigned long page = 0; + struct evtchn_alloc_unbound alloc_unbound; - DPRINTK(""); + /* Allocate Xenstore page */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + goto out_err; - err = -ENODEV; - if (!xen_domain()) - return err; + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); - /* - * Domain0 doesn't have a store_evtchn or store_mfn yet. - */ - if (xen_initial_domain()) { - struct evtchn_alloc_unbound alloc_unbound; + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; - /* Allocate Xenstore page */ - page = get_zeroed_page(GFP_KERNEL); - if (!page) - goto out_error; + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto out_err; - xen_store_mfn = xen_start_info->store_mfn = - pfn_to_mfn(virt_to_phys((void *)page) >> - PAGE_SHIFT); + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; - /* Next allocate a local port which xenstored can bind to */ - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = DOMID_SELF; + return 0; - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (err == -ENOSYS) - goto out_error; + out_err: + if (page != 0) + free_page(page); + return err; +} - BUG_ON(err); - xen_store_evtchn = xen_start_info->store_evtchn = - alloc_unbound.port; +static int __init xenbus_init(void) +{ + int err = 0; - xen_store_interface = mfn_to_virt(xen_store_mfn); + if (!xen_domain()) + return -ENODEV; + + if (xen_hvm_domain()) { + uint64_t v = 0; + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto out_error; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto out_error; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); } else { - if (xen_hvm_domain()) { - uint64_t v = 0; - err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); - if (err) - goto out_error; - xen_store_evtchn = (int)v; - err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + if (xen_store_evtchn) + xenstored_ready = 1; + else { + err = xenstored_local_init(); if (err) goto out_error; - xen_store_mfn = (unsigned long)v; - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE); - } else { - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); - xenstored_ready = 1; } + xen_store_interface = mfn_to_virt(xen_store_mfn); } /* Initialize the interface to xenstore. */ @@ -772,12 +782,7 @@ static int __init xenbus_init(void) proc_mkdir("xen", NULL); #endif - return 0; - - out_error: - if (page != 0) - free_page(page); - + out_error: return err; } -- cgit v1.2.3 From 72bf809a19d9fc97bfe39bb928149b5e0f202cb6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 29 Sep 2011 13:43:28 -0400 Subject: xen/pciback: Do not dereference psdev during printk when it is NULL. .. instead use BUG_ON() as all the callers of the kill_domain_by_device check for psdev. Suggested-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 1b474804180a..3cc3fbe5bf8d 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -512,12 +512,7 @@ static void kill_domain_by_device(struct pcistub_device *psdev) int err; char nodename[PCI_NODENAME_MAX]; - if (!psdev) { - dev_err(&psdev->dev->dev, - "device is NULL when do AER recovery/kill_domain\n"); - return; - } - + BUG_ON(!psdev); snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0", psdev->pdev->xdev->otherend_id); -- cgit v1.2.3 From 4645bf306746106f805d2afa5330bf31df381626 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 29 Sep 2011 13:12:43 -0400 Subject: xen/pciback: Check if the device is found instead of blindly assuming so. Just in case it is not found, don't try to dereference it. [v1: Added WARN_ON, suggested by Jan Beulich ] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 3cc3fbe5bf8d..8f06e1ed028c 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -220,6 +220,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) } spin_unlock_irqrestore(&pcistub_devices_lock, flags); + if (WARN_ON(!found_psdev)) + return; /*hold this lock for avoiding breaking link between * pcistub and xen_pcibk when AER is in processing -- cgit v1.2.3