From 0b934ccd707ff33a87f15a35a9916d1d8e85d30e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 4 Nov 2011 15:41:27 -0400 Subject: Xen: Export xen_biovec_phys_mergeable When Xen is enabled, using BIOVEC_PHYS_MERGEABLE in a module causes xen_biovec_phys_mergeable to be referenced, so it needs to be exported. Acked-by: Jens Axboe Signed-off-by: Matthew Wilcox --- drivers/xen/biomerge.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index ba6eda4b5143..18c1bb6ffce3 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -11,3 +11,4 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, return __BIOVEC_PHYS_MERGEABLE(vec1, vec2) && ((mfn1 == mfn2) || ((mfn1+1) == mfn2)); } +EXPORT_SYMBOL(xen_biovec_phys_mergeable); -- cgit v1.2.3 From 8ca19a8937ad91703cfefccf13bd8017b39510cd Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:48 -0400 Subject: xen/gntalloc: Change gref_lock to a mutex The event channel release function cannot be called under a spinlock because it can attempt to acquire a mutex due to the event channel reference acquired when setting up unmap notifications. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index f6832f46aea4..439352d094db 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -74,7 +74,7 @@ MODULE_PARM_DESC(limit, "Maximum number of grants that may be allocated by " "the gntalloc device"); static LIST_HEAD(gref_list); -static DEFINE_SPINLOCK(gref_lock); +static DEFINE_MUTEX(gref_mutex); static int gref_size; struct notify_info { @@ -143,15 +143,15 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, } /* Add to gref lists. */ - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); list_splice_tail(&queue_gref, &gref_list); list_splice_tail(&queue_file, &priv->list); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return 0; undo: - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref_size -= (op->count - i); list_for_each_entry(gref, &queue_file, next_file) { @@ -167,7 +167,7 @@ undo: */ if (unlikely(!list_empty(&queue_gref))) list_splice_tail(&queue_gref, &gref_list); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rc; } @@ -251,7 +251,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp) pr_debug("%s: priv %p\n", __func__, priv); - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); while (!list_empty(&priv->list)) { gref = list_entry(priv->list.next, struct gntalloc_gref, next_file); @@ -261,7 +261,7 @@ static int gntalloc_release(struct inode *inode, struct file *filp) __del_gref(gref); } kfree(priv); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return 0; } @@ -286,21 +286,21 @@ static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv, goto out; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); /* Clean up pages that were at zero (local) users but were still mapped * by remote domains. Since those pages count towards the limit that we * are about to enforce, removing them here is a good idea. */ do_cleanup(); if (gref_size + op.count > limit) { - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); rc = -ENOSPC; goto out_free; } gref_size += op.count; op.index = priv->index; priv->index += op.count * PAGE_SIZE; - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); rc = add_grefs(&op, gref_ids, priv); if (rc < 0) @@ -343,7 +343,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, goto dealloc_grant_out; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, op.index, op.count); if (gref) { /* Remove from the file list only, and decrease reference count. @@ -363,7 +363,7 @@ static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv, do_cleanup(); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); dealloc_grant_out: return rc; } @@ -383,7 +383,7 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, index = op.index & ~(PAGE_SIZE - 1); pgoff = op.index & (PAGE_SIZE - 1); - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, index, 1); if (!gref) { @@ -400,8 +400,9 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, gref->notify.pgoff = pgoff; gref->notify.event = op.event_channel_port; rc = 0; + unlock_out: - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rc; } @@ -433,9 +434,9 @@ static void gntalloc_vma_open(struct vm_area_struct *vma) if (!gref) return; - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref->users++; - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); } static void gntalloc_vma_close(struct vm_area_struct *vma) @@ -444,11 +445,11 @@ static void gntalloc_vma_close(struct vm_area_struct *vma) if (!gref) return; - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref->users--; if (gref->users == 0) __del_gref(gref); - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); } static struct vm_operations_struct gntalloc_vmops = { @@ -471,7 +472,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) return -EINVAL; } - spin_lock(&gref_lock); + mutex_lock(&gref_mutex); gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); if (gref == NULL) { rv = -ENOENT; @@ -499,7 +500,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) rv = 0; out_unlock: - spin_unlock(&gref_lock); + mutex_unlock(&gref_mutex); return rv; } -- cgit v1.2.3 From 0cc678f850f2cba0cedbd133fcbbf175554cd6c6 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:49 -0400 Subject: xen/gnt{dev,alloc}: reserve event channels for notify When using the unmap notify ioctl, the event channel used for notification needs to be reserved to avoid it being deallocated prior to sending the notification. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 21 ++++++++++++++++++++- drivers/xen/gntdev.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 439352d094db..c95181f43a6a 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -178,8 +178,10 @@ static void __del_gref(struct gntalloc_gref *gref) tmp[gref->notify.pgoff] = 0; kunmap(gref->page); } - if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(gref->notify.event); + evtchn_put(gref->notify.event); + } gref->notify.flags = 0; @@ -396,6 +398,23 @@ static long gntalloc_ioctl_unmap_notify(struct gntalloc_file_private_data *priv, goto unlock_out; } + /* We need to grab a reference to the event channel we are going to use + * to send the notify before releasing the reference we may already have + * (if someone has called this ioctl twice). This is required so that + * it is possible to change the clear_byte part of the notification + * without disturbing the event channel part, which may now be the last + * reference to that event channel. + */ + if (op.action & UNMAP_NOTIFY_SEND_EVENT) { + if (evtchn_get(op.event_channel_port)) { + rc = -EINVAL; + goto unlock_out; + } + } + + if (gref->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + evtchn_put(gref->notify.event); + gref->notify.flags = op.action; gref->notify.pgoff = pgoff; gref->notify.event = op.event_channel_port; diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index 39871326afa2..a7308559a26a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -193,8 +193,10 @@ static void gntdev_put_map(struct grant_map *map) atomic_sub(map->count, &pages_mapped); - if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) + if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) { notify_remote_via_evtchn(map->notify.event); + evtchn_put(map->notify.event); + } if (map->pages) { if (!use_ptemod) @@ -599,6 +601,8 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) struct ioctl_gntdev_unmap_notify op; struct grant_map *map; int rc; + int out_flags; + unsigned int out_event; if (copy_from_user(&op, u, sizeof(op))) return -EFAULT; @@ -606,6 +610,21 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT)) return -EINVAL; + /* We need to grab a reference to the event channel we are going to use + * to send the notify before releasing the reference we may already have + * (if someone has called this ioctl twice). This is required so that + * it is possible to change the clear_byte part of the notification + * without disturbing the event channel part, which may now be the last + * reference to that event channel. + */ + if (op.action & UNMAP_NOTIFY_SEND_EVENT) { + if (evtchn_get(op.event_channel_port)) + return -EINVAL; + } + + out_flags = op.action; + out_event = op.event_channel_port; + spin_lock(&priv->lock); list_for_each_entry(map, &priv->maps, next) { @@ -624,12 +643,22 @@ static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u) goto unlock_out; } + out_flags = map->notify.flags; + out_event = map->notify.event; + map->notify.flags = op.action; map->notify.addr = op.index - (map->index << PAGE_SHIFT); map->notify.event = op.event_channel_port; + rc = 0; + unlock_out: spin_unlock(&priv->lock); + + /* Drop the reference to the event channel we did not save in the map */ + if (out_flags & UNMAP_NOTIFY_SEND_EVENT) + evtchn_put(out_event); + return rc; } -- cgit v1.2.3 From 420eb554d5ee6daad743d8190383219f757dd66c Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Thu, 27 Oct 2011 17:58:47 -0400 Subject: xen/event: Add reference counting to event channels Event channels exposed to userspace by the evtchn module may be used by other modules in an asynchronous manner, which requires that reference counting be used to prevent the event channel from being closed before the signals are delivered. The reference count on new event channels defaults to -1 which indicates the event channel is not referenced outside the kernel; evtchn_get fails if called on such an event channel. The event channels made visible to userspace by evtchn have a normal reference count. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++- drivers/xen/evtchn.c | 2 +- 2 files changed, 74 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 6e075cdd0c6b..a3bcd6175f4a 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -87,6 +87,7 @@ enum xen_irq_type { */ struct irq_info { struct list_head list; + int refcnt; enum xen_irq_type type; /* type */ unsigned irq; unsigned short evtchn; /* event channel */ @@ -406,6 +407,7 @@ static void xen_irq_init(unsigned irq) panic("Unable to allocate metadata for IRQ%d\n", irq); info->type = IRQT_UNBOUND; + info->refcnt = -1; irq_set_handler_data(irq, info); @@ -469,6 +471,8 @@ static void xen_free_irq(unsigned irq) irq_set_handler_data(irq, NULL); + WARN_ON(info->refcnt > 0); + kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ @@ -637,7 +641,7 @@ int xen_bind_pirq_gsi_to_irq(unsigned gsi, if (irq != -1) { printk(KERN_INFO "xen_map_pirq_gsi: returning irq %d for gsi %u\n", irq, gsi); - goto out; /* XXX need refcount? */ + goto out; } irq = xen_allocate_irq_gsi(gsi); @@ -939,9 +943,16 @@ static void unbind_from_irq(unsigned int irq) { struct evtchn_close close; int evtchn = evtchn_from_irq(irq); + struct irq_info *info = irq_get_handler_data(irq); mutex_lock(&irq_mapping_update_lock); + if (info->refcnt > 0) { + info->refcnt--; + if (info->refcnt != 0) + goto done; + } + if (VALID_EVTCHN(evtchn)) { close.port = evtchn; if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) @@ -970,6 +981,7 @@ static void unbind_from_irq(unsigned int irq) xen_free_irq(irq); + done: mutex_unlock(&irq_mapping_update_lock); } @@ -1065,6 +1077,66 @@ void unbind_from_irqhandler(unsigned int irq, void *dev_id) } EXPORT_SYMBOL_GPL(unbind_from_irqhandler); +int evtchn_make_refcounted(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + struct irq_info *info; + + if (irq == -1) + return -ENOENT; + + info = irq_get_handler_data(irq); + + if (!info) + return -ENOENT; + + WARN_ON(info->refcnt != -1); + + info->refcnt = 1; + + return 0; +} +EXPORT_SYMBOL_GPL(evtchn_make_refcounted); + +int evtchn_get(unsigned int evtchn) +{ + int irq; + struct irq_info *info; + int err = -ENOENT; + + mutex_lock(&irq_mapping_update_lock); + + irq = evtchn_to_irq[evtchn]; + if (irq == -1) + goto done; + + info = irq_get_handler_data(irq); + + if (!info) + goto done; + + err = -EINVAL; + if (info->refcnt <= 0) + goto done; + + info->refcnt++; + err = 0; + done: + mutex_unlock(&irq_mapping_update_lock); + + return err; +} +EXPORT_SYMBOL_GPL(evtchn_get); + +void evtchn_put(unsigned int evtchn) +{ + int irq = evtchn_to_irq[evtchn]; + if (WARN_ON(irq == -1)) + return; + unbind_from_irq(irq); +} +EXPORT_SYMBOL_GPL(evtchn_put); + void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector) { int irq = per_cpu(ipi_to_irq, cpu)[vector]; diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c index dbc13e94b612..b1f60a0c0bea 100644 --- a/drivers/xen/evtchn.c +++ b/drivers/xen/evtchn.c @@ -268,7 +268,7 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port) rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, u->name, (void *)(unsigned long)port); if (rc >= 0) - rc = 0; + rc = evtchn_make_refcounted(port); return rc; } -- cgit v1.2.3 From 0f9f5a9588468cddeccc9146b86798492c7cd4f5 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:58:06 +0800 Subject: xen/granttable: Introducing grant table V2 stucture This patch introduces new structures of grant table V2, grant table V2 is an extension from V1. Grant table is shared between guest and Xen, and Xen is responsible to do corresponding work for grant operations, such as: figure out guest's grant table version, perform different actions based on different grant table version, etc. Although full-page structure of V2 is different from V1, it play the same role as V1. Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 181 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 141 insertions(+), 40 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index bf1c094f4ebf..18355a53763f 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -53,7 +53,7 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry)) +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry_v1)) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -64,7 +64,63 @@ static DEFINE_SPINLOCK(gnttab_list_lock); unsigned long xen_hvm_resume_frames; EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); -static struct grant_entry *shared; +static union { + struct grant_entry_v1 *v1; + void *addr; +} gnttab_shared; + +/*This is a structure of function pointers for grant table*/ +struct gnttab_ops { + /* + * Mapping a list of frames for storing grant entries. First input + * parameter is used to storing grant table address when grant table + * being setup, second parameter is the number of frames to map grant + * table. Returning GNTST_okay means success and negative value means + * failure. + */ + int (*map_frames)(unsigned long *, unsigned int); + /* + * Release a list of frames which are mapped in map_frames for grant + * entry status. + */ + void (*unmap_frames)(void); + /* + * Introducing a valid entry into the grant table, granting the frame + * of this grant entry to domain for accessing, or transfering, or + * transitively accessing. First input parameter is reference of this + * introduced grant entry, second one is domid of granted domain, third + * one is the frame to be granted, and the last one is status of the + * grant entry to be updated. + */ + void (*update_entry)(grant_ref_t, domid_t, unsigned long, unsigned); + /* + * Stop granting a grant entry to domain for accessing. First input + * parameter is reference of a grant entry whose grant access will be + * stopped, second one is not in use now. If the grant entry is + * currently mapped for reading or writing, just return failure(==0) + * directly and don't tear down the grant access. Otherwise, stop grant + * access for this entry and return success(==1). + */ + int (*end_foreign_access_ref)(grant_ref_t, int); + /* + * Stop granting a grant entry to domain for transfer. If tranfer has + * not started, just reclaim the grant entry and return failure(==0). + * Otherwise, wait for the transfer to complete and then return the + * frame. + */ + unsigned long (*end_foreign_transfer_ref)(grant_ref_t); + /* + * Query the status of a grant entry. Input parameter is reference of + * queried grant entry, return value is the status of queried entry. + * Detailed status(writing/reading) can be gotten from the return value + * by bit operations. + */ + int (*query_foreign_access)(grant_ref_t); +}; + +static struct gnttab_ops *gnttab_interface; + +static int grant_table_version; static struct gnttab_free_callback *gnttab_free_callback_list; @@ -142,23 +198,23 @@ static void put_free_entry(grant_ref_t ref) spin_unlock_irqrestore(&gnttab_list_lock, flags); } -static void update_grant_entry(grant_ref_t ref, domid_t domid, - unsigned long frame, unsigned flags) +/* + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + */ +static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) { - /* - * Introducing a valid entry into the grant table: - * 1. Write ent->domid. - * 2. Write ent->frame: - * GTF_permit_access: Frame to which access is permitted. - * GTF_accept_transfer: Pseudo-phys frame slot being filled by new - * frame, or zero if none. - * 3. Write memory barrier (WMB). - * 4. Write ent->flags, inc. valid type. - */ - shared[ref].frame = frame; - shared[ref].domid = domid; + gnttab_shared.v1[ref].domid = domid; + gnttab_shared.v1[ref].frame = frame; wmb(); - shared[ref].flags = flags; + gnttab_shared.v1[ref].flags = flags; } /* @@ -167,7 +223,7 @@ static void update_grant_entry(grant_ref_t ref, domid_t domid, void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, unsigned long frame, int readonly) { - update_grant_entry(ref, domid, frame, + gnttab_interface->update_entry(ref, domid, frame, GTF_permit_access | (readonly ? GTF_readonly : 0)); } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref); @@ -187,31 +243,37 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -int gnttab_query_foreign_access(grant_ref_t ref) +static int gnttab_query_foreign_access_v1(grant_ref_t ref) { - u16 nflags; - - nflags = shared[ref].flags; + return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); +} - return nflags & (GTF_reading|GTF_writing); +int gnttab_query_foreign_access(grant_ref_t ref) +{ + return gnttab_interface->query_foreign_access(ref); } EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); -int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; - nflags = shared[ref].flags; + nflags = gnttab_shared.v1[ref].flags; do { flags = nflags; if (flags & (GTF_reading|GTF_writing)) { printk(KERN_ALERT "WARNING: g.e. still in use!\n"); return 0; } - } while ((nflags = sync_cmpxchg(&shared[ref].flags, flags, 0)) != flags); + } while ((nflags = sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0)) != flags); return 1; } + +int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) +{ + return gnttab_interface->end_foreign_access_ref(ref, readonly); +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); void gnttab_end_foreign_access(grant_ref_t ref, int readonly, @@ -246,11 +308,11 @@ EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer); void gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, unsigned long pfn) { - update_grant_entry(ref, domid, pfn, GTF_accept_transfer); + gnttab_interface->update_entry(ref, domid, pfn, GTF_accept_transfer); } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_transfer_ref); -unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) { unsigned long frame; u16 flags; @@ -259,24 +321,29 @@ unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { - if (sync_cmpxchg(&shared[ref].flags, flags, 0) == flags) + while (!((flags = gnttab_shared.v1[ref].flags) & GTF_transfer_committed)) { + if (sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = shared[ref].flags; + flags = gnttab_shared.v1[ref].flags; cpu_relax(); } rmb(); /* Read the frame number /after/ reading completion status. */ - frame = shared[ref].frame; + frame = gnttab_shared.v1[ref].frame; BUG_ON(frame == 0); return frame; } + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + return gnttab_interface->end_foreign_transfer_ref(ref); +} EXPORT_SYMBOL_GPL(gnttab_end_foreign_transfer_ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref) @@ -520,6 +587,23 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) +{ + int rc; + + rc = arch_gnttab_map_shared(frames, nr_gframes, + gnttab_max_grant_frames(), + &gnttab_shared.addr); + BUG_ON(rc); + + return 0; +} + +static void gnttab_unmap_frames_v1(void) +{ + arch_gnttab_unmap_shared(gnttab_shared.addr, nr_grant_frames); +} + static int gnttab_map(unsigned int start_idx, unsigned int end_idx) { struct gnttab_setup_table setup; @@ -567,19 +651,35 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) BUG_ON(rc || setup.status); - rc = arch_gnttab_map_shared(frames, nr_gframes, gnttab_max_grant_frames(), - &shared); - BUG_ON(rc); + rc = gnttab_interface->map_frames(frames, nr_gframes); kfree(frames); - return 0; + return rc; +} + +static struct gnttab_ops gnttab_v1_ops = { + .map_frames = gnttab_map_frames_v1, + .unmap_frames = gnttab_unmap_frames_v1, + .update_entry = gnttab_update_entry_v1, + .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, + .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, + .query_foreign_access = gnttab_query_foreign_access_v1, +}; + +static void gnttab_request_version(void) +{ + grant_table_version = 1; + gnttab_interface = &gnttab_v1_ops; + printk(KERN_INFO "Grant tables using version %d layout.\n", + grant_table_version); } int gnttab_resume(void) { unsigned int max_nr_gframes; + gnttab_request_version(); max_nr_gframes = gnttab_max_grant_frames(); if (max_nr_gframes < nr_grant_frames) return -ENOSYS; @@ -587,9 +687,10 @@ int gnttab_resume(void) if (xen_pv_domain()) return gnttab_map(0, nr_grant_frames - 1); - if (!shared) { - shared = ioremap(xen_hvm_resume_frames, PAGE_SIZE * max_nr_gframes); - if (shared == NULL) { + if (gnttab_shared.addr == NULL) { + gnttab_shared.addr = ioremap(xen_hvm_resume_frames, + PAGE_SIZE * max_nr_gframes); + if (gnttab_shared.addr == NULL) { printk(KERN_WARNING "Failed to ioremap gnttab share frames!"); return -ENOMEM; @@ -603,7 +704,7 @@ int gnttab_resume(void) int gnttab_suspend(void) { - arch_gnttab_unmap_shared(shared, nr_grant_frames); + gnttab_interface->unmap_frames(); return 0; } -- cgit v1.2.3 From b1e495b2fae578b1bd3ab1906cb15aac43f96fee Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:58:47 +0800 Subject: xen/granttable: Refactor some code Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 18355a53763f..0518d0404942 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -257,15 +257,17 @@ EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; + u16 *pflags; - nflags = gnttab_shared.v1[ref].flags; + pflags = &gnttab_shared.v1[ref].flags; + nflags = *pflags; do { flags = nflags; if (flags & (GTF_reading|GTF_writing)) { printk(KERN_ALERT "WARNING: g.e. still in use!\n"); return 0; } - } while ((nflags = sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0)) != flags); + } while ((nflags = sync_cmpxchg(pflags, flags, 0)) != flags); return 1; } @@ -316,20 +318,23 @@ static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) { unsigned long frame; u16 flags; + u16 *pflags; + + pflags = &gnttab_shared.v1[ref].flags; /* * If a transfer is not even yet started, try to reclaim the grant * reference and return failure (== 0). */ - while (!((flags = gnttab_shared.v1[ref].flags) & GTF_transfer_committed)) { - if (sync_cmpxchg(&gnttab_shared.v1[ref].flags, flags, 0) == flags) + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (sync_cmpxchg(pflags, flags, 0) == flags) return 0; cpu_relax(); } /* If a transfer is in progress then wait until it is completed. */ while (!(flags & GTF_transfer_completed)) { - flags = gnttab_shared.v1[ref].flags; + flags = *pflags; cpu_relax(); } -- cgit v1.2.3 From 85ff6acb075a484780b3d763fdf41596d8fc0970 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:59:21 +0800 Subject: xen/granttable: Grant tables V2 implementation Receiver-side copying of packets is based on this implementation, it gives better performance and better CPU accounting. It totally supports three types: full-page, sub-page and transitive grants. However this patch does not cover sub-page and transitive grants, it mainly focus on Full-page part and implements grant table V2 interfaces corresponding to what already exists in grant table V1, such as: grant table V2 initialization, mapping, releasing and exported interfaces. Each guest can only supports one type of grant table type, every entry in grant table should be the same version. It is necessary to set V1 or V2 version before initializing the grant table. Grant table exported interfaces of V2 are same with those of V1, Xen is responsible to judge what grant table version guests are using in every grant operation. V2 fulfills the same role of V1, and it is totally backwards compitable with V1. If dom0 support grant table V2, the guests runing on it can run with either V1 or V2. Acked-by: Ian Campbell Signed-off-by: Annie Li [v1: Modified alloc_vm_area call (new parameters), indentation, and cleanpatch warnings] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 171 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 167 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 0518d0404942..301869f60dc7 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -53,7 +54,10 @@ /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff -#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(struct grant_entry_v1)) +#define GREFS_PER_GRANT_FRAME \ +(grant_table_version == 1 ? \ +(PAGE_SIZE / sizeof(struct grant_entry_v1)) : \ +(PAGE_SIZE / sizeof(union grant_entry_v2))) static grant_ref_t **gnttab_list; static unsigned int nr_grant_frames; @@ -66,6 +70,7 @@ EXPORT_SYMBOL_GPL(xen_hvm_resume_frames); static union { struct grant_entry_v1 *v1; + union grant_entry_v2 *v2; void *addr; } gnttab_shared; @@ -120,6 +125,9 @@ struct gnttab_ops { static struct gnttab_ops *gnttab_interface; +/*This reflects status of grant entries, so act as a global value*/ +static grant_status_t *grstatus; + static int grant_table_version; static struct gnttab_free_callback *gnttab_free_callback_list; @@ -127,6 +135,7 @@ static struct gnttab_free_callback *gnttab_free_callback_list; static int gnttab_expand(unsigned int req_entries); #define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define SPP (PAGE_SIZE / sizeof(grant_status_t)) static inline grant_ref_t *__gnttab_entry(grant_ref_t entry) { @@ -199,6 +208,7 @@ static void put_free_entry(grant_ref_t ref) } /* + * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2. * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: @@ -217,6 +227,15 @@ static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid, gnttab_shared.v1[ref].flags = flags; } +static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags) +{ + gnttab_shared.v2[ref].hdr.domid = domid; + gnttab_shared.v2[ref].full_page.frame = frame; + wmb(); + gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags; +} + /* * Public grant-issuing interface functions */ @@ -248,6 +267,11 @@ static int gnttab_query_foreign_access_v1(grant_ref_t ref) return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); } +static int gnttab_query_foreign_access_v2(grant_ref_t ref) +{ + return grstatus[ref] & (GTF_reading|GTF_writing); +} + int gnttab_query_foreign_access(grant_ref_t ref) { return gnttab_interface->query_foreign_access(ref); @@ -272,6 +296,29 @@ static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) return 1; } +static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref, int readonly) +{ + gnttab_shared.v2[ref].hdr.flags = 0; + mb(); + if (grstatus[ref] & (GTF_reading|GTF_writing)) { + return 0; + } else { + /* The read of grstatus needs to have acquire + semantics. On x86, reads already have + that, and we just need to protect against + compiler reorderings. On other + architectures we may need a full + barrier. */ +#ifdef CONFIG_X86 + barrier(); +#else + mb(); +#endif + } + + return 1; +} + int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) { return gnttab_interface->end_foreign_access_ref(ref, readonly); @@ -345,6 +392,37 @@ static unsigned long gnttab_end_foreign_transfer_ref_v1(grant_ref_t ref) return frame; } +static unsigned long gnttab_end_foreign_transfer_ref_v2(grant_ref_t ref) +{ + unsigned long frame; + u16 flags; + u16 *pflags; + + pflags = &gnttab_shared.v2[ref].hdr.flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = *pflags) & GTF_transfer_committed)) { + if (sync_cmpxchg(pflags, flags, 0) == flags) + return 0; + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = *pflags; + cpu_relax(); + } + + rmb(); /* Read the frame number /after/ reading completion status. */ + frame = gnttab_shared.v2[ref].full_page.frame; + BUG_ON(frame == 0); + + return frame; +} + unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref) { return gnttab_interface->end_foreign_transfer_ref(ref); @@ -592,6 +670,11 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, } EXPORT_SYMBOL_GPL(gnttab_unmap_refs); +static unsigned nr_status_frames(unsigned nr_grant_frames) +{ + return (nr_grant_frames * GREFS_PER_GRANT_FRAME + SPP - 1) / SPP; +} + static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) { int rc; @@ -606,7 +689,56 @@ static int gnttab_map_frames_v1(unsigned long *frames, unsigned int nr_gframes) static void gnttab_unmap_frames_v1(void) { - arch_gnttab_unmap_shared(gnttab_shared.addr, nr_grant_frames); + arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); +} + +static int gnttab_map_frames_v2(unsigned long *frames, unsigned int nr_gframes) +{ + uint64_t *sframes; + unsigned int nr_sframes; + struct gnttab_get_status_frames getframes; + int rc; + + nr_sframes = nr_status_frames(nr_gframes); + + /* No need for kzalloc as it is initialized in following hypercall + * GNTTABOP_get_status_frames. + */ + sframes = kmalloc(nr_sframes * sizeof(uint64_t), GFP_ATOMIC); + if (!sframes) + return -ENOMEM; + + getframes.dom = DOMID_SELF; + getframes.nr_frames = nr_sframes; + set_xen_guest_handle(getframes.frame_list, sframes); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames, + &getframes, 1); + if (rc == -ENOSYS) { + kfree(sframes); + return -ENOSYS; + } + + BUG_ON(rc || getframes.status); + + rc = arch_gnttab_map_status(sframes, nr_sframes, + nr_status_frames(gnttab_max_grant_frames()), + &grstatus); + BUG_ON(rc); + kfree(sframes); + + rc = arch_gnttab_map_shared(frames, nr_gframes, + gnttab_max_grant_frames(), + &gnttab_shared.addr); + BUG_ON(rc); + + return 0; +} + +static void gnttab_unmap_frames_v2(void) +{ + arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames); + arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames)); } static int gnttab_map(unsigned int start_idx, unsigned int end_idx) @@ -640,6 +772,9 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx) return rc; } + /* No need for kzalloc as it is initialized in following hypercall + * GNTTABOP_setup_table. + */ frames = kmalloc(nr_gframes * sizeof(unsigned long), GFP_ATOMIC); if (!frames) return -ENOMEM; @@ -672,10 +807,38 @@ static struct gnttab_ops gnttab_v1_ops = { .query_foreign_access = gnttab_query_foreign_access_v1, }; +static struct gnttab_ops gnttab_v2_ops = { + .map_frames = gnttab_map_frames_v2, + .unmap_frames = gnttab_unmap_frames_v2, + .update_entry = gnttab_update_entry_v2, + .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, + .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, + .query_foreign_access = gnttab_query_foreign_access_v2, +}; + static void gnttab_request_version(void) { - grant_table_version = 1; - gnttab_interface = &gnttab_v1_ops; + int rc; + struct gnttab_set_version gsv; + + gsv.version = 2; + rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1); + if (rc == 0) { + grant_table_version = 2; + gnttab_interface = &gnttab_v2_ops; + } else if (grant_table_version == 2) { + /* + * If we've already used version 2 features, + * but then suddenly discover that they're not + * available (e.g. migrating to an older + * version of Xen), almost unbounded badness + * can happen. + */ + panic("we need grant tables version 2, but only version 1 is available"); + } else { + grant_table_version = 1; + gnttab_interface = &gnttab_v1_ops; + } printk(KERN_INFO "Grant tables using version %d layout.\n", grant_table_version); } -- cgit v1.2.3 From c123799a41bf466ce5b199331aac4c1f28f67ec3 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Tue, 22 Nov 2011 09:59:56 +0800 Subject: xen/granttable: Keep code format clean Acked-by: Ian Campbell Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 301869f60dc7..bd325fd0000b 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -50,7 +50,6 @@ #include #include - /* External tools reserve first few grant table entries. */ #define NR_RESERVED_ENTRIES 8 #define GNTTAB_LIST_END 0xffffffff @@ -598,8 +597,8 @@ unsigned int gnttab_max_grant_frames(void) EXPORT_SYMBOL_GPL(gnttab_max_grant_frames); int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, - struct gnttab_map_grant_ref *kmap_ops, - struct page **pages, unsigned int count) + struct gnttab_map_grant_ref *kmap_ops, + struct page **pages, unsigned int count) { int i, ret; pte_t *pte; @@ -649,7 +648,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count) { int i, ret; -- cgit v1.2.3 From f21ffe9f6da6d3a69c518b7345c198d48d941c34 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 11 Aug 2011 16:50:56 -0400 Subject: swiotlb: Expose swiotlb_nr_tlb function to modules As a mechanism to detect whether SWIOTLB is enabled or not. We also fix the spelling - it was swioltb instead of swiotlb. CC: FUJITA Tomonori [v1: Ripped out swiotlb_enabled] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 8e964b91c447..4864e5d72e72 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -153,7 +153,7 @@ void __init xen_swiotlb_init(int verbose) char *m = NULL; unsigned int repeat = 3; - nr_tbl = swioltb_nr_tbl(); + nr_tbl = swiotlb_nr_tbl(); if (nr_tbl) xen_io_tlb_nslabs = nr_tbl; else { -- cgit v1.2.3 From 63a741757d15320a25ebf5778f8651cce2ed0611 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Thu, 15 Dec 2011 11:28:46 -0500 Subject: xen/swiotlb: Use page alignment for early buffer allocation. This fixes an odd bug found on a Dell PowerEdge 1850/0RC130 (BIOS A05 01/09/2006) where all of the modules doing pci_set_dma_mask would fail with: ata_piix 0000:00:1f.1: enabling device (0005 -> 0007) ata_piix 0000:00:1f.1: can't derive routing for PCI INT A ata_piix 0000:00:1f.1: BMDMA: failed to set dma mask, falling back to PIO The issue was the Xen-SWIOTLB was allocated such as that the end of buffer was stradling a page (and also above 4GB). The fix was spotted by Kalev Leonid which was to piggyback on git commit e79f86b2ef9c0a8c47225217c1018b7d3d90101c "swiotlb: Use page alignment for early buffer allocation" which: We could call free_bootmem_late() if swiotlb is not used, and it will shrink to page alignment. So alloc them with page alignment at first, to avoid lose two pages And doing that fixes the outstanding issue. CC: stable@kernel.org Suggested-by: "Kalev, Leonid" Reported-and-Tested-by: "Taylor, Neal E" Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/swiotlb-xen.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 8e964b91c447..284798aaf8b1 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -166,7 +166,7 @@ retry: /* * Get IO TLB memory from any location. */ - xen_io_tlb_start = alloc_bootmem(bytes); + xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes)); if (!xen_io_tlb_start) { m = "Cannot allocate Xen-SWIOTLB buffer!\n"; goto error; @@ -179,7 +179,7 @@ retry: bytes, xen_io_tlb_nslabs); if (rc) { - free_bootmem(__pa(xen_io_tlb_start), bytes); + free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes)); m = "Failed to get contiguous memory for DMA from Xen!\n"\ "You either: don't have the permissions, do not have"\ " enough free memory under 4GB, or the hypervisor memory"\ -- cgit v1.2.3 From cb0c05c5fae12eeb7c85c205578df277bd706155 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 15 Dec 2011 11:07:02 -0800 Subject: xen: fix build breakage in xen-selfballoon.c caused by sysdev conversion This adds the needed include file for xen-selfballoon.c to fix the build error reported by Stephen Rothwell. Reported-by: Stephen Rothwell Cc: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/xen/xen-selfballoon.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c index b7b9e95f8717..767ff656d5a7 100644 --- a/drivers/xen/xen-selfballoon.c +++ b/drivers/xen/xen-selfballoon.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 9dbc71d53ce4e0260d0a8307838cd9ebddc07a12 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:13:57 +0800 Subject: xen/granttable: Improve comments for function pointers Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 48 +++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index bd325fd0000b..1589ea1a2445 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -76,50 +76,50 @@ static union { /*This is a structure of function pointers for grant table*/ struct gnttab_ops { /* - * Mapping a list of frames for storing grant entries. First input - * parameter is used to storing grant table address when grant table - * being setup, second parameter is the number of frames to map grant - * table. Returning GNTST_okay means success and negative value means - * failure. + * Mapping a list of frames for storing grant entries. Frames parameter + * is used to store grant table address when grant table being setup, + * nr_gframes is the number of frames to map grant table. Returning + * GNTST_okay means success and negative value means failure. */ - int (*map_frames)(unsigned long *, unsigned int); + int (*map_frames)(unsigned long *frames, unsigned int nr_gframes); /* * Release a list of frames which are mapped in map_frames for grant * entry status. */ void (*unmap_frames)(void); /* - * Introducing a valid entry into the grant table, granting the frame - * of this grant entry to domain for accessing, or transfering, or - * transitively accessing. First input parameter is reference of this - * introduced grant entry, second one is domid of granted domain, third - * one is the frame to be granted, and the last one is status of the - * grant entry to be updated. + * Introducing a valid entry into the grant table, granting the frame of + * this grant entry to domain for accessing or transfering. Ref + * parameter is reference of this introduced grant entry, domid is id of + * granted domain, frame is the page frame to be granted, and flags is + * status of the grant entry to be updated. */ - void (*update_entry)(grant_ref_t, domid_t, unsigned long, unsigned); + void (*update_entry)(grant_ref_t ref, domid_t domid, + unsigned long frame, unsigned flags); /* - * Stop granting a grant entry to domain for accessing. First input - * parameter is reference of a grant entry whose grant access will be - * stopped, second one is not in use now. If the grant entry is + * Stop granting a grant entry to domain for accessing. Ref parameter is + * reference of a grant entry whose grant access will be stopped, + * readonly is not in use in this function. If the grant entry is * currently mapped for reading or writing, just return failure(==0) * directly and don't tear down the grant access. Otherwise, stop grant * access for this entry and return success(==1). */ - int (*end_foreign_access_ref)(grant_ref_t, int); + int (*end_foreign_access_ref)(grant_ref_t ref, int readonly); /* - * Stop granting a grant entry to domain for transfer. If tranfer has - * not started, just reclaim the grant entry and return failure(==0). - * Otherwise, wait for the transfer to complete and then return the - * frame. + * Stop granting a grant entry to domain for transfer. Ref parameter is + * reference of a grant entry whose grant transfer will be stopped. If + * tranfer has not started, just reclaim the grant entry and return + * failure(==0). Otherwise, wait for the transfer to complete and then + * return the frame. */ - unsigned long (*end_foreign_transfer_ref)(grant_ref_t); + unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); /* - * Query the status of a grant entry. Input parameter is reference of + * Query the status of a grant entry. Ref parameter is reference of * queried grant entry, return value is the status of queried entry. * Detailed status(writing/reading) can be gotten from the return value * by bit operations. */ - int (*query_foreign_access)(grant_ref_t); + int (*query_foreign_access)(grant_ref_t ref); }; static struct gnttab_ops *gnttab_interface; -- cgit v1.2.3 From 6666754b11297526c699f8df63c52d50c24fe946 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:14:42 +0800 Subject: xen/granttable: Support sub-page grants - They can't be used to map the page (so can only be used in a GNTTABOP_copy hypercall). - It's possible to grant access with a finer granularity than whole pages. - Xen guarantees that they can be revoked quickly (a normal map grant can only be revoked with the cooperation of the domain which has been granted access). Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1589ea1a2445..c8312c7056f1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -120,6 +120,17 @@ struct gnttab_ops { * by bit operations. */ int (*query_foreign_access)(grant_ref_t ref); + /* + * Grant a domain to access a range of bytes within the page referred by + * an available grant entry. Ref parameter is reference of a grant entry + * which will be sub-page accessed, domid is id of grantee domain, frame + * is frame address of subpage grant, flags is grant type and flag + * information, page_off is offset of the range of bytes, and length is + * length of bytes to be accessed. + */ + void (*update_subpage_entry)(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, unsigned length); }; static struct gnttab_ops *gnttab_interface; @@ -261,6 +272,66 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); +void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + gnttab_shared.v2[ref].sub_page.frame = frame; + gnttab_shared.v2[ref].sub_page.page_off = page_off; + gnttab_shared.v2[ref].sub_page.length = length; + gnttab_shared.v2[ref].hdr.domid = domid; + wmb(); + gnttab_shared.v2[ref].hdr.flags = + GTF_permit_access | GTF_sub_page | flags; +} + +int gnttab_grant_foreign_access_subpage_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags, + unsigned page_off, + unsigned length) +{ + if (flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_transitive)) + return -EPERM; + + if (gnttab_interface->update_subpage_entry == NULL) + return -ENOSYS; + + gnttab_interface->update_subpage_entry(ref, domid, frame, flags, + page_off, length); + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage_ref); + +int gnttab_grant_foreign_access_subpage(domid_t domid, unsigned long frame, + int flags, unsigned page_off, + unsigned length) +{ + int ref, rc; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + rc = gnttab_grant_foreign_access_subpage_ref(ref, domid, frame, flags, + page_off, length); + if (rc < 0) { + put_free_entry(ref); + return rc; + } + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_subpage); + +bool gnttab_subpage_grants_available(void) +{ + return gnttab_interface->update_subpage_entry != NULL; +} +EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); + static int gnttab_query_foreign_access_v1(grant_ref_t ref) { return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); @@ -813,6 +884,7 @@ static struct gnttab_ops gnttab_v2_ops = { .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, .query_foreign_access = gnttab_query_foreign_access_v2, + .update_subpage_entry = gnttab_update_subpage_entry_v2, }; static void gnttab_request_version(void) -- cgit v1.2.3 From 9438ce9dbbf512fd717051284f568d5cb35e5cf1 Mon Sep 17 00:00:00 2001 From: Annie Li Date: Mon, 12 Dec 2011 18:15:07 +0800 Subject: xen/granttable: Support transitive grants These allow a domain A which has been granted access on a page of domain B's memory to issue domain C with a copy-grant on the same page. This is useful e.g. for forwarding packets between domains. Signed-off-by: Annie Li Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/grant-table.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index c8312c7056f1..a3d0e1e278c1 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -131,6 +131,18 @@ struct gnttab_ops { void (*update_subpage_entry)(grant_ref_t ref, domid_t domid, unsigned long frame, int flags, unsigned page_off, unsigned length); + /* + * Redirect an available grant entry on domain A to another grant + * reference of domain B, then allow domain C to use grant reference + * of domain B transitively. Ref parameter is an available grant entry + * reference on domain A, domid is id of domain C which accesses grant + * entry transitively, flags is grant type and flag information, + * trans_domid is id of domain B whose grant entry is finally accessed + * transitively, trans_gref is grant entry transitive reference of + * domain B. + */ + void (*update_trans_entry)(grant_ref_t ref, domid_t domid, int flags, + domid_t trans_domid, grant_ref_t trans_gref); }; static struct gnttab_ops *gnttab_interface; @@ -332,6 +344,63 @@ bool gnttab_subpage_grants_available(void) } EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available); +void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) +{ + gnttab_shared.v2[ref].transitive.trans_domid = trans_domid; + gnttab_shared.v2[ref].transitive.gref = trans_gref; + gnttab_shared.v2[ref].hdr.domid = domid; + wmb(); + gnttab_shared.v2[ref].hdr.flags = + GTF_permit_access | GTF_transitive | flags; +} + +int gnttab_grant_foreign_access_trans_ref(grant_ref_t ref, domid_t domid, + int flags, domid_t trans_domid, + grant_ref_t trans_gref) +{ + if (flags & (GTF_accept_transfer | GTF_reading | + GTF_writing | GTF_sub_page)) + return -EPERM; + + if (gnttab_interface->update_trans_entry == NULL) + return -ENOSYS; + + gnttab_interface->update_trans_entry(ref, domid, flags, trans_domid, + trans_gref); + + return 0; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans_ref); + +int gnttab_grant_foreign_access_trans(domid_t domid, int flags, + domid_t trans_domid, + grant_ref_t trans_gref) +{ + int ref, rc; + + ref = get_free_entries(1); + if (unlikely(ref < 0)) + return -ENOSPC; + + rc = gnttab_grant_foreign_access_trans_ref(ref, domid, flags, + trans_domid, trans_gref); + if (rc < 0) { + put_free_entry(ref); + return rc; + } + + return ref; +} +EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_trans); + +bool gnttab_trans_grants_available(void) +{ + return gnttab_interface->update_trans_entry != NULL; +} +EXPORT_SYMBOL_GPL(gnttab_trans_grants_available); + static int gnttab_query_foreign_access_v1(grant_ref_t ref) { return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); @@ -885,6 +954,7 @@ static struct gnttab_ops gnttab_v2_ops = { .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, .query_foreign_access = gnttab_query_foreign_access_v2, .update_subpage_entry = gnttab_update_subpage_entry_v2, + .update_trans_entry = gnttab_update_trans_entry_v2, }; static void gnttab_request_version(void) -- cgit v1.2.3 From c3b3f16d1bceb5ac5f21528f889810b4ac5a3596 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:09 -0500 Subject: xen/events: prevent calling evtchn_get on invalid channels The event channel number provided to evtchn_get can be provided by userspace, so needs to be checked against the maximum number of event channels prior to using it to index into evtchn_to_irq. Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/events.c b/drivers/xen/events.c index a3bcd6175f4a..e5e5812a1014 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1104,6 +1104,9 @@ int evtchn_get(unsigned int evtchn) struct irq_info *info; int err = -ENOENT; + if (evtchn >= NR_EVENT_CHANNELS) + return -EINVAL; + mutex_lock(&irq_mapping_update_lock); irq = evtchn_to_irq[evtchn]; -- cgit v1.2.3 From 0105d2b4fbc24c2fb6ca9bae650784dd7ddf0b12 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:10 -0500 Subject: xen/gntalloc: release grant references on page free gnttab_end_foreign_access_ref does not return the grant reference it is passed to the free list; gnttab_free_grant_reference needs to be explicitly called. While gnttab_end_foreign_access provides a wrapper for this, it is unsuitable because it does not return errors. Reported-by: Anil Madhavapeddy Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index c95181f43a6a..f330a4b8b685 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -191,6 +191,8 @@ static void __del_gref(struct gntalloc_gref *gref) if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) return; + + gnttab_free_grant_reference(gref->gref_id); } gref_size--; -- cgit v1.2.3 From 243082e0d59f169a1fa502f51ee5a820889fae93 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 28 Nov 2011 11:49:11 -0500 Subject: xen/gntalloc: fix reference counts on multi-page mappings When a multi-page mapping of gntalloc is created, the reference counts of all pages in the vma are incremented. However, the vma open/close operations only adjusted the reference count of the first page in the mapping, leaking the other pages. Store a struct in the vm_private_data to track the original page count to properly free the pages when the last reference to the vma is closed. Reported-by: Anil Madhavapeddy Signed-off-by: Daniel De Graaf Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 56 ++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 13 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index f330a4b8b685..e8ea56583b4c 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -99,6 +99,12 @@ struct gntalloc_file_private_data { uint64_t index; }; +struct gntalloc_vma_private_data { + struct gntalloc_gref *gref; + int users; + int count; +}; + static void __del_gref(struct gntalloc_gref *gref); static void do_cleanup(void) @@ -451,25 +457,39 @@ static long gntalloc_ioctl(struct file *filp, unsigned int cmd, static void gntalloc_vma_open(struct vm_area_struct *vma) { - struct gntalloc_gref *gref = vma->vm_private_data; - if (!gref) + struct gntalloc_vma_private_data *priv = vma->vm_private_data; + + if (!priv) return; mutex_lock(&gref_mutex); - gref->users++; + priv->users++; mutex_unlock(&gref_mutex); } static void gntalloc_vma_close(struct vm_area_struct *vma) { - struct gntalloc_gref *gref = vma->vm_private_data; - if (!gref) + struct gntalloc_vma_private_data *priv = vma->vm_private_data; + struct gntalloc_gref *gref, *next; + int i; + + if (!priv) return; mutex_lock(&gref_mutex); - gref->users--; - if (gref->users == 0) - __del_gref(gref); + priv->users--; + if (priv->users == 0) { + gref = priv->gref; + for (i = 0; i < priv->count; i++) { + gref->users--; + next = list_entry(gref->next_gref.next, + struct gntalloc_gref, next_gref); + if (gref->users == 0) + __del_gref(gref); + gref = next; + } + kfree(priv); + } mutex_unlock(&gref_mutex); } @@ -481,19 +501,25 @@ static struct vm_operations_struct gntalloc_vmops = { static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) { struct gntalloc_file_private_data *priv = filp->private_data; + struct gntalloc_vma_private_data *vm_priv; struct gntalloc_gref *gref; int count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; int rv, i; - pr_debug("%s: priv %p, page %lu+%d\n", __func__, - priv, vma->vm_pgoff, count); - if (!(vma->vm_flags & VM_SHARED)) { printk(KERN_ERR "%s: Mapping must be shared.\n", __func__); return -EINVAL; } + vm_priv = kmalloc(sizeof(*vm_priv), GFP_KERNEL); + if (!vm_priv) + return -ENOMEM; + mutex_lock(&gref_mutex); + + pr_debug("%s: priv %p,%p, page %lu+%d\n", __func__, + priv, vm_priv, vma->vm_pgoff, count); + gref = find_grefs(priv, vma->vm_pgoff << PAGE_SHIFT, count); if (gref == NULL) { rv = -ENOENT; @@ -502,9 +528,13 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) goto out_unlock; } - vma->vm_private_data = gref; + vm_priv->gref = gref; + vm_priv->users = 1; + vm_priv->count = count; + + vma->vm_private_data = vm_priv; - vma->vm_flags |= VM_RESERVED; + vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND; vma->vm_ops = &gntalloc_vmops; -- cgit v1.2.3 From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Fri, 16 Dec 2011 11:34:33 -0500 Subject: xen: Add privcmd device driver Access to arbitrary hypercalls is currently provided via xenfs. This adds a standard character device to handle this. The support in xenfs remains for backward compatibility and uses the device driver code. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/Kconfig | 7 + drivers/xen/Makefile | 2 + drivers/xen/privcmd.c | 437 ++++++++++++++++++++++++++++++++++++++++++++ drivers/xen/privcmd.h | 3 + drivers/xen/xenfs/Makefile | 2 +- drivers/xen/xenfs/privcmd.c | 400 ---------------------------------------- drivers/xen/xenfs/super.c | 3 +- drivers/xen/xenfs/xenfs.h | 1 - 8 files changed, 452 insertions(+), 403 deletions(-) create mode 100644 drivers/xen/privcmd.c create mode 100644 drivers/xen/privcmd.h delete mode 100644 drivers/xen/xenfs/privcmd.c (limited to 'drivers/xen') diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 8795480c2350..a1ced521cf74 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -86,6 +86,7 @@ config XEN_BACKEND config XENFS tristate "Xen filesystem" + select XEN_PRIVCMD default y help The xen filesystem provides a way for domains to share @@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND xen-pciback.hide=(03:00.0)(04:00.0) If in doubt, say m. + +config XEN_PRIVCMD + tristate + depends on XEN + default m + endmenu diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile index 974fffdf22b2..aa31337192cc 100644 --- a/drivers/xen/Makefile +++ b/drivers/xen/Makefile @@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/ +obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o xen-evtchn-y := evtchn.o xen-gntdev-y := gntdev.o xen-gntalloc-y := gntalloc.o +xen-privcmd-y := privcmd.o diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c new file mode 100644 index 000000000000..4e8d3da89ad5 --- /dev/null +++ b/drivers/xen/privcmd.c @@ -0,0 +1,437 @@ +/****************************************************************************** + * privcmd.c + * + * Interface to privileged domain-0 commands. + * + * Copyright (c) 2002-2004, K A Fraser, B Dragovic + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "privcmd.h" + +MODULE_LICENSE("GPL"); + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); +#endif + +static long privcmd_ioctl_hypercall(void __user *udata) +{ + struct privcmd_hypercall hypercall; + long ret; + + if (copy_from_user(&hypercall, udata, sizeof(hypercall))) + return -EFAULT; + + ret = privcmd_call(hypercall.op, + hypercall.arg[0], hypercall.arg[1], + hypercall.arg[2], hypercall.arg[3], + hypercall.arg[4]); + + return ret; +} + +static void free_page_list(struct list_head *pages) +{ + struct page *p, *n; + + list_for_each_entry_safe(p, n, pages, lru) + __free_page(p); + + INIT_LIST_HEAD(pages); +} + +/* + * Given an array of items in userspace, return a list of pages + * containing the data. If copying fails, either because of memory + * allocation failure or a problem reading user memory, return an + * error code; its up to the caller to dispose of any partial list. + */ +static int gather_array(struct list_head *pagelist, + unsigned nelem, size_t size, + void __user *data) +{ + unsigned pageidx; + void *pagedata; + int ret; + + if (size > PAGE_SIZE) + return 0; + + pageidx = PAGE_SIZE; + pagedata = NULL; /* quiet, gcc */ + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page = alloc_page(GFP_KERNEL); + + ret = -ENOMEM; + if (page == NULL) + goto fail; + + pagedata = page_address(page); + + list_add_tail(&page->lru, pagelist); + pageidx = 0; + } + + ret = -EFAULT; + if (copy_from_user(pagedata + pageidx, data, size)) + goto fail; + + data += size; + pageidx += size; + } + + ret = 0; + +fail: + return ret; +} + +/* + * Call function "fn" on each element of the array fragmented + * over a list of pages. + */ +static int traverse_pages(unsigned nelem, size_t size, + struct list_head *pos, + int (*fn)(void *data, void *state), + void *state) +{ + void *pagedata; + unsigned pageidx; + int ret = 0; + + BUG_ON(size > PAGE_SIZE); + + pageidx = PAGE_SIZE; + pagedata = NULL; /* hush, gcc */ + + while (nelem--) { + if (pageidx > PAGE_SIZE-size) { + struct page *page; + pos = pos->next; + page = list_entry(pos, struct page, lru); + pagedata = page_address(page); + pageidx = 0; + } + + ret = (*fn)(pagedata + pageidx, state); + if (ret) + break; + pageidx += size; + } + + return ret; +} + +struct mmap_mfn_state { + unsigned long va; + struct vm_area_struct *vma; + domid_t domain; +}; + +static int mmap_mfn_range(void *data, void *state) +{ + struct privcmd_mmap_entry *msg = data; + struct mmap_mfn_state *st = state; + struct vm_area_struct *vma = st->vma; + int rc; + + /* Do not allow range to wrap the address space. */ + if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || + ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) + return -EINVAL; + + /* Range chunks must be contiguous in va space. */ + if ((msg->va != st->va) || + ((msg->va+(msg->npages< vma->vm_end)) + return -EINVAL; + + rc = xen_remap_domain_mfn_range(vma, + msg->va & PAGE_MASK, + msg->mfn, msg->npages, + vma->vm_page_prot, + st->domain); + if (rc < 0) + return rc; + + st->va += msg->npages << PAGE_SHIFT; + + return 0; +} + +static long privcmd_ioctl_mmap(void __user *udata) +{ + struct privcmd_mmap mmapcmd; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + LIST_HEAD(pagelist); + struct mmap_mfn_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) + return -EFAULT; + + rc = gather_array(&pagelist, + mmapcmd.num, sizeof(struct privcmd_mmap_entry), + mmapcmd.entry); + + if (rc || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + { + struct page *page = list_first_entry(&pagelist, + struct page, lru); + struct privcmd_mmap_entry *msg = page_address(page); + + vma = find_vma(mm, msg->va); + rc = -EINVAL; + + if (!vma || (msg->va != vma->vm_start) || + !privcmd_enforce_singleshot_mapping(vma)) + goto out_up; + } + + state.va = vma->vm_start; + state.vma = vma; + state.domain = mmapcmd.dom; + + rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), + &pagelist, + mmap_mfn_range, &state); + + +out_up: + up_write(&mm->mmap_sem); + +out: + free_page_list(&pagelist); + + return rc; +} + +struct mmap_batch_state { + domid_t domain; + unsigned long va; + struct vm_area_struct *vma; + int err; + + xen_pfn_t __user *user; +}; + +static int mmap_batch_fn(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, + st->vma->vm_page_prot, st->domain) < 0) { + *mfnp |= 0xf0000000U; + st->err++; + } + st->va += PAGE_SIZE; + + return 0; +} + +static int mmap_return_errors(void *data, void *state) +{ + xen_pfn_t *mfnp = data; + struct mmap_batch_state *st = state; + + return put_user(*mfnp, st->user++); +} + +static struct vm_operations_struct privcmd_vm_ops; + +static long privcmd_ioctl_mmap_batch(void __user *udata) +{ + int ret; + struct privcmd_mmapbatch m; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long nr_pages; + LIST_HEAD(pagelist); + struct mmap_batch_state state; + + if (!xen_initial_domain()) + return -EPERM; + + if (copy_from_user(&m, udata, sizeof(m))) + return -EFAULT; + + nr_pages = m.num; + if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) + return -EINVAL; + + ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), + m.arr); + + if (ret || list_empty(&pagelist)) + goto out; + + down_write(&mm->mmap_sem); + + vma = find_vma(mm, m.addr); + ret = -EINVAL; + if (!vma || + vma->vm_ops != &privcmd_vm_ops || + (m.addr != vma->vm_start) || + ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || + !privcmd_enforce_singleshot_mapping(vma)) { + up_write(&mm->mmap_sem); + goto out; + } + + state.domain = m.dom; + state.vma = vma; + state.va = m.addr; + state.err = 0; + + ret = traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, mmap_batch_fn, &state); + + up_write(&mm->mmap_sem); + + if (state.err > 0) { + state.user = m.arr; + ret = traverse_pages(m.num, sizeof(xen_pfn_t), + &pagelist, + mmap_return_errors, &state); + } + +out: + free_page_list(&pagelist); + + return ret; +} + +static long privcmd_ioctl(struct file *file, + unsigned int cmd, unsigned long data) +{ + int ret = -ENOSYS; + void __user *udata = (void __user *) data; + + switch (cmd) { + case IOCTL_PRIVCMD_HYPERCALL: + ret = privcmd_ioctl_hypercall(udata); + break; + + case IOCTL_PRIVCMD_MMAP: + ret = privcmd_ioctl_mmap(udata); + break; + + case IOCTL_PRIVCMD_MMAPBATCH: + ret = privcmd_ioctl_mmap_batch(udata); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +#ifndef HAVE_ARCH_PRIVCMD_MMAP +static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", + vma, vma->vm_start, vma->vm_end, + vmf->pgoff, vmf->virtual_address); + + return VM_FAULT_SIGBUS; +} + +static struct vm_operations_struct privcmd_vm_ops = { + .fault = privcmd_fault +}; + +static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) +{ + /* Unsupported for auto-translate guests. */ + if (xen_feature(XENFEAT_auto_translated_physmap)) + return -ENOSYS; + + /* DONTCOPY is essential for Xen because copy_page_range doesn't know + * how to recreate these mappings */ + vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; + vma->vm_ops = &privcmd_vm_ops; + vma->vm_private_data = NULL; + + return 0; +} + +static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) +{ + return (xchg(&vma->vm_private_data, (void *)1) == NULL); +} +#endif + +const struct file_operations xen_privcmd_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = privcmd_ioctl, + .mmap = privcmd_mmap, +}; +EXPORT_SYMBOL_GPL(xen_privcmd_fops); + +static struct miscdevice privcmd_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/privcmd", + .fops = &xen_privcmd_fops, +}; + +static int __init privcmd_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&privcmd_dev); + if (err != 0) { + printk(KERN_ERR "Could not register Xen privcmd device\n"); + return err; + } + return 0; +} + +static void __exit privcmd_exit(void) +{ + misc_deregister(&privcmd_dev); +} + +module_init(privcmd_init); +module_exit(privcmd_exit); diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h new file mode 100644 index 000000000000..14facaeed36f --- /dev/null +++ b/drivers/xen/privcmd.h @@ -0,0 +1,3 @@ +#include + +extern const struct file_operations xen_privcmd_fops; diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 4fde9440fe1f..5d45ff13cc01 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o -xenfs-y = super.o xenbus.o privcmd.o +xenfs-y = super.o xenbus.o xenfs-$(CONFIG_XEN_DOM0) += xenstored.o diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c deleted file mode 100644 index dbd3b16fd131..000000000000 --- a/drivers/xen/xenfs/privcmd.c +++ /dev/null @@ -1,400 +0,0 @@ -/****************************************************************************** - * privcmd.c - * - * Interface to privileged domain-0 commands. - * - * Copyright (c) 2002-2004, K A Fraser, B Dragovic - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#ifndef HAVE_ARCH_PRIVCMD_MMAP -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma); -#endif - -static long privcmd_ioctl_hypercall(void __user *udata) -{ - struct privcmd_hypercall hypercall; - long ret; - - if (copy_from_user(&hypercall, udata, sizeof(hypercall))) - return -EFAULT; - - ret = privcmd_call(hypercall.op, - hypercall.arg[0], hypercall.arg[1], - hypercall.arg[2], hypercall.arg[3], - hypercall.arg[4]); - - return ret; -} - -static void free_page_list(struct list_head *pages) -{ - struct page *p, *n; - - list_for_each_entry_safe(p, n, pages, lru) - __free_page(p); - - INIT_LIST_HEAD(pages); -} - -/* - * Given an array of items in userspace, return a list of pages - * containing the data. If copying fails, either because of memory - * allocation failure or a problem reading user memory, return an - * error code; its up to the caller to dispose of any partial list. - */ -static int gather_array(struct list_head *pagelist, - unsigned nelem, size_t size, - void __user *data) -{ - unsigned pageidx; - void *pagedata; - int ret; - - if (size > PAGE_SIZE) - return 0; - - pageidx = PAGE_SIZE; - pagedata = NULL; /* quiet, gcc */ - while (nelem--) { - if (pageidx > PAGE_SIZE-size) { - struct page *page = alloc_page(GFP_KERNEL); - - ret = -ENOMEM; - if (page == NULL) - goto fail; - - pagedata = page_address(page); - - list_add_tail(&page->lru, pagelist); - pageidx = 0; - } - - ret = -EFAULT; - if (copy_from_user(pagedata + pageidx, data, size)) - goto fail; - - data += size; - pageidx += size; - } - - ret = 0; - -fail: - return ret; -} - -/* - * Call function "fn" on each element of the array fragmented - * over a list of pages. - */ -static int traverse_pages(unsigned nelem, size_t size, - struct list_head *pos, - int (*fn)(void *data, void *state), - void *state) -{ - void *pagedata; - unsigned pageidx; - int ret = 0; - - BUG_ON(size > PAGE_SIZE); - - pageidx = PAGE_SIZE; - pagedata = NULL; /* hush, gcc */ - - while (nelem--) { - if (pageidx > PAGE_SIZE-size) { - struct page *page; - pos = pos->next; - page = list_entry(pos, struct page, lru); - pagedata = page_address(page); - pageidx = 0; - } - - ret = (*fn)(pagedata + pageidx, state); - if (ret) - break; - pageidx += size; - } - - return ret; -} - -struct mmap_mfn_state { - unsigned long va; - struct vm_area_struct *vma; - domid_t domain; -}; - -static int mmap_mfn_range(void *data, void *state) -{ - struct privcmd_mmap_entry *msg = data; - struct mmap_mfn_state *st = state; - struct vm_area_struct *vma = st->vma; - int rc; - - /* Do not allow range to wrap the address space. */ - if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) || - ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va)) - return -EINVAL; - - /* Range chunks must be contiguous in va space. */ - if ((msg->va != st->va) || - ((msg->va+(msg->npages< vma->vm_end)) - return -EINVAL; - - rc = xen_remap_domain_mfn_range(vma, - msg->va & PAGE_MASK, - msg->mfn, msg->npages, - vma->vm_page_prot, - st->domain); - if (rc < 0) - return rc; - - st->va += msg->npages << PAGE_SHIFT; - - return 0; -} - -static long privcmd_ioctl_mmap(void __user *udata) -{ - struct privcmd_mmap mmapcmd; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int rc; - LIST_HEAD(pagelist); - struct mmap_mfn_state state; - - if (!xen_initial_domain()) - return -EPERM; - - if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd))) - return -EFAULT; - - rc = gather_array(&pagelist, - mmapcmd.num, sizeof(struct privcmd_mmap_entry), - mmapcmd.entry); - - if (rc || list_empty(&pagelist)) - goto out; - - down_write(&mm->mmap_sem); - - { - struct page *page = list_first_entry(&pagelist, - struct page, lru); - struct privcmd_mmap_entry *msg = page_address(page); - - vma = find_vma(mm, msg->va); - rc = -EINVAL; - - if (!vma || (msg->va != vma->vm_start) || - !privcmd_enforce_singleshot_mapping(vma)) - goto out_up; - } - - state.va = vma->vm_start; - state.vma = vma; - state.domain = mmapcmd.dom; - - rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry), - &pagelist, - mmap_mfn_range, &state); - - -out_up: - up_write(&mm->mmap_sem); - -out: - free_page_list(&pagelist); - - return rc; -} - -struct mmap_batch_state { - domid_t domain; - unsigned long va; - struct vm_area_struct *vma; - int err; - - xen_pfn_t __user *user; -}; - -static int mmap_batch_fn(void *data, void *state) -{ - xen_pfn_t *mfnp = data; - struct mmap_batch_state *st = state; - - if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1, - st->vma->vm_page_prot, st->domain) < 0) { - *mfnp |= 0xf0000000U; - st->err++; - } - st->va += PAGE_SIZE; - - return 0; -} - -static int mmap_return_errors(void *data, void *state) -{ - xen_pfn_t *mfnp = data; - struct mmap_batch_state *st = state; - - return put_user(*mfnp, st->user++); -} - -static struct vm_operations_struct privcmd_vm_ops; - -static long privcmd_ioctl_mmap_batch(void __user *udata) -{ - int ret; - struct privcmd_mmapbatch m; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - unsigned long nr_pages; - LIST_HEAD(pagelist); - struct mmap_batch_state state; - - if (!xen_initial_domain()) - return -EPERM; - - if (copy_from_user(&m, udata, sizeof(m))) - return -EFAULT; - - nr_pages = m.num; - if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT))) - return -EINVAL; - - ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), - m.arr); - - if (ret || list_empty(&pagelist)) - goto out; - - down_write(&mm->mmap_sem); - - vma = find_vma(mm, m.addr); - ret = -EINVAL; - if (!vma || - vma->vm_ops != &privcmd_vm_ops || - (m.addr != vma->vm_start) || - ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) || - !privcmd_enforce_singleshot_mapping(vma)) { - up_write(&mm->mmap_sem); - goto out; - } - - state.domain = m.dom; - state.vma = vma; - state.va = m.addr; - state.err = 0; - - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, mmap_batch_fn, &state); - - up_write(&mm->mmap_sem); - - if (state.err > 0) { - state.user = m.arr; - ret = traverse_pages(m.num, sizeof(xen_pfn_t), - &pagelist, - mmap_return_errors, &state); - } - -out: - free_page_list(&pagelist); - - return ret; -} - -static long privcmd_ioctl(struct file *file, - unsigned int cmd, unsigned long data) -{ - int ret = -ENOSYS; - void __user *udata = (void __user *) data; - - switch (cmd) { - case IOCTL_PRIVCMD_HYPERCALL: - ret = privcmd_ioctl_hypercall(udata); - break; - - case IOCTL_PRIVCMD_MMAP: - ret = privcmd_ioctl_mmap(udata); - break; - - case IOCTL_PRIVCMD_MMAPBATCH: - ret = privcmd_ioctl_mmap_batch(udata); - break; - - default: - ret = -EINVAL; - break; - } - - return ret; -} - -#ifndef HAVE_ARCH_PRIVCMD_MMAP -static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", - vma, vma->vm_start, vma->vm_end, - vmf->pgoff, vmf->virtual_address); - - return VM_FAULT_SIGBUS; -} - -static struct vm_operations_struct privcmd_vm_ops = { - .fault = privcmd_fault -}; - -static int privcmd_mmap(struct file *file, struct vm_area_struct *vma) -{ - /* Unsupported for auto-translate guests. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return -ENOSYS; - - /* DONTCOPY is essential for Xen because copy_page_range doesn't know - * how to recreate these mappings */ - vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP; - vma->vm_ops = &privcmd_vm_ops; - vma->vm_private_data = NULL; - - return 0; -} - -static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) -{ - return (xchg(&vma->vm_private_data, (void *)1) == NULL); -} -#endif - -const struct file_operations privcmd_file_ops = { - .unlocked_ioctl = privcmd_ioctl, - .mmap = privcmd_mmap, -}; diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index 1aa389719846..a55fbf9a1519 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -16,6 +16,7 @@ #include #include "xenfs.h" +#include "../privcmd.h" #include @@ -84,7 +85,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) [1] = {}, { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, - { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR }, + { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, {""}, }; int rc; diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index b68aa6200003..5056306e7aa8 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h @@ -2,7 +2,6 @@ #define _XENFS_XENBUS_H extern const struct file_operations xenbus_file_ops; -extern const struct file_operations privcmd_file_ops; extern const struct file_operations xsd_kva_file_ops; extern const struct file_operations xsd_port_file_ops; -- cgit v1.2.3 From 2fb3683e7b164ee2b324039f7c9d90fe5b1a259b Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:47 +0100 Subject: xen: Add xenbus device driver Access to xenbus is currently handled via xenfs. This adds a device driver for xenbus and makes xenfs use this code. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/Makefile | 1 + drivers/xen/xenbus/xenbus_comms.h | 4 + drivers/xen/xenbus/xenbus_dev_frontend.c | 624 +++++++++++++++++++++++++++++++ drivers/xen/xenfs/Makefile | 2 +- drivers/xen/xenfs/super.c | 3 +- drivers/xen/xenfs/xenbus.c | 593 ----------------------------- drivers/xen/xenfs/xenfs.h | 1 - 7 files changed, 632 insertions(+), 596 deletions(-) create mode 100644 drivers/xen/xenbus/xenbus_dev_frontend.c delete mode 100644 drivers/xen/xenfs/xenbus.c (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index 8dca685358b4..a2ea363b9f34 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -1,4 +1,5 @@ obj-y += xenbus.o +obj-y += xenbus_dev_frontend.o xenbus-objs = xenbus-objs += xenbus_client.o diff --git a/drivers/xen/xenbus/xenbus_comms.h b/drivers/xen/xenbus/xenbus_comms.h index c21db7513736..6e42800fa499 100644 --- a/drivers/xen/xenbus/xenbus_comms.h +++ b/drivers/xen/xenbus/xenbus_comms.h @@ -31,6 +31,8 @@ #ifndef _XENBUS_COMMS_H #define _XENBUS_COMMS_H +#include + int xs_init(void); int xb_init_comms(void); @@ -43,4 +45,6 @@ int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +extern const struct file_operations xen_xenbus_fops; + #endif /* _XENBUS_COMMS_H */ diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c new file mode 100644 index 000000000000..fb30cffe0338 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -0,0 +1,624 @@ +/* + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Changes: + * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem + * and /proc/xen compatibility mount point. + * Turned xenfs into a loadable module. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenbus_comms.h" + +#include +#include + +MODULE_LICENSE("GPL"); + +/* + * An element of a list of outstanding transactions, for which we're + * still waiting a reply. + */ +struct xenbus_transaction_holder { + struct list_head list; + struct xenbus_transaction handle; +}; + +/* + * A buffer of data on the queue. + */ +struct read_buffer { + struct list_head list; + unsigned int cons; + unsigned int len; + char msg[]; +}; + +struct xenbus_file_priv { + /* + * msgbuffer_mutex is held while partial requests are built up + * and complete requests are acted on. It therefore protects + * the "transactions" and "watches" lists, and the partial + * request length and buffer. + * + * reply_mutex protects the reply being built up to return to + * usermode. It nests inside msgbuffer_mutex but may be held + * alone during a watch callback. + */ + struct mutex msgbuffer_mutex; + + /* In-progress transactions */ + struct list_head transactions; + + /* Active watches. */ + struct list_head watches; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. */ + struct mutex reply_mutex; + struct list_head read_buffers; + wait_queue_head_t read_waitq; + +}; + +/* Read out any raw xenbus messages queued up. */ +static ssize_t xenbus_file_read(struct file *filp, + char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + struct read_buffer *rb; + unsigned i; + int ret; + + mutex_lock(&u->reply_mutex); +again: + while (list_empty(&u->read_buffers)) { + mutex_unlock(&u->reply_mutex); + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + ret = wait_event_interruptible(u->read_waitq, + !list_empty(&u->read_buffers)); + if (ret) + return ret; + mutex_lock(&u->reply_mutex); + } + + rb = list_entry(u->read_buffers.next, struct read_buffer, list); + i = 0; + while (i < len) { + unsigned sz = min((unsigned)len - i, rb->len - rb->cons); + + ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); + + i += sz - ret; + rb->cons += sz - ret; + + if (ret != 0) { + if (i == 0) + i = -EFAULT; + goto out; + } + + /* Clear out buffer if it has been consumed */ + if (rb->cons == rb->len) { + list_del(&rb->list); + kfree(rb); + if (list_empty(&u->read_buffers)) + break; + rb = list_entry(u->read_buffers.next, + struct read_buffer, list); + } + } + if (i == 0) + goto again; + +out: + mutex_unlock(&u->reply_mutex); + return i; +} + +/* + * Add a buffer to the queue. Caller must hold the appropriate lock + * if the queue is not local. (Commonly the caller will build up + * multiple queued buffers on a temporary local list, and then add it + * to the appropriate list under lock once all the buffers have een + * successfully allocated.) + */ +static int queue_reply(struct list_head *queue, const void *data, size_t len) +{ + struct read_buffer *rb; + + if (len == 0) + return 0; + + rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); + if (rb == NULL) + return -ENOMEM; + + rb->cons = 0; + rb->len = len; + + memcpy(rb->msg, data, len); + + list_add_tail(&rb->list, queue); + return 0; +} + +/* + * Free all the read_buffer s on a list. + * Caller must have sole reference to list. + */ +static void queue_cleanup(struct list_head *list) +{ + struct read_buffer *rb; + + while (!list_empty(list)) { + rb = list_entry(list->next, struct read_buffer, list); + list_del(list->next); + kfree(rb); + } +} + +struct watch_adapter { + struct list_head list; + struct xenbus_watch watch; + struct xenbus_file_priv *dev_data; + char *token; +}; + +static void free_watch_adapter(struct watch_adapter *watch) +{ + kfree(watch->watch.node); + kfree(watch->token); + kfree(watch); +} + +static struct watch_adapter *alloc_watch_adapter(const char *path, + const char *token) +{ + struct watch_adapter *watch; + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (watch == NULL) + goto out_fail; + + watch->watch.node = kstrdup(path, GFP_KERNEL); + if (watch->watch.node == NULL) + goto out_free; + + watch->token = kstrdup(token, GFP_KERNEL); + if (watch->token == NULL) + goto out_free; + + return watch; + +out_free: + free_watch_adapter(watch); + +out_fail: + return NULL; +} + +static void watch_fired(struct xenbus_watch *watch, + const char **vec, + unsigned int len) +{ + struct watch_adapter *adap; + struct xsd_sockmsg hdr; + const char *path, *token; + int path_len, tok_len, body_len, data_len = 0; + int ret; + LIST_HEAD(staging_q); + + adap = container_of(watch, struct watch_adapter, watch); + + path = vec[XS_WATCH_PATH]; + token = adap->token; + + path_len = strlen(path) + 1; + tok_len = strlen(token) + 1; + if (len > 2) + data_len = vec[len] - vec[2] + 1; + body_len = path_len + tok_len + data_len; + + hdr.type = XS_WATCH_EVENT; + hdr.len = body_len; + + mutex_lock(&adap->dev_data->reply_mutex); + + ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); + if (!ret) + ret = queue_reply(&staging_q, path, path_len); + if (!ret) + ret = queue_reply(&staging_q, token, tok_len); + if (!ret && len > 2) + ret = queue_reply(&staging_q, vec[2], data_len); + + if (!ret) { + /* success: pass reply list onto watcher */ + list_splice_tail(&staging_q, &adap->dev_data->read_buffers); + wake_up(&adap->dev_data->read_waitq); + } else + queue_cleanup(&staging_q); + + mutex_unlock(&adap->dev_data->reply_mutex); +} + +static int xenbus_write_transaction(unsigned msg_type, + struct xenbus_file_priv *u) +{ + int rc; + void *reply; + struct xenbus_transaction_holder *trans = NULL; + LIST_HEAD(staging_q); + + if (msg_type == XS_TRANSACTION_START) { + trans = kmalloc(sizeof(*trans), GFP_KERNEL); + if (!trans) { + rc = -ENOMEM; + goto out; + } + } + + reply = xenbus_dev_request_and_reply(&u->u.msg); + if (IS_ERR(reply)) { + kfree(trans); + rc = PTR_ERR(reply); + goto out; + } + + if (msg_type == XS_TRANSACTION_START) { + trans->handle.id = simple_strtoul(reply, NULL, 0); + + list_add(&trans->list, &u->transactions); + } else if (msg_type == XS_TRANSACTION_END) { + list_for_each_entry(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; + BUG_ON(&trans->list == &u->transactions); + list_del(&trans->list); + + kfree(trans); + } + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); + if (!rc) + rc = queue_reply(&staging_q, reply, u->u.msg.len); + if (!rc) { + list_splice_tail(&staging_q, &u->read_buffers); + wake_up(&u->read_waitq); + } else { + queue_cleanup(&staging_q); + } + mutex_unlock(&u->reply_mutex); + + kfree(reply); + +out: + return rc; +} + +static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) +{ + struct watch_adapter *watch, *tmp_watch; + char *path, *token; + int err, rc; + LIST_HEAD(staging_q); + + path = u->u.buffer + sizeof(u->u.msg); + token = memchr(path, 0, u->u.msg.len); + if (token == NULL) { + rc = -EILSEQ; + goto out; + } + token++; + + if (msg_type == XS_WATCH) { + watch = alloc_watch_adapter(path, token); + if (watch == NULL) { + rc = -ENOMEM; + goto out; + } + + watch->watch.callback = watch_fired; + watch->dev_data = u; + + err = register_xenbus_watch(&watch->watch); + if (err) { + free_watch_adapter(watch); + rc = err; + goto out; + } + list_add(&watch->list, &u->watches); + } else { + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + if (!strcmp(watch->token, token) && + !strcmp(watch->watch.node, path)) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + break; + } + } + } + + /* Success. Synthesize a reply to say all is OK. */ + { + struct { + struct xsd_sockmsg hdr; + char body[3]; + } __packed reply = { + { + .type = msg_type, + .len = sizeof(reply.body) + }, + "OK" + }; + + mutex_lock(&u->reply_mutex); + rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); + wake_up(&u->read_waitq); + mutex_unlock(&u->reply_mutex); + } + +out: + return rc; +} + +static ssize_t xenbus_file_write(struct file *filp, + const char __user *ubuf, + size_t len, loff_t *ppos) +{ + struct xenbus_file_priv *u = filp->private_data; + uint32_t msg_type; + int rc = len; + int ret; + LIST_HEAD(staging_q); + + /* + * We're expecting usermode to be writing properly formed + * xenbus messages. If they write an incomplete message we + * buffer it up. Once it is complete, we act on it. + */ + + /* + * Make sure concurrent writers can't stomp all over each + * other's messages and make a mess of our partial message + * buffer. We don't make any attemppt to stop multiple + * writers from making a mess of each other's incomplete + * messages; we're just trying to guarantee our own internal + * consistency and make sure that single writes are handled + * atomically. + */ + mutex_lock(&u->msgbuffer_mutex); + + /* Get this out of the way early to avoid confusion */ + if (len == 0) + goto out; + + /* Can't write a xenbus message larger we can buffer */ + if ((len + u->len) > sizeof(u->u.buffer)) { + /* On error, dump existing buffer */ + u->len = 0; + rc = -EINVAL; + goto out; + } + + ret = copy_from_user(u->u.buffer + u->len, ubuf, len); + + if (ret != 0) { + rc = -EFAULT; + goto out; + } + + /* Deal with a partial copy. */ + len -= ret; + rc = len; + + u->len += len; + + /* Return if we haven't got a full message yet */ + if (u->len < sizeof(u->u.msg)) + goto out; /* not even the header yet */ + + /* If we're expecting a message that's larger than we can + possibly send, dump what we have and return an error. */ + if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { + rc = -E2BIG; + u->len = 0; + goto out; + } + + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + goto out; /* incomplete data portion */ + + /* + * OK, now we have a complete message. Do something with it. + */ + + msg_type = u->u.msg.type; + + switch (msg_type) { + case XS_WATCH: + case XS_UNWATCH: + /* (Un)Ask for some path to be watched for changes */ + ret = xenbus_write_watch(msg_type, u); + break; + + default: + /* Send out a transaction */ + ret = xenbus_write_transaction(msg_type, u); + break; + } + if (ret != 0) + rc = ret; + + /* Buffered message consumed */ + u->len = 0; + + out: + mutex_unlock(&u->msgbuffer_mutex); + return rc; +} + +static int xenbus_file_open(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u; + + if (xen_store_evtchn == 0) + return -ENOENT; + + nonseekable_open(inode, filp); + + u = kzalloc(sizeof(*u), GFP_KERNEL); + if (u == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&u->transactions); + INIT_LIST_HEAD(&u->watches); + INIT_LIST_HEAD(&u->read_buffers); + init_waitqueue_head(&u->read_waitq); + + mutex_init(&u->reply_mutex); + mutex_init(&u->msgbuffer_mutex); + + filp->private_data = u; + + return 0; +} + +static int xenbus_file_release(struct inode *inode, struct file *filp) +{ + struct xenbus_file_priv *u = filp->private_data; + struct xenbus_transaction_holder *trans, *tmp; + struct watch_adapter *watch, *tmp_watch; + struct read_buffer *rb, *tmp_rb; + + /* + * No need for locking here because there are no other users, + * by definition. + */ + + list_for_each_entry_safe(trans, tmp, &u->transactions, list) { + xenbus_transaction_end(trans->handle, 1); + list_del(&trans->list); + kfree(trans); + } + + list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { + unregister_xenbus_watch(&watch->watch); + list_del(&watch->list); + free_watch_adapter(watch); + } + + list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { + list_del(&rb->list); + kfree(rb); + } + kfree(u); + + return 0; +} + +static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) +{ + struct xenbus_file_priv *u = file->private_data; + + poll_wait(file, &u->read_waitq, wait); + if (!list_empty(&u->read_buffers)) + return POLLIN | POLLRDNORM; + return 0; +} + +const struct file_operations xen_xenbus_fops = { + .read = xenbus_file_read, + .write = xenbus_file_write, + .open = xenbus_file_open, + .release = xenbus_file_release, + .poll = xenbus_file_poll, + .llseek = no_llseek, +}; +EXPORT_SYMBOL_GPL(xen_xenbus_fops); + +static struct miscdevice xenbus_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus", + .fops = &xen_xenbus_fops, +}; + +static int __init xenbus_init(void) +{ + int err; + + if (!xen_domain()) + return -ENODEV; + + err = misc_register(&xenbus_dev); + if (err) + printk(KERN_ERR "Could not register xenbus device\n"); + return err; +} + +static void __exit xenbus_exit(void) +{ + misc_deregister(&xenbus_dev); +} + +module_init(xenbus_init); +module_exit(xenbus_exit); diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile index 5d45ff13cc01..b019865fcc56 100644 --- a/drivers/xen/xenfs/Makefile +++ b/drivers/xen/xenfs/Makefile @@ -1,4 +1,4 @@ obj-$(CONFIG_XENFS) += xenfs.o -xenfs-y = super.o xenbus.o +xenfs-y = super.o xenfs-$(CONFIG_XEN_DOM0) += xenstored.o diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index a55fbf9a1519..a84b53c01436 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -17,6 +17,7 @@ #include "xenfs.h" #include "../privcmd.h" +#include "../xenbus/xenbus_comms.h" #include @@ -83,7 +84,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent) { static struct tree_descr xenfs_files[] = { [1] = {}, - { "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR }, + { "xenbus", &xen_xenbus_fops, S_IRUSR|S_IWUSR }, { "capabilities", &capabilities_file_ops, S_IRUGO }, { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR }, {""}, diff --git a/drivers/xen/xenfs/xenbus.c b/drivers/xen/xenfs/xenbus.c deleted file mode 100644 index bbd000f88af7..000000000000 --- a/drivers/xen/xenfs/xenbus.c +++ /dev/null @@ -1,593 +0,0 @@ -/* - * Driver giving user-space access to the kernel's xenbus connection - * to xenstore. - * - * Copyright (c) 2005, Christian Limpach - * Copyright (c) 2005, Rusty Russell, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Changes: - * 2008-10-07 Alex Zeffertt Replaced /proc/xen/xenbus with xenfs filesystem - * and /proc/xen compatibility mount point. - * Turned xenfs into a loadable module. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "xenfs.h" -#include "../xenbus/xenbus_comms.h" - -#include -#include - -/* - * An element of a list of outstanding transactions, for which we're - * still waiting a reply. - */ -struct xenbus_transaction_holder { - struct list_head list; - struct xenbus_transaction handle; -}; - -/* - * A buffer of data on the queue. - */ -struct read_buffer { - struct list_head list; - unsigned int cons; - unsigned int len; - char msg[]; -}; - -struct xenbus_file_priv { - /* - * msgbuffer_mutex is held while partial requests are built up - * and complete requests are acted on. It therefore protects - * the "transactions" and "watches" lists, and the partial - * request length and buffer. - * - * reply_mutex protects the reply being built up to return to - * usermode. It nests inside msgbuffer_mutex but may be held - * alone during a watch callback. - */ - struct mutex msgbuffer_mutex; - - /* In-progress transactions */ - struct list_head transactions; - - /* Active watches. */ - struct list_head watches; - - /* Partial request. */ - unsigned int len; - union { - struct xsd_sockmsg msg; - char buffer[PAGE_SIZE]; - } u; - - /* Response queue. */ - struct mutex reply_mutex; - struct list_head read_buffers; - wait_queue_head_t read_waitq; - -}; - -/* Read out any raw xenbus messages queued up. */ -static ssize_t xenbus_file_read(struct file *filp, - char __user *ubuf, - size_t len, loff_t *ppos) -{ - struct xenbus_file_priv *u = filp->private_data; - struct read_buffer *rb; - unsigned i; - int ret; - - mutex_lock(&u->reply_mutex); -again: - while (list_empty(&u->read_buffers)) { - mutex_unlock(&u->reply_mutex); - if (filp->f_flags & O_NONBLOCK) - return -EAGAIN; - - ret = wait_event_interruptible(u->read_waitq, - !list_empty(&u->read_buffers)); - if (ret) - return ret; - mutex_lock(&u->reply_mutex); - } - - rb = list_entry(u->read_buffers.next, struct read_buffer, list); - i = 0; - while (i < len) { - unsigned sz = min((unsigned)len - i, rb->len - rb->cons); - - ret = copy_to_user(ubuf + i, &rb->msg[rb->cons], sz); - - i += sz - ret; - rb->cons += sz - ret; - - if (ret != 0) { - if (i == 0) - i = -EFAULT; - goto out; - } - - /* Clear out buffer if it has been consumed */ - if (rb->cons == rb->len) { - list_del(&rb->list); - kfree(rb); - if (list_empty(&u->read_buffers)) - break; - rb = list_entry(u->read_buffers.next, - struct read_buffer, list); - } - } - if (i == 0) - goto again; - -out: - mutex_unlock(&u->reply_mutex); - return i; -} - -/* - * Add a buffer to the queue. Caller must hold the appropriate lock - * if the queue is not local. (Commonly the caller will build up - * multiple queued buffers on a temporary local list, and then add it - * to the appropriate list under lock once all the buffers have een - * successfully allocated.) - */ -static int queue_reply(struct list_head *queue, const void *data, size_t len) -{ - struct read_buffer *rb; - - if (len == 0) - return 0; - - rb = kmalloc(sizeof(*rb) + len, GFP_KERNEL); - if (rb == NULL) - return -ENOMEM; - - rb->cons = 0; - rb->len = len; - - memcpy(rb->msg, data, len); - - list_add_tail(&rb->list, queue); - return 0; -} - -/* - * Free all the read_buffer s on a list. - * Caller must have sole reference to list. - */ -static void queue_cleanup(struct list_head *list) -{ - struct read_buffer *rb; - - while (!list_empty(list)) { - rb = list_entry(list->next, struct read_buffer, list); - list_del(list->next); - kfree(rb); - } -} - -struct watch_adapter { - struct list_head list; - struct xenbus_watch watch; - struct xenbus_file_priv *dev_data; - char *token; -}; - -static void free_watch_adapter(struct watch_adapter *watch) -{ - kfree(watch->watch.node); - kfree(watch->token); - kfree(watch); -} - -static struct watch_adapter *alloc_watch_adapter(const char *path, - const char *token) -{ - struct watch_adapter *watch; - - watch = kzalloc(sizeof(*watch), GFP_KERNEL); - if (watch == NULL) - goto out_fail; - - watch->watch.node = kstrdup(path, GFP_KERNEL); - if (watch->watch.node == NULL) - goto out_free; - - watch->token = kstrdup(token, GFP_KERNEL); - if (watch->token == NULL) - goto out_free; - - return watch; - -out_free: - free_watch_adapter(watch); - -out_fail: - return NULL; -} - -static void watch_fired(struct xenbus_watch *watch, - const char **vec, - unsigned int len) -{ - struct watch_adapter *adap; - struct xsd_sockmsg hdr; - const char *path, *token; - int path_len, tok_len, body_len, data_len = 0; - int ret; - LIST_HEAD(staging_q); - - adap = container_of(watch, struct watch_adapter, watch); - - path = vec[XS_WATCH_PATH]; - token = adap->token; - - path_len = strlen(path) + 1; - tok_len = strlen(token) + 1; - if (len > 2) - data_len = vec[len] - vec[2] + 1; - body_len = path_len + tok_len + data_len; - - hdr.type = XS_WATCH_EVENT; - hdr.len = body_len; - - mutex_lock(&adap->dev_data->reply_mutex); - - ret = queue_reply(&staging_q, &hdr, sizeof(hdr)); - if (!ret) - ret = queue_reply(&staging_q, path, path_len); - if (!ret) - ret = queue_reply(&staging_q, token, tok_len); - if (!ret && len > 2) - ret = queue_reply(&staging_q, vec[2], data_len); - - if (!ret) { - /* success: pass reply list onto watcher */ - list_splice_tail(&staging_q, &adap->dev_data->read_buffers); - wake_up(&adap->dev_data->read_waitq); - } else - queue_cleanup(&staging_q); - - mutex_unlock(&adap->dev_data->reply_mutex); -} - -static int xenbus_write_transaction(unsigned msg_type, - struct xenbus_file_priv *u) -{ - int rc; - void *reply; - struct xenbus_transaction_holder *trans = NULL; - LIST_HEAD(staging_q); - - if (msg_type == XS_TRANSACTION_START) { - trans = kmalloc(sizeof(*trans), GFP_KERNEL); - if (!trans) { - rc = -ENOMEM; - goto out; - } - } - - reply = xenbus_dev_request_and_reply(&u->u.msg); - if (IS_ERR(reply)) { - kfree(trans); - rc = PTR_ERR(reply); - goto out; - } - - if (msg_type == XS_TRANSACTION_START) { - trans->handle.id = simple_strtoul(reply, NULL, 0); - - list_add(&trans->list, &u->transactions); - } else if (msg_type == XS_TRANSACTION_END) { - list_for_each_entry(trans, &u->transactions, list) - if (trans->handle.id == u->u.msg.tx_id) - break; - BUG_ON(&trans->list == &u->transactions); - list_del(&trans->list); - - kfree(trans); - } - - mutex_lock(&u->reply_mutex); - rc = queue_reply(&staging_q, &u->u.msg, sizeof(u->u.msg)); - if (!rc) - rc = queue_reply(&staging_q, reply, u->u.msg.len); - if (!rc) { - list_splice_tail(&staging_q, &u->read_buffers); - wake_up(&u->read_waitq); - } else { - queue_cleanup(&staging_q); - } - mutex_unlock(&u->reply_mutex); - - kfree(reply); - -out: - return rc; -} - -static int xenbus_write_watch(unsigned msg_type, struct xenbus_file_priv *u) -{ - struct watch_adapter *watch, *tmp_watch; - char *path, *token; - int err, rc; - LIST_HEAD(staging_q); - - path = u->u.buffer + sizeof(u->u.msg); - token = memchr(path, 0, u->u.msg.len); - if (token == NULL) { - rc = -EILSEQ; - goto out; - } - token++; - - if (msg_type == XS_WATCH) { - watch = alloc_watch_adapter(path, token); - if (watch == NULL) { - rc = -ENOMEM; - goto out; - } - - watch->watch.callback = watch_fired; - watch->dev_data = u; - - err = register_xenbus_watch(&watch->watch); - if (err) { - free_watch_adapter(watch); - rc = err; - goto out; - } - list_add(&watch->list, &u->watches); - } else { - list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { - if (!strcmp(watch->token, token) && - !strcmp(watch->watch.node, path)) { - unregister_xenbus_watch(&watch->watch); - list_del(&watch->list); - free_watch_adapter(watch); - break; - } - } - } - - /* Success. Synthesize a reply to say all is OK. */ - { - struct { - struct xsd_sockmsg hdr; - char body[3]; - } __packed reply = { - { - .type = msg_type, - .len = sizeof(reply.body) - }, - "OK" - }; - - mutex_lock(&u->reply_mutex); - rc = queue_reply(&u->read_buffers, &reply, sizeof(reply)); - wake_up(&u->read_waitq); - mutex_unlock(&u->reply_mutex); - } - -out: - return rc; -} - -static ssize_t xenbus_file_write(struct file *filp, - const char __user *ubuf, - size_t len, loff_t *ppos) -{ - struct xenbus_file_priv *u = filp->private_data; - uint32_t msg_type; - int rc = len; - int ret; - LIST_HEAD(staging_q); - - /* - * We're expecting usermode to be writing properly formed - * xenbus messages. If they write an incomplete message we - * buffer it up. Once it is complete, we act on it. - */ - - /* - * Make sure concurrent writers can't stomp all over each - * other's messages and make a mess of our partial message - * buffer. We don't make any attemppt to stop multiple - * writers from making a mess of each other's incomplete - * messages; we're just trying to guarantee our own internal - * consistency and make sure that single writes are handled - * atomically. - */ - mutex_lock(&u->msgbuffer_mutex); - - /* Get this out of the way early to avoid confusion */ - if (len == 0) - goto out; - - /* Can't write a xenbus message larger we can buffer */ - if ((len + u->len) > sizeof(u->u.buffer)) { - /* On error, dump existing buffer */ - u->len = 0; - rc = -EINVAL; - goto out; - } - - ret = copy_from_user(u->u.buffer + u->len, ubuf, len); - - if (ret != 0) { - rc = -EFAULT; - goto out; - } - - /* Deal with a partial copy. */ - len -= ret; - rc = len; - - u->len += len; - - /* Return if we haven't got a full message yet */ - if (u->len < sizeof(u->u.msg)) - goto out; /* not even the header yet */ - - /* If we're expecting a message that's larger than we can - possibly send, dump what we have and return an error. */ - if ((sizeof(u->u.msg) + u->u.msg.len) > sizeof(u->u.buffer)) { - rc = -E2BIG; - u->len = 0; - goto out; - } - - if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) - goto out; /* incomplete data portion */ - - /* - * OK, now we have a complete message. Do something with it. - */ - - msg_type = u->u.msg.type; - - switch (msg_type) { - case XS_WATCH: - case XS_UNWATCH: - /* (Un)Ask for some path to be watched for changes */ - ret = xenbus_write_watch(msg_type, u); - break; - - default: - /* Send out a transaction */ - ret = xenbus_write_transaction(msg_type, u); - break; - } - if (ret != 0) - rc = ret; - - /* Buffered message consumed */ - u->len = 0; - - out: - mutex_unlock(&u->msgbuffer_mutex); - return rc; -} - -static int xenbus_file_open(struct inode *inode, struct file *filp) -{ - struct xenbus_file_priv *u; - - if (xen_store_evtchn == 0) - return -ENOENT; - - nonseekable_open(inode, filp); - - u = kzalloc(sizeof(*u), GFP_KERNEL); - if (u == NULL) - return -ENOMEM; - - INIT_LIST_HEAD(&u->transactions); - INIT_LIST_HEAD(&u->watches); - INIT_LIST_HEAD(&u->read_buffers); - init_waitqueue_head(&u->read_waitq); - - mutex_init(&u->reply_mutex); - mutex_init(&u->msgbuffer_mutex); - - filp->private_data = u; - - return 0; -} - -static int xenbus_file_release(struct inode *inode, struct file *filp) -{ - struct xenbus_file_priv *u = filp->private_data; - struct xenbus_transaction_holder *trans, *tmp; - struct watch_adapter *watch, *tmp_watch; - struct read_buffer *rb, *tmp_rb; - - /* - * No need for locking here because there are no other users, - * by definition. - */ - - list_for_each_entry_safe(trans, tmp, &u->transactions, list) { - xenbus_transaction_end(trans->handle, 1); - list_del(&trans->list); - kfree(trans); - } - - list_for_each_entry_safe(watch, tmp_watch, &u->watches, list) { - unregister_xenbus_watch(&watch->watch); - list_del(&watch->list); - free_watch_adapter(watch); - } - - list_for_each_entry_safe(rb, tmp_rb, &u->read_buffers, list) { - list_del(&rb->list); - kfree(rb); - } - kfree(u); - - return 0; -} - -static unsigned int xenbus_file_poll(struct file *file, poll_table *wait) -{ - struct xenbus_file_priv *u = file->private_data; - - poll_wait(file, &u->read_waitq, wait); - if (!list_empty(&u->read_buffers)) - return POLLIN | POLLRDNORM; - return 0; -} - -const struct file_operations xenbus_file_ops = { - .read = xenbus_file_read, - .write = xenbus_file_write, - .open = xenbus_file_open, - .release = xenbus_file_release, - .poll = xenbus_file_poll, - .llseek = no_llseek, -}; diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h index 5056306e7aa8..6b80c7779c02 100644 --- a/drivers/xen/xenfs/xenfs.h +++ b/drivers/xen/xenfs/xenfs.h @@ -1,7 +1,6 @@ #ifndef _XENFS_XENBUS_H #define _XENFS_XENBUS_H -extern const struct file_operations xenbus_file_ops; extern const struct file_operations xsd_kva_file_ops; extern const struct file_operations xsd_port_file_ops; -- cgit v1.2.3 From e9f0fec3f5d406c500861da779d16a779a110055 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:48 +0100 Subject: xen: Add xenbus_backend device Access for xenstored to the event channel and pre-allocated ring is managed via xenfs. This adds its own character device featuring mmap for the ring and an ioctl for the event channel. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/Makefile | 1 + drivers/xen/xenbus/xenbus_dev_backend.c | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) create mode 100644 drivers/xen/xenbus/xenbus_dev_backend.c (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/Makefile b/drivers/xen/xenbus/Makefile index a2ea363b9f34..31e2e9050c7a 100644 --- a/drivers/xen/xenbus/Makefile +++ b/drivers/xen/xenbus/Makefile @@ -10,4 +10,5 @@ xenbus-objs += xenbus_probe.o xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o xenbus-objs += $(xenbus-be-objs-y) +obj-$(CONFIG_XEN_BACKEND) += xenbus_dev_backend.o obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c new file mode 100644 index 000000000000..a2092bd97693 --- /dev/null +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -0,0 +1,89 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "xenbus_comms.h" + +MODULE_LICENSE("GPL"); + +static int xenbus_backend_open(struct inode *inode, struct file *filp) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + return nonseekable_open(inode, filp); +} + +static long xenbus_backend_ioctl(struct file *file, unsigned int cmd, unsigned long data) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (cmd) { + case IOCTL_XENBUS_BACKEND_EVTCHN: + if (xen_store_evtchn > 0) + return xen_store_evtchn; + return -ENODEV; + + default: + return -ENOTTY; + } +} + +static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_pfn(xen_store_interface), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +const struct file_operations xenbus_backend_fops = { + .open = xenbus_backend_open, + .mmap = xenbus_backend_mmap, + .unlocked_ioctl = xenbus_backend_ioctl, +}; + +static struct miscdevice xenbus_backend_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "xen/xenbus_backend", + .fops = &xenbus_backend_fops, +}; + +static int __init xenbus_backend_init(void) +{ + int err; + + if (!xen_initial_domain()) + return -ENODEV; + + err = misc_register(&xenbus_backend_dev); + if (err) + printk(KERN_ERR "Could not register xenbus backend device\n"); + return err; +} + +static void __exit xenbus_backend_exit(void) +{ + misc_deregister(&xenbus_backend_dev); +} + +module_init(xenbus_backend_init); +module_exit(xenbus_backend_exit); -- cgit v1.2.3 From a63f9857134b0b6565bf86a06a94df876bdd18d3 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:49 +0100 Subject: xen/privcmd: Remove unused support for arch specific privcmp mmap This was used for ia64. But there is no working ia64 support in sight, so remove it for now. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/privcmd.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 4e8d3da89ad5..ccee0f16bcf8 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -365,7 +365,6 @@ static long privcmd_ioctl(struct file *file, return ret; } -#ifndef HAVE_ARCH_PRIVCMD_MMAP static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n", @@ -398,7 +397,6 @@ static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma) { return (xchg(&vma->vm_private_data, (void *)1) == NULL); } -#endif const struct file_operations xen_privcmd_fops = { .owner = THIS_MODULE, -- cgit v1.2.3 From fe7acdbec195339e2fbcee518229e85fb9c329b1 Mon Sep 17 00:00:00 2001 From: Bastian Blank Date: Sat, 10 Dec 2011 19:29:50 +0100 Subject: xen/xenbus-frontend: Make error message more clear Add the work frontend to the error message because we now also have a backend device. Signed-off-by: Bastian Blank Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index fb30cffe0338..9f6be7d59a68 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -611,7 +611,7 @@ static int __init xenbus_init(void) err = misc_register(&xenbus_dev); if (err) - printk(KERN_ERR "Could not register xenbus device\n"); + printk(KERN_ERR "Could not register xenbus frontend device\n"); return err; } -- cgit v1.2.3 From 12275dd4b747f5d87fa36229774d76bca8e63068 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 19 Dec 2011 09:30:35 -0500 Subject: Revert "xen/pv-on-hvm kexec: add xs_reset_watches to shutdown watches from old kernel" This reverts commit ddacf5ef684a655abe2bb50c4b2a5b72ae0d5e05. As when booting the kernel under Amazon EC2 as an HVM guest it ends up hanging during startup. Reverting this we loose the fix for kexec booting to the crash kernels. Fixes Canonical BZ #901305 (http://bugs.launchpad.net/bugs/901305) Tested-by: Alessandro Salvatori Reported-by: Stefan Bader Acked-by: Ian Campbell Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index b3b8f2f3ad10..ede860f921df 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -621,15 +621,6 @@ static struct xenbus_watch *find_watch(const char *token) return NULL; } -static void xs_reset_watches(void) -{ - int err; - - err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL)); - if (err && err != -EEXIST) - printk(KERN_WARNING "xs_reset_watches failed: %d\n", err); -} - /* Register callback to watch this node. */ int register_xenbus_watch(struct xenbus_watch *watch) { @@ -906,9 +897,5 @@ int xs_init(void) if (IS_ERR(task)) return PTR_ERR(task); - /* shutdown watches for kexec boot */ - if (xen_hvm_domain()) - xs_reset_watches(); - return 0; } -- cgit v1.2.3 From b79d2ff98caee60c07a7598aba3b26acd1655a99 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Mon, 19 Dec 2011 15:08:15 -0500 Subject: xen/xenbus-frontend: Fix compile error with randconfig drivers/xen/xenbus/xenbus_dev_frontend.c: In function 'xenbus_init': drivers/xen/xenbus/xenbus_dev_frontend.c:609:2: error: implicit declaration of function 'xen_domain' Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 9f6be7d59a68..aec01420d979 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -58,6 +58,7 @@ #include "xenbus_comms.h" #include +#include #include MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 2c5d37d30fbd27d424a18abc16786cb152a37017 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Mon, 19 Dec 2011 14:55:14 -0500 Subject: xenbus: Support HVM backends Add HVM implementations of xenbus_(map,unmap)_ring_v(alloc,free) so that ring mappings can be done without using GNTMAP_contains_pte which is not supported on HVM. This also removes the need to use vmlist_lock on PV by tracking the allocated xenbus rings. Signed-off-by: Daniel De Graaf [v1: Fix compile error when XENBUS_FRONTEND is defined as module] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_client.c | 176 ++++++++++++++++++++++++++++++++----- drivers/xen/xenbus/xenbus_probe.c | 2 + drivers/xen/xenbus/xenbus_probe.h | 2 + 3 files changed, 159 insertions(+), 21 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 1906125eab49..0fa52916ad05 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -32,15 +32,39 @@ #include #include +#include #include #include #include #include #include #include +#include #include #include #include +#include + +#include "xenbus_probe.h" + +struct xenbus_map_node { + struct list_head next; + union { + struct vm_struct *area; /* PV */ + struct page *page; /* HVM */ + }; + grant_handle_t handle; +}; + +static DEFINE_SPINLOCK(xenbus_valloc_lock); +static LIST_HEAD(xenbus_valloc_pages); + +struct xenbus_ring_ops { + int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); + int (*unmap)(struct xenbus_device *dev, void *vaddr); +}; + +static const struct xenbus_ring_ops *ring_ops __read_mostly; const char *xenbus_strstate(enum xenbus_state state) { @@ -435,20 +459,34 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); * XenbusStateClosing and the error message will be saved in XenStore. */ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) +{ + return ring_ops->map(dev, gnt_ref, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, + int gnt_ref, void **vaddr) { struct gnttab_map_grant_ref op = { .flags = GNTMAP_host_map | GNTMAP_contains_pte, .ref = gnt_ref, .dom = dev->otherend_id, }; + struct xenbus_map_node *node; struct vm_struct *area; pte_t *pte; *vaddr = NULL; + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + area = alloc_vm_area(PAGE_SIZE, &pte); - if (!area) + if (!area) { + kfree(node); return -ENOMEM; + } op.host_addr = arbitrary_virt_to_machine(pte).maddr; @@ -457,19 +495,59 @@ int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) if (op.status != GNTST_okay) { free_vm_area(area); + kfree(node); xenbus_dev_fatal(dev, op.status, "mapping in shared page %d from domain %d", gnt_ref, dev->otherend_id); return op.status; } - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; + node->handle = op.handle; + node->area = area; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); *vaddr = area->addr; return 0; } -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); + +static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, + int gnt_ref, void **vaddr) +{ + struct xenbus_map_node *node; + int err; + void *addr; + + *vaddr = NULL; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); + if (err) + goto out_err; + + addr = pfn_to_kaddr(page_to_pfn(node->page)); + + err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); + if (err) + goto out_err; + + spin_lock(&xenbus_valloc_lock); + list_add(&node->next, &xenbus_valloc_pages); + spin_unlock(&xenbus_valloc_lock); + + *vaddr = addr; + return 0; + + out_err: + free_xenballooned_pages(1, &node->page); + kfree(node); + return err; +} /** @@ -525,32 +603,36 @@ EXPORT_SYMBOL_GPL(xenbus_map_ring); */ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) { - struct vm_struct *area; + return ring_ops->unmap(dev, vaddr); +} +EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); + +static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) +{ + struct xenbus_map_node *node; struct gnttab_unmap_grant_ref op = { .host_addr = (unsigned long)vaddr, }; unsigned int level; - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. - */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + if (node->area->addr == vaddr) { + list_del(&node->next); + goto found; + } } - read_unlock(&vmlist_lock); + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); - if (!area) { + if (!node) { xenbus_dev_error(dev, -ENOENT, "can't find mapped virtual address %p", vaddr); return GNTST_bad_virt_addr; } - op.handle = (grant_handle_t)area->phys_addr; + op.handle = node->handle; op.host_addr = arbitrary_virt_to_machine( lookup_address((unsigned long)vaddr, &level)).maddr; @@ -558,16 +640,50 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) BUG(); if (op.status == GNTST_okay) - free_vm_area(area); + free_vm_area(node->area); else xenbus_dev_error(dev, op.status, "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); + node->handle, op.status); + kfree(node); return op.status; } -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); +static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) +{ + int rv; + struct xenbus_map_node *node; + void *addr; + + spin_lock(&xenbus_valloc_lock); + list_for_each_entry(node, &xenbus_valloc_pages, next) { + addr = pfn_to_kaddr(page_to_pfn(node->page)); + if (addr == vaddr) { + list_del(&node->next); + goto found; + } + } + node = NULL; + found: + spin_unlock(&xenbus_valloc_lock); + + if (!node) { + xenbus_dev_error(dev, -ENOENT, + "can't find mapped virtual address %p", vaddr); + return GNTST_bad_virt_addr; + } + + rv = xenbus_unmap_ring(dev, node->handle, addr); + + if (!rv) + free_xenballooned_pages(1, &node->page); + else + WARN(1, "Leaking %p\n", vaddr); + + kfree(node); + return rv; +} /** * xenbus_unmap_ring @@ -617,3 +733,21 @@ enum xenbus_state xenbus_read_driver_state(const char *path) return result; } EXPORT_SYMBOL_GPL(xenbus_read_driver_state); + +static const struct xenbus_ring_ops ring_ops_pv = { + .map = xenbus_map_ring_valloc_pv, + .unmap = xenbus_unmap_ring_vfree_pv, +}; + +static const struct xenbus_ring_ops ring_ops_hvm = { + .map = xenbus_map_ring_valloc_hvm, + .unmap = xenbus_unmap_ring_vfree_hvm, +}; + +void __init xenbus_ring_ops_init(void) +{ + if (xen_pv_domain()) + ring_ops = &ring_ops_pv; + else + ring_ops = &ring_ops_hvm; +} diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 1b178c6e8937..1c05b2508ae8 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -730,6 +730,8 @@ static int __init xenbus_init(void) if (!xen_domain()) return -ENODEV; + xenbus_ring_ops_init(); + if (xen_hvm_domain()) { uint64_t v = 0; err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 9b1de4e34c64..460d784a769a 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -76,4 +76,6 @@ extern void xenbus_otherend_changed(struct xenbus_watch *watch, extern int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node); +void xenbus_ring_ops_init(void); + #endif -- cgit v1.2.3 From 2946a52ac7d57c9d02db477e3684259d86446ea7 Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Wed, 14 Dec 2011 15:12:10 -0500 Subject: xenbus: Use grant-table wrapper functions For xenbus_{map,unmap}_ring to work on HVM, the grant table operations must be set up using the gnttab_set_{map,unmap}_op functions instead of directly populating the fields of gnttab_map_grant_ref. These functions simply populate the structure on paravirtualized Xen; however, on HVM they must call __pa() on vaddr when populating op->host_addr because the hypervisor cannot directly interpret guest-virtual addresses. Signed-off-by: Daniel De Graaf [v1: Fixed cleanpatch error] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_client.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 0fa52916ad05..566d2adbd6ea 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -567,12 +567,10 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr) { - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, + dev->otherend_id); if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) BUG(); @@ -698,10 +696,9 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr) { - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) BUG(); -- cgit v1.2.3 From 7d17e84bb8356b1d9f4402dd82a0e270a3d59a4f Mon Sep 17 00:00:00 2001 From: Daniel De Graaf Date: Wed, 14 Dec 2011 15:12:11 -0500 Subject: xen/grant-table: Support mappings required by blkback Add support for mappings without GNTMAP_contains_pte. This was not supported because the unmap operation assumed that this flag was being used; adding a parameter to the unmap operation to allow the PTE clearing to be disabled is sufficient to make unmap capable of supporting either mapping type. Signed-off-by: Daniel De Graaf [v1: Fix cleanpatch warnings] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntdev.c | 3 ++- drivers/xen/grant-table.c | 24 +++++------------------- 2 files changed, 7 insertions(+), 20 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index f52f661f8f82..99d8151c824a 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -314,7 +314,8 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages) } } - err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, pages); + err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset, + pages, true); if (err) return err; diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index a3d0e1e278c1..1cd94daa71db 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -761,24 +761,10 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, (map_ops[i].host_addr & ~PAGE_MASK)); mfn = pte_mfn(*pte); } else { - /* If you really wanted to do this: - * mfn = PFN_DOWN(map_ops[i].dev_bus_addr); - * - * The reason we do not implement it is b/c on the - * unmap path (gnttab_unmap_refs) we have no means of - * checking whether the page is !GNTMAP_contains_pte. - * - * That is without some extra data-structure to carry - * the struct page, bool clear_pte, and list_head next - * tuples and deal with allocation/delallocation, etc. - * - * The users of this API set the GNTMAP_contains_pte - * flag so lets just return not supported until it - * becomes neccessary to implement. - */ - return -EOPNOTSUPP; + mfn = PFN_DOWN(map_ops[i].dev_bus_addr); } - ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); + ret = m2p_add_override(mfn, pages[i], kmap_ops ? + &kmap_ops[i] : NULL); if (ret) return ret; } @@ -788,7 +774,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops, EXPORT_SYMBOL_GPL(gnttab_map_refs); int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, - struct page **pages, unsigned int count) + struct page **pages, unsigned int count, bool clear_pte) { int i, ret; @@ -800,7 +786,7 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops, return ret; for (i = 0; i < count; i++) { - ret = m2p_remove_override(pages[i], true /* clear the PTE */); + ret = m2p_remove_override(pages[i], clear_pte); if (ret) return ret; } -- cgit v1.2.3 From 01464a60a4d21fb649e088f7ae5136c6fb130889 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 21 Dec 2011 14:19:47 -0500 Subject: xen/xenbus: Fix compile error - missing header for xen_initial_domain() drivers/xen/xenbus/xenbus_dev_backend.c:74:2: error: implicit declaration of function 'xen_initial_domain' Reported-by: Randy Dunlap Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_backend.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c index a2092bd97693..3d3be78c1093 100644 --- a/drivers/xen/xenbus/xenbus_dev_backend.c +++ b/drivers/xen/xenbus/xenbus_dev_backend.c @@ -6,6 +6,7 @@ #include #include +#include #include #include -- cgit v1.2.3 From 2e16341438c9eca15a2e0bb2ad8555bbdf24b86d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Fri, 23 Dec 2011 18:39:29 +0100 Subject: xen-gntalloc: introduce missing kfree Error handling code following a kmalloc should free the allocated data. Out_unlock is used on both success and failure, so free vm_priv before jumping to that label. A simplified version of the semantic match that finds the problem is as follows: (http://coccinelle.lip6.fr) // @r exists@ local idexpression x; statement S; identifier f1; position p1,p2; expression *ptr != NULL; @@ x@p1 = \(kmalloc\|kzalloc\|kcalloc\)(...); ... if (x == NULL) S <... when != x when != if (...) { <+...x...+> } x->f1 ...> ( return \(0\|<+...x...+>\|ptr\); | return@p2 ...; ) @script:python@ p1 << r.p1; p2 << r.p2; @@ print "* file: %s kmalloc %s return %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall [v1: Altered the description a bit] Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/gntalloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index e2400c8963fa..934985d14c24 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -525,6 +525,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma) rv = -ENOENT; pr_debug("%s: Could not find grant reference", __func__); + kfree(vm_priv); goto out_unlock; } -- cgit v1.2.3 From 73db144b58a32fc39733db6a7e1fe582072ad26a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 22 Dec 2011 09:08:13 +0000 Subject: Xen: consolidate and simplify struct xenbus_driver instantiation The 'name', 'owner', and 'mod_name' members are redundant with the identically named fields in the 'driver' sub-structure. Rather than switching each instance to specify these fields explicitly, introduce a macro to simplify this. Eliminate further redundancy by allowing the drvname argument to DEFINE_XENBUS_DRIVER() to be blank (in which case the first entry from the ID table will be used for .driver.name). Also eliminate the questionable xenbus_register_{back,front}end() wrappers - their sole remaining purpose was the checking of the 'owner' field, proper setting of which shouldn't be an issue anymore when the macro gets used. v2: Restore DRV_NAME for the driver name in xen-pciback. Signed-off-by: Jan Beulich Cc: Jens Axboe Cc: Dmitry Torokhov Cc: Florian Tobias Schandinat Cc: Ian Campbell Cc: David S. Miller Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/xenbus.c | 13 +++++-------- drivers/xen/xenbus/xenbus_probe.c | 7 +------ drivers/xen/xenbus/xenbus_probe.h | 4 +--- drivers/xen/xenbus/xenbus_probe_backend.c | 8 +++----- drivers/xen/xenbus/xenbus_probe_frontend.c | 8 +++----- 5 files changed, 13 insertions(+), 27 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 075525945e36..c80f9c84d08e 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -707,19 +707,16 @@ static int xen_pcibk_xenbus_remove(struct xenbus_device *dev) return 0; } -static const struct xenbus_device_id xenpci_ids[] = { +static const struct xenbus_device_id xen_pcibk_ids[] = { {"pci"}, {""}, }; -static struct xenbus_driver xenbus_xen_pcibk_driver = { - .name = DRV_NAME, - .owner = THIS_MODULE, - .ids = xenpci_ids, +static DEFINE_XENBUS_DRIVER(xen_pcibk, DRV_NAME, .probe = xen_pcibk_xenbus_probe, .remove = xen_pcibk_xenbus_remove, .otherend_changed = xen_pcibk_frontend_changed, -}; +); const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend; @@ -735,11 +732,11 @@ int __init xen_pcibk_xenbus_register(void) if (passthrough) xen_pcibk_backend = &xen_pcibk_passthrough_backend; pr_info(DRV_NAME ": backend is %s\n", xen_pcibk_backend->name); - return xenbus_register_backend(&xenbus_xen_pcibk_driver); + return xenbus_register_backend(&xen_pcibk_driver); } void __exit xen_pcibk_xenbus_unregister(void) { destroy_workqueue(xen_pcibk_wq); - xenbus_unregister_driver(&xenbus_xen_pcibk_driver); + xenbus_unregister_driver(&xen_pcibk_driver); } diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c index 1c05b2508ae8..3864967202b5 100644 --- a/drivers/xen/xenbus/xenbus_probe.c +++ b/drivers/xen/xenbus/xenbus_probe.c @@ -291,14 +291,9 @@ void xenbus_dev_shutdown(struct device *_dev) EXPORT_SYMBOL_GPL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name) + struct xen_bus_type *bus) { - drv->driver.name = drv->name; drv->driver.bus = &bus->bus; - drv->driver.owner = owner; - drv->driver.mod_name = mod_name; return driver_register(&drv->driver); } diff --git a/drivers/xen/xenbus/xenbus_probe.h b/drivers/xen/xenbus/xenbus_probe.h index 460d784a769a..bb4f92ed8730 100644 --- a/drivers/xen/xenbus/xenbus_probe.h +++ b/drivers/xen/xenbus/xenbus_probe.h @@ -53,9 +53,7 @@ extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); + struct xen_bus_type *bus); extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c index c3c7cd195c11..257be37d9091 100644 --- a/drivers/xen/xenbus/xenbus_probe_backend.c +++ b/drivers/xen/xenbus/xenbus_probe_backend.c @@ -232,15 +232,13 @@ int xenbus_dev_is_online(struct xenbus_device *dev) } EXPORT_SYMBOL_GPL(xenbus_dev_is_online); -int __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +int xenbus_register_backend(struct xenbus_driver *drv) { drv->read_otherend_details = read_frontend_details; - return xenbus_register_driver_common(drv, &xenbus_backend, - owner, mod_name); + return xenbus_register_driver_common(drv, &xenbus_backend); } -EXPORT_SYMBOL_GPL(__xenbus_register_backend); +EXPORT_SYMBOL_GPL(xenbus_register_backend); static int backend_probe_and_watch(struct notifier_block *notifier, unsigned long event, diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c index 2f73195512b4..9c57819df51a 100644 --- a/drivers/xen/xenbus/xenbus_probe_frontend.c +++ b/drivers/xen/xenbus/xenbus_probe_frontend.c @@ -230,15 +230,13 @@ static void wait_for_devices(struct xenbus_driver *xendrv) print_device_status); } -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +int xenbus_register_frontend(struct xenbus_driver *drv) { int ret; drv->read_otherend_details = read_backend_details; - ret = xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); + ret = xenbus_register_driver_common(drv, &xenbus_frontend); if (ret) return ret; @@ -247,7 +245,7 @@ int __xenbus_register_frontend(struct xenbus_driver *drv, return 0; } -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +EXPORT_SYMBOL_GPL(xenbus_register_frontend); static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq); static int backend_state; -- cgit v1.2.3 From 9e7860cee18241633eddb36a4c34c7b61d8cecbc Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 09:34:49 +0000 Subject: xen/xenbus: Reject replies with payload > XENSTORE_PAYLOAD_MAX. Haogang Chen found out that: There is a potential integer overflow in process_msg() that could result in cross-domain attack. body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); When a malicious guest passes 0xffffffff in msg->hdr.len, the subsequent call to xb_read() would write to a zero-length buffer. The other end of this connection is always the xenstore backend daemon so there is no guest (malicious or otherwise) which can do this. The xenstore daemon is a trusted component in the system. However this seem like a reasonable robustness improvement so we should have it. And Ian when read the API docs found that: The payload length (len field of the header) is limited to 4096 (XENSTORE_PAYLOAD_MAX) in both directions. If a client exceeds the limit, its xenstored connection will be immediately killed by xenstored, which is usually catastrophic from the client's point of view. Clients (particularly domains, which cannot just reconnect) should avoid this. so this patch checks against that instead. This also avoids a potential integer overflow pointed out by Haogang Chen. Signed-off-by: Ian Campbell Cc: Haogang Chen CC: stable@kernel.org Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index b3b8f2f3ad10..6f0121e3be69 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -810,6 +810,12 @@ static int process_msg(void) goto out; } + if (msg->hdr.len > XENSTORE_PAYLOAD_MAX) { + kfree(msg); + err = -EINVAL; + goto out; + } + body = kmalloc(msg->hdr.len + 1, GFP_NOIO | __GFP_HIGH); if (body == NULL) { kfree(msg); -- cgit v1.2.3 From 50bf73796e85ed6a061df6d8474f7cef7870df6a Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 11:39:51 +0000 Subject: xenbus: maximum buffer size is XENSTORE_PAYLOAD_MAX Use this now that it is defined even though it happens to be == PAGE_SIZE. The code which takes requests from userspace already validates against the size of this buffer so no further checks are required to ensure that userspace requests comply with the protocol in this respect. Signed-off-by: Ian Campbell Cc: Haogang Chen Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index aec01420d979..527dc2a3b89f 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -105,7 +105,7 @@ struct xenbus_file_priv { unsigned int len; union { struct xsd_sockmsg msg; - char buffer[PAGE_SIZE]; + char buffer[XENSTORE_PAYLOAD_MAX]; } u; /* Response queue. */ -- cgit v1.2.3 From a800651e8893007d3a12bc281f0265f18043c4fa Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 4 Jan 2012 11:39:52 +0000 Subject: xen/xenbus: don't reimplement kvasprintf via a fixed size buffer Signed-off-by: Ian Campbell Cc: Haogang Chen Acked-by: Jan Beulich Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xenbus/xenbus_xs.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c index 6f0121e3be69..226d1ac55cf4 100644 --- a/drivers/xen/xenbus/xenbus_xs.c +++ b/drivers/xen/xenbus/xenbus_xs.c @@ -532,21 +532,18 @@ int xenbus_printf(struct xenbus_transaction t, { va_list ap; int ret; -#define PRINTF_BUFFER_SIZE 4096 - char *printf_buffer; - - printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH); - if (printf_buffer == NULL) - return -ENOMEM; + char *buf; va_start(ap, fmt); - ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + buf = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap); va_end(ap); - BUG_ON(ret > PRINTF_BUFFER_SIZE-1); - ret = xenbus_write(t, dir, node, printf_buffer); + if (!buf) + return -ENOMEM; + + ret = xenbus_write(t, dir, node, buf); - kfree(printf_buffer); + kfree(buf); return ret; } -- cgit v1.2.3 From 97309d3974fd371920cc8b932e2b6b5e4100c7c6 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 4 Jan 2012 14:10:32 -0500 Subject: xen/pciback: Move the PCI_DEV_FLAGS_ASSIGNED ops to the "[un|]bind" operation instead of doing it per guest creation/disconnection. Without this we could have potentially unloaded the vf driver from the xen pciback control even if the driver was binded to the xen-pciback. This will hold on to it until the user "unbind"s the PCI device using SysFS. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 2 ++ drivers/xen/xen-pciback/xenbus.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 8f06e1ed028c..405445965690 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -99,6 +99,7 @@ static void pcistub_device_release(struct kref *kref) kfree(pci_get_drvdata(psdev->dev)); pci_set_drvdata(psdev->dev, NULL); + psdev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; pci_dev_put(psdev->dev); kfree(psdev); @@ -331,6 +332,7 @@ static int __devinit pcistub_init_device(struct pci_dev *dev) dev_dbg(&dev->dev, "reset device\n"); xen_pcibk_reset_device(dev); + dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; return 0; config_release: diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index c80f9c84d08e..87c5dc3368d0 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -241,7 +241,6 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, goto out; dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); - dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; if (xen_register_device_domain_owner(dev, pdev->xdev->otherend_id) != 0) { dev_err(&dev->dev, "device has been assigned to another " \ @@ -281,7 +280,6 @@ static int xen_pcibk_remove_device(struct xen_pcibk_device *pdev, } dev_dbg(&dev->dev, "unregistering for %d\n", pdev->xdev->otherend_id); - dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; xen_unregister_device_domain_owner(dev); xen_pcibk_release_pci_dev(pdev, dev); -- cgit v1.2.3 From 3167355801505886209374daf86a452034e34ee8 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 4 Jan 2012 15:11:02 -0500 Subject: xen/pciback: Fix "device has been assigned to X domain!" warning The full warning is: "pciback 0000:05:00.0: device has been assigned to 2 domain! Over-writting the ownership, but beware." which is correct - the previous domain that was using the device forgot to unregister the ownership. This patch fixes this by calling the unregister ownership function when the PCI device is relinquished from the guest domain. Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/xen-pciback/pci_stub.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c index 405445965690..7944a17f5cbf 100644 --- a/drivers/xen/xen-pciback/pci_stub.c +++ b/drivers/xen/xen-pciback/pci_stub.c @@ -235,6 +235,8 @@ void pcistub_put_pci_dev(struct pci_dev *dev) xen_pcibk_config_free_dyn_fields(found_psdev->dev); xen_pcibk_config_reset_dev(found_psdev->dev); + xen_unregister_device_domain_owner(found_psdev->dev); + spin_lock_irqsave(&found_psdev->lock, flags); found_psdev->pdev = NULL; spin_unlock_irqrestore(&found_psdev->lock, flags); -- cgit v1.2.3 From 6c254de16a1d14c1ac931d3aa08dc88ac9fc582b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 4 Jan 2012 14:16:45 -0500 Subject: xen/pciback: Expand the warning message to include domain id. When a PCI device is transferred to another domain and it is still in usage (from the internal perspective), mention which other domain is using it to aid in debugging. [v2: Truncate the verbose message per Jan Beulich suggestion] [v3: Suggestions from Ian Campbell on the wording] Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Jan Beulich --- drivers/xen/xen-pciback/xenbus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 87c5dc3368d0..8e1c44d8ab46 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -243,8 +243,8 @@ static int xen_pcibk_export_device(struct xen_pcibk_device *pdev, dev_dbg(&dev->dev, "registering for %d\n", pdev->xdev->otherend_id); if (xen_register_device_domain_owner(dev, pdev->xdev->otherend_id) != 0) { - dev_err(&dev->dev, "device has been assigned to another " \ - "domain! Over-writting the ownership, but beware.\n"); + dev_err(&dev->dev, "Stealing ownership from dom%d.\n", + xen_find_device_domain_owner(dev)); xen_unregister_device_domain_owner(dev); xen_register_device_domain_owner(dev, pdev->xdev->otherend_id); } -- cgit v1.2.3 From 90ab5ee94171b3e28de6bb42ee30b527014e0be7 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Jan 2012 09:32:20 +1030 Subject: module_param: make bool parameters really bool (drivers & misc) module_param(bool) used to counter-intuitively take an int. In fddd5201 (mid-2009) we allowed bool or int/unsigned int using a messy trick. It's time to remove the int/unsigned int option. For this version it'll simply give a warning, but it'll break next kernel version. Acked-by: Mauro Carvalho Chehab Signed-off-by: Rusty Russell --- drivers/xen/xen-pciback/conf_space.c | 2 +- drivers/xen/xen-pciback/xenbus.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/xen') diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c index 52fed16d8701..30d7be026c18 100644 --- a/drivers/xen/xen-pciback/conf_space.c +++ b/drivers/xen/xen-pciback/conf_space.c @@ -16,7 +16,7 @@ #include "conf_space.h" #include "conf_space_quirks.h" -static int permissive; +static bool permissive; module_param(permissive, bool, 0644); /* This is where xen_pcibk_read_config_byte, xen_pcibk_read_config_word, diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c index 8e1c44d8ab46..d5dcf8d5d3d9 100644 --- a/drivers/xen/xen-pciback/xenbus.c +++ b/drivers/xen/xen-pciback/xenbus.c @@ -16,7 +16,7 @@ #define INVALID_EVTCHN_IRQ (-1) struct workqueue_struct *xen_pcibk_wq; -static int __read_mostly passthrough; +static bool __read_mostly passthrough; module_param(passthrough, bool, S_IRUGO); MODULE_PARM_DESC(passthrough, "Option to specify how to export PCI topology to guest:\n"\ -- cgit v1.2.3 From 9ef9b20bd0eef609f07960a997c13cab8fe15d2e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Thu, 19 Jan 2012 10:24:31 +1100 Subject: xen: using EXPORT_SYMBOL requires including export.h Fix these warnings: drivers/xen/biomerge.c:14:1: warning: data definition has no type or storage class [enabled by default] drivers/xen/biomerge.c:14:1: warning: type defaults to 'int' in declaration of 'EXPORT_SYMBOL' [-Wimplicit-int] drivers/xen/biomerge.c:14:1: warning: parameter names (without types) in function declaration [enabled by default] And this build error: ERROR: "xen_biovec_phys_mergeable" [drivers/block/nvme.ko] undefined! Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- drivers/xen/biomerge.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/xen') diff --git a/drivers/xen/biomerge.c b/drivers/xen/biomerge.c index 18c1bb6ffce3..0edb91c0de6b 100644 --- a/drivers/xen/biomerge.c +++ b/drivers/xen/biomerge.c @@ -1,5 +1,6 @@ #include #include +#include #include bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, -- cgit v1.2.3