From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, for the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- virt/kvm/irqchip.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..716a1c4db528 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,16 +31,6 @@ #include #include "irq.h" -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. - */ - struct hlist_head map[0]; -}; - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { @@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); + kvm_arch_irq_routing_update(kvm); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; -- cgit v1.2.3 From d3febddde9c7a959dbb189a700e937db50fad4d6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 25 Aug 2015 17:05:46 +0800 Subject: kvm: use kmalloc() instead of kzalloc() during iodev register/unregister All fields of kvm_io_range were initialized or copied explicitly afterwards. So switch to use kmalloc(). Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..23116dcb2129 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3341,7 +3341,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; - new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; @@ -3373,7 +3373,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (r) return r; - new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; -- cgit v1.2.3 From e9ea5069d9e569c32ab913c39467df32e056b3a7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:59 +0800 Subject: kvm: add capability for any-length ioeventfds Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 4 +--- virt/kvm/kvm_main.c | 1 + 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 79db45336e3a..ac89299b8699 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -914,9 +914,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return -EINVAL; /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) + if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL; ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 23116dcb2129..afd7ae6aec65 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2718,6 +2718,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IRQFD: case KVM_CAP_IRQFD_RESAMPLE: #endif + case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: return 1; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING -- cgit v1.2.3 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 95 +++++++++++++----------------------------------------- 1 file changed, 23 insertions(+), 72 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ac89299b8699..413f5a6b61ba 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include @@ -39,68 +40,14 @@ #include #ifdef CONFIG_HAVE_KVM_IRQFD -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi. - * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -121,12 +68,13 @@ irqfd_inject(struct work_struct *work) static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); kvm = resampler->kvm; kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, @@ -141,9 +89,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; mutex_lock(&kvm->irqfds.resampler_lock); @@ -168,7 +116,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -198,7 +147,7 @@ irqfd_shutdown(struct work_struct *work) /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? false : true; } @@ -209,7 +158,7 @@ irqfd_is_active(struct _irqfd *irqfd) * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -224,7 +173,8 @@ irqfd_deactivate(struct _irqfd *irqfd) static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; @@ -274,12 +224,13 @@ static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; @@ -304,7 +255,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; @@ -340,7 +291,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -525,7 +476,7 @@ kvm_eventfd_init(struct kvm *kvm) static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -581,7 +532,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -604,7 +555,7 @@ kvm_irqfd_release(struct kvm *kvm) */ void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); -- cgit v1.2.3 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make possible to specialize the KVM IRQ bypass consumer in case CONFIG_KVM_HAVE_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks. - Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..9f8014dda2cf 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y depends on COMPAT && !S390 + +config HAVE_KVM_IRQ_BYPASS + bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 413f5a6b61ba..c4f7abec4261 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -252,6 +252,18 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} +#endif + static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { -- cgit v1.2.3 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c4f7abec4261..7df356d8f1fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -391,6 +395,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * we might race against the POLLHUP */ fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; -- cgit v1.2.3 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd' This patch adds an arch specific hooks 'arch_update' in 'struct kvm_kernel_irqfd'. On Intel side, it is used to update the IRTE when VT-d posted-interrupts is used. Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7df356d8f1fd..b637965746bb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start( struct irq_bypass_consumer *cons) { } + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} #endif static int @@ -586,9 +593,19 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { irqfd_update(kvm, irqfd); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + WARN_ON(ret); + } +#endif + } + spin_unlock_irq(&kvm->irqfds.lock); } -- cgit v1.2.3 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index afd7ae6aec65..a75502c93c3e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) init_waitqueue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); + vcpu->pre_pcpu = -1; + INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { r = -ENOMEM; -- cgit v1.2.3 From 6003a4201077da41427dae5e71bb2f31dfdc0c10 Mon Sep 17 00:00:00 2001 From: Kosuke Tatsukawa Date: Fri, 9 Oct 2015 12:21:55 +0000 Subject: kvm: fix waitqueue_active without memory barrier in virt/kvm/async_pf.c async_pf_execute() seems to be missing a memory barrier which might cause the waker to not notice the waiter and miss sending a wake_up as in the following figure. async_pf_execute kvm_vcpu_block ------------------------------------------------------------------------ spin_lock(&vcpu->async_pf.lock); if (waitqueue_active(&vcpu->wq)) /* The CPU might reorder the test for the waitqueue up here, before prior writes complete */ prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); /*if (kvm_vcpu_check_block(vcpu) < 0) */ /*if (kvm_arch_vcpu_runnable(vcpu)) { */ ... return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) ... return 0; list_add_tail(&apf->link, &vcpu->async_pf.done); spin_unlock(&vcpu->async_pf.lock); waited = true; schedule(); ------------------------------------------------------------------------ The attached patch adds the missing memory barrier. I found this issue when I was looking through the linux source code for places calling waitqueue_active() before wake_up*(), but without preceding memory barriers, after sending a patch to fix a similar issue in drivers/tty/n_tty.c (Details about the original issue can be found here: https://lkml.org/lkml/2015/9/28/849). Signed-off-by: Kosuke Tatsukawa Signed-off-by: Paolo Bonzini --- virt/kvm/async_pf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 44660aee335f..77d42be6970e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work) trace_kvm_async_pf_completed(addr, gva); + /* + * This memory barrier pairs with prepare_to_wait's set_current_state() + */ + smp_mb(); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); -- cgit v1.2.3 From 351dc6477cd35136ce4668401b1b1332a62908a8 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:45 +0300 Subject: kvm/eventfd: avoid loop inside irqfd_update() The loop(for) inside irqfd_update() is unnecessary because any other value for irq_entry.type will just trigger schedule_work(&irqfd->inject) in irqfd_wakeup. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b637965746bb..518421e65b0d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -238,20 +238,17 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - int i, n_entries; + int n_entries; n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - e = entries; - for (i = 0; i < n_entries; ++i, ++e) { - /* Only fast-path MSI. */ - if (e->type == KVM_IRQ_ROUTING_MSI) - irqfd->irq_entry = *e; - } + if (n_entries == 1) + irqfd->irq_entry = *e; + else + irqfd->irq_entry.type = 0; write_seqcount_end(&irqfd->irq_entry_sc); } -- cgit v1.2.3 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 518421e65b0d..f6b986a41823 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -451,9 +451,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) } EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ int gsi, idx; trace_kvm_ack_irq(irqchip, pin); @@ -461,10 +470,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f6b986a41823..e29fd2640709 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,6 +171,15 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ @@ -195,7 +204,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (irq.type == KVM_IRQ_ROUTING_MSI) kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); - else + else if (kvm_arch_set_irq(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From f33143d80907602deb1b96db42da93507ed03b31 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:48 +0300 Subject: kvm/irqchip: allow only multiple irqchip routes per GSI Any other irq routing types (MSI, S390_ADAPTER, upcoming Hyper-V SynIC) map one-to-one to GSI. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/irqchip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 716a1c4db528..f0b08a2a48ba 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, /* * Do not allow GSI to be mapped to the same irqchip more than once. - * Allow only one to one mapping between GSI and MSI. + * Allow only one to one mapping between GSI and non-irqchip routing. */ hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || + if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || + ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return r; -- cgit v1.2.3 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/kvm_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'virt/kvm') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..7873d6daccb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2018,6 +2018,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) } while (single_task_running() && ktime_before(cur, stop)); } + kvm_arch_vcpu_blocking(vcpu); + for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); @@ -2031,6 +2033,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); cur = ktime_get(); + kvm_arch_vcpu_unblocking(vcpu); out: block_ns = ktime_to_ns(cur) - ktime_to_ns(start); -- cgit v1.2.3 From d35268da66870d733ae763fd7f9b06a1f63f395e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 19:48:21 +0200 Subject: arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block We currently schedule a soft timer every time we exit the guest if the timer did not expire while running the guest. This is really not necessary, because the only work we do in the timer work function is to kick the vcpu. Kicking the vcpu does two things: (1) If the vpcu thread is on a waitqueue, make it runnable and remove it from the waitqueue. (2) If the vcpu is running on a different physical CPU from the one doing the kick, it sends a reschedule IPI. The second case cannot happen, because the soft timer is only ever scheduled when the vcpu is not running. The first case is only relevant when the vcpu thread is on a waitqueue, which is only the case when the vcpu thread has called kvm_vcpu_block(). Therefore, we only need to make sure a timer is scheduled for kvm_vcpu_block(), which we do by encapsulating all calls to kvm_vcpu_block() with kvm_timer_{un}schedule calls. Additionally, we only schedule a soft timer if the timer is enabled and unmasked, since it is useless otherwise. Note that theoretically userspace can use the SET_ONE_REG interface to change registers that should cause the timer to fire, even if the vcpu is blocked without a scheduled timer, but this case was not supported before this patch and we leave it for future work for now. Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 94 ++++++++++++++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 34 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b9d3a32cbc04..32095fbb5d7c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && + !kvm_vgic_get_phys_irq_active(timer->map); +} + bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; cycle_t cval, now; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || - kvm_vgic_get_phys_irq_active(timer->map)) + if (!kvm_timer_irq_can_fire(vcpu)) return false; cval = timer->cntv_cval; @@ -127,12 +134,57 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +/* + * Schedule the background timer before calling kvm_vcpu_block, so that this + * thread is removed from its waitqueue and made runnable when there's a timer + * interrupt to handle. + */ +void kvm_timer_schedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 ns; + cycle_t cval, now; + + BUG_ON(timer_is_armed(timer)); + + /* + * No need to schedule a background timer if the guest timer has + * already expired, because kvm_vcpu_block will return before putting + * the thread to sleep. + */ + if (kvm_timer_should_fire(vcpu)) + return; + + /* + * If the timer is not capable of raising interrupts (disabled or + * masked), then there's no more work for us to do. + */ + if (!kvm_timer_irq_can_fire(vcpu)) + return; + + /* The timer has not yet expired, schedule a background timer */ + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + timer_arm(timer, ns); +} + +void kvm_timer_unschedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + timer_disarm(timer); +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case. */ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -140,17 +192,6 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); - - /* - * If the timer expired while we were not scheduled, now is the time - * to inject it. - */ if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); @@ -176,32 +217,17 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. + * Check if the virtual timer has expired while we were running in the guest, + * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. - */ + if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); - return; - } - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 9103617df202d74e5c65f8af84a9aa727f812a06 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 22:50:57 +0200 Subject: arm/arm64: KVM: vgic: Factor out level irq processing on guest exit Currently vgic_process_maintenance() processes dealing with a completed level-triggered interrupt directly, but we are soon going to reuse this logic for level-triggered mapped interrupts with the HW bit set, so move this logic into a separate static function. Probably the most scary part of this commit is convincing yourself that the current flow is safe compared to the old one. In the following I try to list the changes and why they are harmless: Move vgic_irq_clear_queued after kvm_notify_acked_irq: Harmless because the only potential effect of clearing the queued flag wrt. kvm_set_irq is that vgic_update_irq_pending does not set the pending bit on the emulated CPU interface or in the pending_on_cpu bitmask if the function is called with level=1. However, the point of kvm_notify_acked_irq is to call kvm_set_irq with level=0, and we set the queued flag again in __kvm_vgic_sync_hwstate later on if the level is stil high. Move vgic_set_lr before kvm_notify_acked_irq: Also, harmless because the LR are cpu-local operations and kvm_notify_acked only affects the dist Move vgic_dist_irq_clear_soft_pend after kvm_notify_acked_irq: Also harmless, because now we check the level state in the clear_soft_pend function and lower the pending bits if the level is low. Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 94 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 38 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 66c66165e712..367a180fb5ac 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -107,6 +107,7 @@ static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, int virt_irq); +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; @@ -357,6 +358,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); + if (!vgic_dist_irq_get_level(vcpu, irq)) { + vgic_dist_irq_clear_pending(vcpu, irq); + if (!compute_pending_for_cpu(vcpu)) + clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); + } } static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) @@ -1338,12 +1344,56 @@ epilog: } } +static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +{ + int level_pending = 0; + + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + + /* + * If the IRQ was EOIed (called from vgic_process_maintenance) or it + * went from active to non-active (called from vgic_sync_hwirq) it was + * also ACKed and we we therefore assume we can clear the soft pending + * state (should it had been set) for this interrupt. + * + * Note: if the IRQ soft pending state was set after the IRQ was + * acked, it actually shouldn't be cleared, but we have no way of + * knowing that unless we start trapping ACKs when the soft-pending + * state is set. + */ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + + /* + * Tell the gic to start sampling the line of this interrupt again. + */ + vgic_irq_clear_queued(vcpu, vlr.irq); + + /* Any additional pending interrupt? */ + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); + level_pending = 1; + } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); + } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + vgic_sync_lr_elrsr(vcpu, lr, vlr); + + return level_pending; +} + static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool level_pending = false; struct kvm *kvm = vcpu->kvm; + int level_pending = 0; kvm_debug("STATUS = %08x\n", status); @@ -1358,54 +1408,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); - spin_lock(&dist->lock); - vgic_irq_clear_queued(vcpu, vlr.irq); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); WARN_ON(vlr.state & LR_STATE_MASK); - vlr.state = 0; - vgic_set_lr(vcpu, lr, vlr); - /* - * If the IRQ was EOIed it was also ACKed and we we - * therefore assume we can clear the soft pending - * state (should it had been set) for this interrupt. - * - * Note: if the IRQ soft pending state was set after - * the IRQ was acked, it actually shouldn't be - * cleared, but we have no way of knowing that unless - * we start trapping ACKs when the soft-pending state - * is set. - */ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* * kvm_notify_acked_irq calls kvm_set_irq() - * to reset the IRQ level. Need to release the - * lock for kvm_set_irq to grab it. + * to reset the IRQ level, which grabs the dist->lock + * so we call this before taking the dist->lock. */ - spin_unlock(&dist->lock); - kvm_notify_acked_irq(kvm, 0, vlr.irq - VGIC_NR_PRIVATE_IRQS); - spin_lock(&dist->lock); - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - level_pending = true; - } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); - } + spin_lock(&dist->lock); + level_pending |= process_level_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - vgic_sync_lr_elrsr(vcpu, lr, vlr); } } -- cgit v1.2.3 From 8bf9a701e103fd17dbdf0355e43ff5200b4823aa Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 14:42:16 +0200 Subject: arm/arm64: KVM: Implement GICD_ICFGR as RO for PPIs The GICD_ICFGR allows the bits for the SGIs and PPIs to be read only. We currently simulate this behavior by writing a hardcoded value to the register for the SGIs and PPIs on every write of these bits to the register (ignoring what the guest actually wrote), and by writing the same value as the reset value to the register. This is a bit counter-intuitive, as the register is RO for these bits, and we can just implement it that way, allowing us to control the value of the bits purely in the reset code. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 367a180fb5ac..f8ca2e9d2f0b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -690,10 +690,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 8) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ + /* Ignore writes to read-only SGI and PPI bits */ + if (offset < 8) return false; - } val = vgic_cfg_compress(val); if (offset & 4) { -- cgit v1.2.3 From 54723bb37feac347a169359536f3dff122cabca3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 14:45:20 +0200 Subject: arm/arm64: KVM: Use appropriate define in VGIC reset code We currently initialize the SGIs to be enabled in the VGIC code, but we use the VGIC_NR_PPIS define for this purpose, instead of the the more natural VGIC_NR_SGIS. Change this slightly confusing use of the defines. Note: This should have no functional change, as both names are defined to the number 16. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f8ca2e9d2f0b..a44ecf9eca4e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2128,8 +2128,12 @@ int vgic_init(struct kvm *kvm) break; } - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) + /* + * Enable all SGIs and configure all private IRQs as + * edge-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + if (i < VGIC_NR_SGIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); if (i < VGIC_NR_PRIVATE_IRQS) -- cgit v1.2.3 From 4b4b4512da2a844b8da2585609b67fae1ce4f4db Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 15:01:27 +0200 Subject: arm/arm64: KVM: Rework the arch timer to use level-triggered semantics The arch timer currently uses edge-triggered semantics in the sense that the line is never sampled by the vgic and lowering the line from the timer to the vgic doesn't have any effect on the pending state of virtual interrupts in the vgic. This means that we do not support a guest with the otherwise valid behavior of (1) disable interrupts (2) enable the timer (3) disable the timer (4) enable interrupts. Such a guest would validly not expect to see any interrupts on real hardware, but will see interrupts on KVM. This patch fixes this shortcoming through the following series of changes. First, we change the flow of the timer/vgic sync/flush operations. Now the timer is always flushed/synced before the vgic, because the vgic samples the state of the timer output. This has the implication that we move the timer operations in to non-preempible sections, but that is fine after the previous commit getting rid of hrtimer schedules on every entry/exit. Second, we change the internal behavior of the timer, letting the timer keep track of its previous output state, and only lower/raise the line to the vgic when the state changes. Note that in theory this could have been accomplished more simply by signalling the vgic every time the state *potentially* changed, but we don't want to be hitting the vgic more often than necessary. Third, we get rid of the use of the map->active field in the vgic and instead simply set the interrupt as active on the physical distributor whenever the input to the GIC is asserted and conversely clear the physical active state when the input to the GIC is deasserted. Fourth, and finally, we now initialize the timer PPIs (and all the other unused PPIs for now), to be level-triggered, and modify the sync code to sample the line state on HW sync and re-inject a new interrupt if it is still pending at that time. Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 81 ++++++++++++++++++++++++------------ virt/kvm/arm/vgic.c | 102 ++++++++++++---------------------------------- 2 files changed, 82 insertions(+), 101 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 32095fbb5d7c..523816d8c402 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) } } -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - kvm_vgic_set_phys_irq_active(timer->map, true); - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, - timer->map, - timer->irq->level); - WARN_ON(ret); -} - static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; @@ -116,8 +104,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && - !kvm_vgic_get_phys_irq_active(timer->map); + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); } bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) @@ -134,6 +121,41 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) +{ + int ret; + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + BUG_ON(!vgic_initialized(vcpu->kvm)); + + timer->irq.level = new_level; + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, + timer->map, + timer->irq.level); + WARN_ON(ret); +} + +/* + * Check if there was a change in the timer state (should we raise or lower + * the line level to the GIC). + */ +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * If userspace modified the timer registers via SET_ONE_REG before + * the vgic was initialized, we mustn't set the timer->irq.level value + * because the guest would never see the interrupt. Instead wait + * until we call this function from kvm_timer_flush_hwstate. + */ + if (!vgic_initialized(vcpu->kvm)) + return; + + if (kvm_timer_should_fire(vcpu) != timer->irq.level) + kvm_timer_update_irq(vcpu, !timer->irq.level); +} + /* * Schedule the background timer before calling kvm_vcpu_block, so that this * thread is removed from its waitqueue and made runnable when there's a timer @@ -192,17 +214,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + kvm_timer_update_state(vcpu); /* - * We keep track of whether the edge-triggered interrupt has been - * signalled to the vgic/guest, and if so, we mask the interrupt and - * the physical distributor to prevent the timer from raising a - * physical interrupt whenever we run a guest, preventing forward - * VCPU progress. + * If we enter the guest with the virtual input level to the VGIC + * asserted, then we have already told the VGIC what we need to, and + * we don't need to exit from the guest until the guest deactivates + * the already injected interrupt, so therefore we should set the + * hardware active state to prevent unnecessary exits from the guest. + * + * Conversely, if the virtual input level is deasserted, then always + * clear the hardware active state to ensure that hardware interrupts + * from the timer triggers a guest exit. */ - if (kvm_vgic_get_phys_irq_active(timer->map)) + if (timer->irq.level) phys_active = true; else phys_active = false; @@ -226,8 +251,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + /* + * The guest could have modified the timer registers or the timer + * could have expired, update the timer state. + */ + kvm_timer_update_state(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, @@ -242,7 +270,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq = irq; + timer->irq.irq = irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -251,6 +279,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * the ARMv7 architecture. */ timer->cntv_ctl = 0; + kvm_timer_update_state(vcpu); /* * Tell the VGIC that the virtual interrupt is tied to a @@ -295,6 +324,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) default: return -1; } + + kvm_timer_update_state(vcpu); return 0; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a44ecf9eca4e..3c2909c1bda3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -537,34 +537,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, return false; } -/* - * If a mapped interrupt's state has been modified by the guest such that it - * is no longer active or pending, without it have gone through the sync path, - * then the map->active field must be cleared so the interrupt can be taken - * again. - */ -static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct list_head *root; - struct irq_phys_map_entry *entry; - struct irq_phys_map *map; - - rcu_read_lock(); - - /* Check for PPIs */ - root = &vgic_cpu->irq_phys_map_list; - list_for_each_entry_rcu(entry, root, entry) { - map = &entry->map; - - if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && - !vgic_irq_is_active(vcpu, map->virt_irq)) - map->active = false; - } - - rcu_read_unlock(); -} - bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id) @@ -595,7 +567,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, vcpu_id, offset); vgic_reg_access(mmio, reg, offset, mode); - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -633,7 +604,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -1443,29 +1413,37 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) /* * Save the physical active state, and reset it to inactive. * - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. + * Return true if there's a pending level triggered interrupt line to queue. */ -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct irq_phys_map *map; + bool phys_active; + bool level_pending; int ret; if (!(vlr.state & LR_HW)) - return 0; + return false; map = vgic_irq_map_search(vcpu, vlr.irq); BUG_ON(!map); ret = irq_get_irqchip_state(map->irq, IRQCHIP_STATE_ACTIVE, - &map->active); + &phys_active); WARN_ON(ret); - if (map->active) + if (phys_active) return 0; - return 1; + /* Mapped edge-triggered interrupts not yet supported. */ + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); + level_pending = process_level_irq(vcpu, lr, vlr); + spin_unlock(&dist->lock); + return level_pending; } /* Sync back the VGIC state after a guest run */ @@ -1490,18 +1468,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) continue; vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, vlr)) { - /* - * So this is a HW interrupt that the guest - * EOI-ed. Clean the LR state and allow the - * interrupt to be sampled again. - */ - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); - vgic_irq_clear_queued(vcpu, vlr.irq); - set_bit(lr, elrsr_ptr); - } + if (vgic_sync_hwirq(vcpu, lr, vlr)) + level_pending = true; if (!test_bit(lr, elrsr_ptr)) continue; @@ -1880,30 +1848,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) kfree(entry); } -/** - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ - * - * Return the logical active state of a mapped interrupt. This doesn't - * necessarily reflects the current HW state. - */ -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) -{ - BUG_ON(!map); - return map->active; -} - -/** - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ - * - * Set the logical active state of a mapped interrupt. This doesn't - * immediately affects the HW state. - */ -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) -{ - BUG_ON(!map); - map->active = active; -} - /** * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping * @vcpu: The VCPU pointer @@ -2129,17 +2073,23 @@ int vgic_init(struct kvm *kvm) } /* - * Enable all SGIs and configure all private IRQs as - * edge-triggered. + * Enable and configure all SGIs to be edge-triggere and + * configure all PPIs as level-triggered. */ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - if (i < VGIC_NR_SGIS) + if (i < VGIC_NR_SGIS) { + /* SGIs */ vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) vgic_bitmap_set_irq_val(&dist->irq_cfg, vcpu->vcpu_id, i, VGIC_CFG_EDGE); + } else if (i < VGIC_NR_PRIVATE_IRQS) { + /* PPIs */ + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_LEVEL); + } } vgic_enable(vcpu); -- cgit v1.2.3 From 8fe2f19e6e6015911bdd4cfcdb23a32e146ba570 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Sep 2015 21:25:12 +0200 Subject: arm/arm64: KVM: Support edge-triggered forwarded interrupts We mark edge-triggered interrupts with the HW bit set as queued to prevent the VGIC code from injecting LRs with both the Active and Pending bits set at the same time while also setting the HW bit, because the hardware does not support this. However, this means that we must also clear the queued flag when we sync back a LR where the state on the physical distributor went from active to inactive because the guest deactivated the interrupt. At this point we must also check if the interrupt is pending on the distributor, and tell the VGIC to queue it again if it is. Since these actions on the sync path are extremely close to those for level-triggered interrupts, rename process_level_irq to process_queued_irq, allowing it to cater for both cases. Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3c2909c1bda3..84abc6f38c1d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1313,13 +1313,10 @@ epilog: } } -static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +static int process_queued_irq(struct kvm_vcpu *vcpu, + int lr, struct vgic_lr vlr) { - int level_pending = 0; - - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); + int pending = 0; /* * If the IRQ was EOIed (called from vgic_process_maintenance) or it @@ -1335,26 +1332,35 @@ static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* - * Tell the gic to start sampling the line of this interrupt again. + * Tell the gic to start sampling this interrupt again. */ vgic_irq_clear_queued(vcpu, vlr.irq); /* Any additional pending interrupt? */ - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - level_pending = 1; + if (vgic_irq_is_edge(vcpu, vlr.irq)) { + BUG_ON(!(vlr.state & LR_HW)); + pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); + pending = 1; + } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); + } } /* * Despite being EOIed, the LR may not have * been marked as empty. */ + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); - return level_pending; + return pending; } static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) @@ -1391,7 +1397,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vlr.irq - VGIC_NR_PRIVATE_IRQS); spin_lock(&dist->lock); - level_pending |= process_level_irq(vcpu, lr, vlr); + level_pending |= process_queued_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); } } @@ -1413,7 +1419,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) /* * Save the physical active state, and reset it to inactive. * - * Return true if there's a pending level triggered interrupt line to queue. + * Return true if there's a pending forwarded interrupt to queue. */ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { @@ -1438,10 +1444,8 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) if (phys_active) return 0; - /* Mapped edge-triggered interrupts not yet supported. */ - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); spin_lock(&dist->lock); - level_pending = process_level_irq(vcpu, lr, vlr); + level_pending = process_queued_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); return level_pending; } -- cgit v1.2.3 From e21f09108754dfdfbb30e547f4edbd3b6884eedb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 13:57:20 +0200 Subject: arm/arm64: KVM: Add tracepoints for vgic and timer The VGIC and timer code for KVM arm/arm64 doesn't have any tracepoints or tracepoint infrastructure defined. Rewriting some of the timer code handling showed me how much we need this, so let's add these simple trace points once and for all and we can easily expand with additional trace points in these files as we go along. Cc: Wei Huang Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 4 +++ virt/kvm/arm/trace.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 5 ++++ 3 files changed, 72 insertions(+) create mode 100644 virt/kvm/arm/trace.h (limited to 'virt/kvm') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 523816d8c402..21a0ab2d8919 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -28,6 +28,8 @@ #include #include +#include "trace.h" + static struct timecounter *timecounter; static struct workqueue_struct *wqueue; static unsigned int host_vtimer_irq; @@ -129,6 +131,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) BUG_ON(!vgic_initialized(vcpu->kvm)); timer->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, + timer->irq.level); ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, timer->map, timer->irq.level); diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h new file mode 100644 index 000000000000..37d8b98867d5 --- /dev/null +++ b/virt/kvm/arm/trace.h @@ -0,0 +1,63 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for vgic + */ +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_update_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 84abc6f38c1d..d4669eb29b77 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -34,6 +34,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + /* * How the whole thing works (courtesy of Christoffer Dall): * @@ -1574,6 +1577,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, int enabled; bool ret = true, can_inject = true; + trace_vgic_update_irq_pending(cpuid, irq_num, level); + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) return -EINVAL; -- cgit v1.2.3 From c4cd4c168b81dad53e659d18cdae653bc0ec2384 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:29 +0300 Subject: KVM: arm/arm64: Optimize away redundant LR tracking Currently we use vgic_irq_lr_map in order to track which LRs hold which IRQs, and lr_used bitmap in order to track which LRs are used or free. vgic_irq_lr_map is actually used only for piggy-back optimization, and can be easily replaced by iteration over lr_used. This is good because in future, when LPI support is introduced, number of IRQs will grow up to at least 16384, while numbers from 1024 to 8192 are never going to be used. This would be a huge memory waste. In its turn, lr_used is also completely redundant since ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM: Keep elrsr/aisr in sync with software model"), because together with lr_used we also update elrsr. This allows to easily replace lr_used with elrsr, inverting all conditions (because in elrsr '1' means 'free'). Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic-v3.c | 1 + virt/kvm/arm/vgic.c | 53 ++++++++++++++------------------------------------ 3 files changed, 17 insertions(+), 38 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 8d7b04db8471..c0f5d7fad9ea 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -158,6 +158,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 7dd5d62f10a1..92003cb61a0a 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -193,6 +193,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway. */ vgic_v3->vgic_vmcr = 0; + vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d4669eb29b77..265a41035728 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -108,6 +108,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); +static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, int virt_irq); static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); @@ -691,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int i; - for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr lr = vgic_get_lr(vcpu, i); /* @@ -1098,7 +1101,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); /* @@ -1112,8 +1114,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } @@ -1128,10 +1128,11 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) */ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1188,8 +1189,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, */ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); struct vgic_lr vlr; int lr; @@ -1200,28 +1202,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("Queue IRQ%d\n", irq); - lr = vgic_cpu->vgic_irq_lr_map[irq]; - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { vlr = vgic_get_lr(vcpu, lr); - if (vlr.source == sgi_source_id) { + if (vlr.irq == irq && vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic->nr_lr); + lr = find_first_bit(elrsr_ptr, vgic->nr_lr); if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); vlr.irq = irq; vlr.source = sgi_source_id; @@ -1456,7 +1452,6 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; u64 elrsr; unsigned long *elrsr_ptr; @@ -1469,22 +1464,10 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { - struct vgic_lr vlr; - - if (!test_bit(lr, vgic_cpu->lr_used)) - continue; - - vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, lr, vlr)) - level_pending = true; - - if (!test_bit(lr, elrsr_ptr)) - continue; - - clear_bit(lr, vgic_cpu->lr_used); + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); BUG_ON(vlr.irq >= dist->nr_irqs); - vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ @@ -1912,12 +1895,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->pending_shared); kfree(vgic_cpu->active_shared); kfree(vgic_cpu->pend_act_shared); - kfree(vgic_cpu->vgic_irq_lr_map); vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); vgic_cpu->pending_shared = NULL; vgic_cpu->active_shared = NULL; vgic_cpu->pend_act_shared = NULL; - vgic_cpu->vgic_irq_lr_map = NULL; } static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) @@ -1928,18 +1909,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared - || !vgic_cpu->vgic_irq_lr_map) { + || !vgic_cpu->pend_act_shared) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); - /* * Store the number of LRs per vcpu, so we don't have to go * all the way to the distributor structure to find out. Only -- cgit v1.2.3 From 212c76545dde8370ebde2a170e4f8e1ed8441dc0 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:30 +0300 Subject: KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings 1. Remove unnecessary 'irq' argument, because irq number can be retrieved from the LR. 2. Since cff9211eb1a1f58ce7f5a2d596b617928fd4be0e ("arm/arm64: KVM: Fix arch timer behavior for disabled interrupts ") LR_STATE_PENDING is queued back by vgic_retire_lr() itself. Also, it clears vlr.state itself. Therefore, we remove the same, now duplicated, check with all accompanying bit manipulations from vgic_unqueue_irqs(). 3. vgic_retire_lr() is always accompanied by vgic_irq_clear_queued(). Since it already does more than just clearing the LR, move vgic_irq_clear_queued() inside of it. Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 265a41035728..96e45f3da534 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -105,7 +105,7 @@ #include "vgic.h" static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); @@ -717,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * interrupt then move the active state to the * distributor tracking bit. */ - if (lr.state & LR_STATE_ACTIVE) { + if (lr.state & LR_STATE_ACTIVE) vgic_irq_set_active(vcpu, lr.irq); - lr.state &= ~LR_STATE_ACTIVE; - } /* * Reestablish the pending state on the distributor and the - * CPU interface. It may have already been pending, but that - * is fine, then we are only setting a few bits that were - * already set. + * CPU interface and mark the LR as free for other use. */ - if (lr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, lr.irq); - lr.state &= ~LR_STATE_PENDING; - } - - vgic_set_lr(vcpu, i, lr); - - /* - * Mark the LR as free for other use. - */ - BUG_ON(lr.state & LR_STATE_MASK); - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); + vgic_retire_lr(i, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -1099,16 +1083,18 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) vgic_ops->enable(vcpu); } -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + vgic_irq_clear_queued(vcpu, vlr.irq); + /* * We must transfer the pending state back to the distributor before * retiring the LR, otherwise we may loose edge-triggered interrupts. */ if (vlr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, irq); + vgic_dist_irq_set_pending(vcpu, vlr.irq); vlr.hwirq = 0; } @@ -1135,11 +1121,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { - vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_queued(vcpu, vlr.irq)) - vgic_irq_clear_queued(vcpu, vlr.irq); - } + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) + vgic_retire_lr(lr, vcpu); } } -- cgit v1.2.3 From 26caea7693cb99833fe4ecc544c842289d6b3f69 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:31 +0300 Subject: KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr() Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used together. Merge them into one function, saving from second vgic_ops dereferencing every time. Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic-v2.c | 5 ----- virt/kvm/arm/vgic-v3.c | 5 ----- virt/kvm/arm/vgic.c | 14 ++------------ 3 files changed, 2 insertions(+), 22 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index c0f5d7fad9ea..ff02f08df74d 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); else @@ -167,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v2_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .clear_eisr = vgic_v2_clear_eisr, diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 92003cb61a0a..487d6357b7e7 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, } vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; -} -static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); else @@ -212,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v3_ops = { .get_lr = vgic_v3_get_lr, .set_lr = vgic_v3_set_lr, - .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, .get_elrsr = vgic_v3_get_elrsr, .get_eisr = vgic_v3_get_eisr, .clear_eisr = vgic_v3_clear_eisr, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 96e45f3da534..fe451d4885ae 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1032,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops->set_lr(vcpu, lr, vlr); } -static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ - vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); -} - static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { return vgic_ops->get_elrsr(vcpu); @@ -1100,7 +1094,6 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1162,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, } vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1340,8 +1332,6 @@ static int process_queued_irq(struct kvm_vcpu *vcpu, vlr.hwirq = 0; vgic_set_lr(vcpu, lr, vlr); - vgic_sync_lr_elrsr(vcpu, lr, vlr); - return pending; } @@ -1442,8 +1432,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) bool level_pending; level_pending = vgic_process_maintenance(vcpu); - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { @@ -1454,6 +1442,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... */ + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = u64_to_bitmask(&elrsr); pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -- cgit v1.2.3 From 6956d8946d5d1cb2ac913caa8d4259a4d0e00c48 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Oct 2015 04:37:18 -0600 Subject: KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs The symbol was missing a KVM dependency. Signed-off-by: Jan Beulich Signed-off-by: Paolo Bonzini --- virt/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt/kvm') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 9f8014dda2cf..7a79b6853583 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -46,7 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y - depends on COMPAT && !S390 + depends on KVM && COMPAT && !S390 config HAVE_KVM_IRQ_BYPASS bool -- cgit v1.2.3 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'virt/kvm') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e29fd2640709..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } -int __attribute__((weak)) kvm_arch_set_irq( +int __attribute__((weak)) kvm_arch_set_irq_inatomic( struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, int irq_source_id, int level, @@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq.type == KVM_IRQ_ROUTING_MSI) - kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else if (kvm_arch_set_irq(&irq, kvm, - KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false) == -EWOULDBLOCK) + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3