From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001
From: Steve Rutherford <srutherford@google.com>
Date: Wed, 29 Jul 2015 23:32:35 -0700
Subject: KVM: x86: Add EOI exit bitmap inference

In order to support a userspace IOAPIC interacting with an in kernel
APIC, the EOI exit bitmaps need to be configurable.

If the IOAPIC is in userspace (i.e. the irqchip has been split), the
EOI exit bitmaps will be set whenever the GSI Routes are configured.
In particular, for the low MSI routes are reservable for userspace
IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the
destination vector of the route will be set for the destination VCPU.

The intention is for the userspace IOAPICs to use the reservable MSI
routes to inject interrupts into the guest.

This is a slight abuse of the notion of an MSI Route, given that MSIs
classically bypass the IOAPIC. It might be worthwhile to add an
additional route type to improve clarity.

Compile tested for Intel x86.

Signed-off-by: Steve Rutherford <srutherford@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/irqchip.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index d7ea8e20dae4..716a1c4db528 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -31,16 +31,6 @@
 #include <trace/events/kvm.h>
 #include "irq.h"
 
-struct kvm_irq_routing_table {
-	int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS];
-	u32 nr_rt_entries;
-	/*
-	 * Array indexed by gsi. Each entry contains list of irq chips
-	 * the gsi is connected to.
-	 */
-	struct hlist_head map[0];
-};
-
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
 {
@@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	kvm_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
+	kvm_arch_irq_routing_update(kvm);
+
 	synchronize_srcu_expedited(&kvm->irq_srcu);
 
 	new = old;
-- 
cgit v1.2.3


From d3febddde9c7a959dbb189a700e937db50fad4d6 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 25 Aug 2015 17:05:46 +0800
Subject: kvm: use kmalloc() instead of kzalloc() during iodev
 register/unregister

All fields of kvm_io_range were initialized or copied explicitly
afterwards. So switch to use kmalloc().

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8db1d9361993..23116dcb2129 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3341,7 +3341,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 	if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1)
 		return -ENOSPC;
 
-	new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) *
+	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) *
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
@@ -3373,7 +3373,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	if (r)
 		return r;
 
-	new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) *
+	new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) *
 			  sizeof(struct kvm_io_range)), GFP_KERNEL);
 	if (!new_bus)
 		return -ENOMEM;
-- 
cgit v1.2.3


From e9ea5069d9e569c32ab913c39467df32e056b3a7 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 15 Sep 2015 14:41:59 +0800
Subject: kvm: add capability for any-length ioeventfds

Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c  | 4 +---
 virt/kvm/kvm_main.c | 1 +
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 79db45336e3a..ac89299b8699 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -914,9 +914,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 		return -EINVAL;
 
 	/* ioeventfd with no length can't be combined with DATAMATCH */
-	if (!args->len &&
-	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-			   KVM_IOEVENTFD_FLAG_DATAMATCH))
+	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
 		return -EINVAL;
 
 	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 23116dcb2129..afd7ae6aec65 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2718,6 +2718,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 	case KVM_CAP_IRQFD:
 	case KVM_CAP_IRQFD_RESAMPLE:
 #endif
+	case KVM_CAP_IOEVENTFD_ANY_LENGTH:
 	case KVM_CAP_CHECK_EXTENSION_VM:
 		return 1;
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
-- 
cgit v1.2.3


From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Fri, 18 Sep 2015 22:29:42 +0800
Subject: KVM: create kvm_irqfd.h

Move _irqfd_resampler and _irqfd struct declarations in a new
public header: kvm_irqfd.h. They are respectively renamed into
kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes
will be used by architecture specific code, in the context of
IRQ bypass manager integration.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 95 +++++++++++++-----------------------------------------
 1 file changed, 23 insertions(+), 72 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index ac89299b8699..413f5a6b61ba 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -23,6 +23,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/kvm.h>
+#include <linux/kvm_irqfd.h>
 #include <linux/workqueue.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
@@ -39,68 +40,14 @@
 #include <kvm/iodev.h>
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
-/*
- * --------------------------------------------------------------------
- * irqfd: Allows an fd to be used to inject an interrupt to the guest
- *
- * Credit goes to Avi Kivity for the original idea.
- * --------------------------------------------------------------------
- */
-
-/*
- * Resampling irqfds are a special variety of irqfds used to emulate
- * level triggered interrupts.  The interrupt is asserted on eventfd
- * trigger.  On acknowledgement through the irq ack notifier, the
- * interrupt is de-asserted and userspace is notified through the
- * resamplefd.  All resamplers on the same gsi are de-asserted
- * together, so we don't need to track the state of each individual
- * user.  We can also therefore share the same irq source ID.
- */
-struct _irqfd_resampler {
-	struct kvm *kvm;
-	/*
-	 * List of resampling struct _irqfd objects sharing this gsi.
-	 * RCU list modified under kvm->irqfds.resampler_lock
-	 */
-	struct list_head list;
-	struct kvm_irq_ack_notifier notifier;
-	/*
-	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
-	 * resamplers among irqfds on the same gsi.
-	 * Accessed and modified under kvm->irqfds.resampler_lock
-	 */
-	struct list_head link;
-};
-
-struct _irqfd {
-	/* Used for MSI fast-path */
-	struct kvm *kvm;
-	wait_queue_t wait;
-	/* Update side is protected by irqfds.lock */
-	struct kvm_kernel_irq_routing_entry irq_entry;
-	seqcount_t irq_entry_sc;
-	/* Used for level IRQ fast-path */
-	int gsi;
-	struct work_struct inject;
-	/* The resampler used by this irqfd (resampler-only) */
-	struct _irqfd_resampler *resampler;
-	/* Eventfd notified on resample (resampler-only) */
-	struct eventfd_ctx *resamplefd;
-	/* Entry in list of irqfds for a resampler (resampler-only) */
-	struct list_head resampler_link;
-	/* Used for setup/shutdown */
-	struct eventfd_ctx *eventfd;
-	struct list_head list;
-	poll_table pt;
-	struct work_struct shutdown;
-};
 
 static struct workqueue_struct *irqfd_cleanup_wq;
 
 static void
 irqfd_inject(struct work_struct *work)
 {
-	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(work, struct kvm_kernel_irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
 	if (!irqfd->resampler) {
@@ -121,12 +68,13 @@ irqfd_inject(struct work_struct *work)
 static void
 irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 {
-	struct _irqfd_resampler *resampler;
+	struct kvm_kernel_irqfd_resampler *resampler;
 	struct kvm *kvm;
-	struct _irqfd *irqfd;
+	struct kvm_kernel_irqfd *irqfd;
 	int idx;
 
-	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+	resampler = container_of(kian,
+			struct kvm_kernel_irqfd_resampler, notifier);
 	kvm = resampler->kvm;
 
 	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
@@ -141,9 +89,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
 }
 
 static void
-irqfd_resampler_shutdown(struct _irqfd *irqfd)
+irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
 {
-	struct _irqfd_resampler *resampler = irqfd->resampler;
+	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
 	struct kvm *kvm = resampler->kvm;
 
 	mutex_lock(&kvm->irqfds.resampler_lock);
@@ -168,7 +116,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
 static void
 irqfd_shutdown(struct work_struct *work)
 {
-	struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(work, struct kvm_kernel_irqfd, shutdown);
 	u64 cnt;
 
 	/*
@@ -198,7 +147,7 @@ irqfd_shutdown(struct work_struct *work)
 
 /* assumes kvm->irqfds.lock is held */
 static bool
-irqfd_is_active(struct _irqfd *irqfd)
+irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
 {
 	return list_empty(&irqfd->list) ? false : true;
 }
@@ -209,7 +158,7 @@ irqfd_is_active(struct _irqfd *irqfd)
  * assumes kvm->irqfds.lock is held
  */
 static void
-irqfd_deactivate(struct _irqfd *irqfd)
+irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 {
 	BUG_ON(!irqfd_is_active(irqfd));
 
@@ -224,7 +173,8 @@ irqfd_deactivate(struct _irqfd *irqfd)
 static int
 irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
-	struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(wait, struct kvm_kernel_irqfd, wait);
 	unsigned long flags = (unsigned long)key;
 	struct kvm_kernel_irq_routing_entry irq;
 	struct kvm *kvm = irqfd->kvm;
@@ -274,12 +224,13 @@ static void
 irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
 			poll_table *pt)
 {
-	struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt);
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(pt, struct kvm_kernel_irqfd, pt);
 	add_wait_queue(wqh, &irqfd->wait);
 }
 
 /* Must be called under irqfds.lock */
-static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
+static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
@@ -304,7 +255,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd)
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 	struct fd f;
 	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
@@ -340,7 +291,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	irqfd->eventfd = eventfd;
 
 	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
-		struct _irqfd_resampler *resampler;
+		struct kvm_kernel_irqfd_resampler *resampler;
 
 		resamplefd = eventfd_ctx_fdget(args->resamplefd);
 		if (IS_ERR(resamplefd)) {
@@ -525,7 +476,7 @@ kvm_eventfd_init(struct kvm *kvm)
 static int
 kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 	struct eventfd_ctx *eventfd;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
@@ -581,7 +532,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 void
 kvm_irqfd_release(struct kvm *kvm)
 {
-	struct _irqfd *irqfd, *tmp;
+	struct kvm_kernel_irqfd *irqfd, *tmp;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
@@ -604,7 +555,7 @@ kvm_irqfd_release(struct kvm *kvm)
  */
 void kvm_irq_routing_update(struct kvm *kvm)
 {
-	struct _irqfd *irqfd;
+	struct kvm_kernel_irqfd *irqfd;
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-- 
cgit v1.2.3


From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Fri, 18 Sep 2015 22:29:43 +0800
Subject: KVM: introduce kvm_arch functions for IRQ bypass

This patch introduces
- kvm_arch_irq_bypass_add_producer
- kvm_arch_irq_bypass_del_producer
- kvm_arch_irq_bypass_stop
- kvm_arch_irq_bypass_start

They make possible to specialize the KVM IRQ bypass consumer in
case CONFIG_KVM_HAVE_IRQ_BYPASS is set.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
[Add weak implementations of the callbacks. - Feng]
Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/Kconfig   |  3 +++
 virt/kvm/eventfd.c | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index e2c876d5a03b..9f8014dda2cf 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 config KVM_COMPAT
        def_bool y
        depends on COMPAT && !S390
+
+config HAVE_KVM_IRQ_BYPASS
+       bool
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 413f5a6b61ba..c4f7abec4261 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -252,6 +252,18 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+void __attribute__((weak)) kvm_arch_irq_bypass_stop(
+				struct irq_bypass_consumer *cons)
+{
+}
+
+void __attribute__((weak)) kvm_arch_irq_bypass_start(
+				struct irq_bypass_consumer *cons)
+{
+}
+#endif
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 {
-- 
cgit v1.2.3


From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@linaro.org>
Date: Fri, 18 Sep 2015 22:29:44 +0800
Subject: KVM: eventfd: add irq bypass consumer management

This patch adds the registration/unregistration of an
irq_bypass_consumer on irqfd assignment/deassignment.

Signed-off-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c4f7abec4261..7df356d8f1fd 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
 #include <linux/srcu.h>
 #include <linux/slab.h>
 #include <linux/seqlock.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 
 #include <kvm/iodev.h>
@@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work)
 	/*
 	 * It is now safe to release the object's resources
 	 */
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+	irq_bypass_unregister_consumer(&irqfd->consumer);
+#endif
 	eventfd_ctx_put(irqfd->eventfd);
 	kfree(irqfd);
 }
@@ -391,6 +395,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	 * we might race against the POLLHUP
 	 */
 	fdput(f);
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+	irqfd->consumer.token = (void *)irqfd->eventfd;
+	irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+	irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+	irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+	irqfd->consumer.start = kvm_arch_irq_bypass_start;
+	ret = irq_bypass_register_consumer(&irqfd->consumer);
+	if (ret)
+		pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+				irqfd->consumer.token, ret);
+#endif
 
 	return 0;
 
-- 
cgit v1.2.3


From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@intel.com>
Date: Fri, 18 Sep 2015 22:29:53 +0800
Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd'

This patch adds an arch specific hooks 'arch_update' in
'struct kvm_kernel_irqfd'. On Intel side, it is used to
update the IRTE when VT-d posted-interrupts is used.

Signed-off-by: Feng Wu <feng.wu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 7df356d8f1fd..b637965746bb 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start(
 				struct irq_bypass_consumer *cons)
 {
 }
+
+int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
+				struct kvm *kvm, unsigned int host_irq,
+				uint32_t guest_irq, bool set)
+{
+	return 0;
+}
 #endif
 
 static int
@@ -586,9 +593,19 @@ void kvm_irq_routing_update(struct kvm *kvm)
 
 	spin_lock_irq(&kvm->irqfds.lock);
 
-	list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
 		irqfd_update(kvm, irqfd);
 
+#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
+		if (irqfd->producer) {
+			int ret = kvm_arch_update_irqfd_routing(
+					irqfd->kvm, irqfd->producer->irq,
+					irqfd->gsi, 1);
+			WARN_ON(ret);
+		}
+#endif
+	}
+
 	spin_unlock_irq(&kvm->irqfds.lock);
 }
 
-- 
cgit v1.2.3


From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001
From: Feng Wu <feng.wu@intel.com>
Date: Fri, 18 Sep 2015 22:29:55 +0800
Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked

This patch updates the Posted-Interrupts Descriptor when vCPU
is blocked.

pre-block:
- Add the vCPU to the blocked per-CPU list
- Set 'NV' to POSTED_INTR_WAKEUP_VECTOR

post-block:
- Remove the vCPU from the per-CPU list

Signed-off-by: Feng Wu <feng.wu@intel.com>
[Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index afd7ae6aec65..a75502c93c3e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	init_waitqueue_head(&vcpu->wq);
 	kvm_async_pf_vcpu_init(vcpu);
 
+	vcpu->pre_pcpu = -1;
+	INIT_LIST_HEAD(&vcpu->blocked_vcpu_list);
+
 	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 	if (!page) {
 		r = -ENOMEM;
-- 
cgit v1.2.3


From 6003a4201077da41427dae5e71bb2f31dfdc0c10 Mon Sep 17 00:00:00 2001
From: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
Date: Fri, 9 Oct 2015 12:21:55 +0000
Subject: kvm: fix waitqueue_active without memory barrier in
 virt/kvm/async_pf.c

async_pf_execute() seems to be missing a memory barrier which might
cause the waker to not notice the waiter and miss sending a wake_up as
in the following figure.

        async_pf_execute                    kvm_vcpu_block
------------------------------------------------------------------------
spin_lock(&vcpu->async_pf.lock);
if (waitqueue_active(&vcpu->wq))
/* The CPU might reorder the test for
   the waitqueue up here, before
   prior writes complete */
                                    prepare_to_wait(&vcpu->wq, &wait,
                                      TASK_INTERRUPTIBLE);
                                    /*if (kvm_vcpu_check_block(vcpu) < 0) */
                                     /*if (kvm_arch_vcpu_runnable(vcpu)) { */
                                      ...
                                      return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                                        !vcpu->arch.apf.halted)
                                        || !list_empty_careful(&vcpu->async_pf.done)
                                     ...
                                     return 0;
list_add_tail(&apf->link,
  &vcpu->async_pf.done);
spin_unlock(&vcpu->async_pf.lock);
                                    waited = true;
                                    schedule();
------------------------------------------------------------------------

The attached patch adds the missing memory barrier.

I found this issue when I was looking through the linux source code
for places calling waitqueue_active() before wake_up*(), but without
preceding memory barriers, after sending a patch to fix a similar
issue in drivers/tty/n_tty.c  (Details about the original issue can be
found here: https://lkml.org/lkml/2015/9/28/849).

Signed-off-by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/async_pf.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 44660aee335f..77d42be6970e 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work)
 
 	trace_kvm_async_pf_completed(addr, gva);
 
+	/*
+	 * This memory barrier pairs with prepare_to_wait's set_current_state()
+	 */
+	smp_mb();
 	if (waitqueue_active(&vcpu->wq))
 		wake_up_interruptible(&vcpu->wq);
 
-- 
cgit v1.2.3


From 351dc6477cd35136ce4668401b1b1332a62908a8 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin <asmetanin@virtuozzo.com>
Date: Fri, 16 Oct 2015 10:07:45 +0300
Subject: kvm/eventfd: avoid loop inside irqfd_update()

The loop(for) inside irqfd_update() is unnecessary
because any other value for irq_entry.type will just trigger
schedule_work(&irqfd->inject) in irqfd_wakeup.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Vitaly Kuznetsov <vkuznets@redhat.com>
CC: "K. Y. Srinivasan" <kys@microsoft.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b637965746bb..518421e65b0d 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -238,20 +238,17 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
 {
 	struct kvm_kernel_irq_routing_entry *e;
 	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
-	int i, n_entries;
+	int n_entries;
 
 	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);
 
 	write_seqcount_begin(&irqfd->irq_entry_sc);
 
-	irqfd->irq_entry.type = 0;
-
 	e = entries;
-	for (i = 0; i < n_entries; ++i, ++e) {
-		/* Only fast-path MSI. */
-		if (e->type == KVM_IRQ_ROUTING_MSI)
-			irqfd->irq_entry = *e;
-	}
+	if (n_entries == 1)
+		irqfd->irq_entry = *e;
+	else
+		irqfd->irq_entry.type = 0;
 
 	write_seqcount_end(&irqfd->irq_entry_sc);
 }
-- 
cgit v1.2.3


From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001
From: Andrey Smetanin <asmetanin@virtuozzo.com>
Date: Fri, 16 Oct 2015 10:07:46 +0300
Subject: kvm/eventfd: factor out kvm_notify_acked_gsi()

Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners
and notify those matching the given gsi.

It will be reused in the upcoming Hyper-V SynIC implementation.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Vitaly Kuznetsov <vkuznets@redhat.com>
CC: "K. Y. Srinivasan" <kys@microsoft.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 518421e65b0d..f6b986a41823 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -451,9 +451,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
 }
 EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
 
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
 {
 	struct kvm_irq_ack_notifier *kian;
+
+	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
+				 link)
+		if (kian->gsi == gsi)
+			kian->irq_acked(kian);
+}
+
+void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
+{
 	int gsi, idx;
 
 	trace_kvm_ack_irq(irqchip, pin);
@@ -461,10 +470,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
-					 link)
-			if (kian->gsi == gsi)
-				kian->irq_acked(kian);
+		kvm_notify_acked_gsi(kvm, gsi);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
 }
 
-- 
cgit v1.2.3


From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001
From: Andrey Smetanin <asmetanin@virtuozzo.com>
Date: Fri, 16 Oct 2015 10:07:47 +0300
Subject: kvm/eventfd: add arch-specific set_irq

Allow for arch-specific interrupt types to be set.  For that, add
kvm_arch_set_irq() which takes interrupt type-specific action if it
recognizes the interrupt type given, and -EWOULDBLOCK otherwise.

The default implementation always returns -EWOULDBLOCK.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Vitaly Kuznetsov <vkuznets@redhat.com>
CC: "K. Y. Srinivasan" <kys@microsoft.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index f6b986a41823..e29fd2640709 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -171,6 +171,15 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
+int __attribute__((weak)) kvm_arch_set_irq(
+				struct kvm_kernel_irq_routing_entry *irq,
+				struct kvm *kvm, int irq_source_id,
+				int level,
+				bool line_status)
+{
+	return -EWOULDBLOCK;
+}
+
 /*
  * Called with wqh->lock held and interrupts disabled
  */
@@ -195,7 +204,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 		if (irq.type == KVM_IRQ_ROUTING_MSI)
 			kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
 					false);
-		else
+		else if (kvm_arch_set_irq(&irq, kvm,
+					  KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+					  false) == -EWOULDBLOCK)
 			schedule_work(&irqfd->inject);
 		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
-- 
cgit v1.2.3


From f33143d80907602deb1b96db42da93507ed03b31 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin <asmetanin@virtuozzo.com>
Date: Fri, 16 Oct 2015 10:07:48 +0300
Subject: kvm/irqchip: allow only multiple irqchip routes per GSI

Any other irq routing types (MSI, S390_ADAPTER, upcoming Hyper-V
SynIC) map one-to-one to GSI.

Signed-off-by: Andrey Smetanin <asmetanin@virtuozzo.com>
Reviewed-by: Roman Kagan <rkagan@virtuozzo.com>
Signed-off-by: Denis V. Lunev <den@openvz.org>
CC: Vitaly Kuznetsov <vkuznets@redhat.com>
CC: "K. Y. Srinivasan" <kys@microsoft.com>
CC: Gleb Natapov <gleb@kernel.org>
CC: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/irqchip.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 716a1c4db528..f0b08a2a48ba 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -144,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 
 	/*
 	 * Do not allow GSI to be mapped to the same irqchip more than once.
-	 * Allow only one to one mapping between GSI and MSI.
+	 * Allow only one to one mapping between GSI and non-irqchip routing.
 	 */
 	hlist_for_each_entry(ei, &rt->map[ue->gsi], link)
-		if (ei->type == KVM_IRQ_ROUTING_MSI ||
-		    ue->type == KVM_IRQ_ROUTING_MSI ||
+		if (ei->type != KVM_IRQ_ROUTING_IRQCHIP ||
+		    ue->type != KVM_IRQ_ROUTING_IRQCHIP ||
 		    ue->u.irqchip.irqchip == ei->irqchip.irqchip)
 			return r;
 
-- 
cgit v1.2.3


From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 27 Aug 2015 16:41:15 +0200
Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks

Some times it is useful for architecture implementations of KVM to know
when the VCPU thread is about to block or when it comes back from
blocking (arm/arm64 needs to know this to properly implement timers, for
example).

Therefore provide a generic architecture callback function in line with
what we do elsewhere for KVM generic-arch interactions.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/kvm_main.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'virt/kvm')

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8db1d9361993..7873d6daccb1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2018,6 +2018,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 		} while (single_task_running() && ktime_before(cur, stop));
 	}
 
+	kvm_arch_vcpu_blocking(vcpu);
+
 	for (;;) {
 		prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE);
 
@@ -2031,6 +2033,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	finish_wait(&vcpu->wq, &wait);
 	cur = ktime_get();
 
+	kvm_arch_vcpu_unblocking(vcpu);
 out:
 	block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
 
-- 
cgit v1.2.3


From d35268da66870d733ae763fd7f9b06a1f63f395e Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 25 Aug 2015 19:48:21 +0200
Subject: arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block

We currently schedule a soft timer every time we exit the guest if the
timer did not expire while running the guest.  This is really not
necessary, because the only work we do in the timer work function is to
kick the vcpu.

Kicking the vcpu does two things:
(1) If the vpcu thread is on a waitqueue, make it runnable and remove it
from the waitqueue.
(2) If the vcpu is running on a different physical CPU from the one
doing the kick, it sends a reschedule IPI.

The second case cannot happen, because the soft timer is only ever
scheduled when the vcpu is not running.  The first case is only relevant
when the vcpu thread is on a waitqueue, which is only the case when the
vcpu thread has called kvm_vcpu_block().

Therefore, we only need to make sure a timer is scheduled for
kvm_vcpu_block(), which we do by encapsulating all calls to
kvm_vcpu_block() with kvm_timer_{un}schedule calls.

Additionally, we only schedule a soft timer if the timer is enabled and
unmasked, since it is useless otherwise.

Note that theoretically userspace can use the SET_ONE_REG interface to
change registers that should cause the timer to fire, even if the vcpu
is blocked without a scheduled timer, but this case was not supported
before this patch and we leave it for future work for now.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 94 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 60 insertions(+), 34 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index b9d3a32cbc04..32095fbb5d7c 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 	return HRTIMER_NORESTART;
 }
 
+static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
+		!kvm_vgic_get_phys_irq_active(timer->map);
+}
+
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	cycle_t cval, now;
 
-	if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-	    !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
-	    kvm_vgic_get_phys_irq_active(timer->map))
+	if (!kvm_timer_irq_can_fire(vcpu))
 		return false;
 
 	cval = timer->cntv_cval;
@@ -127,12 +134,57 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 	return cval <= now;
 }
 
+/*
+ * Schedule the background timer before calling kvm_vcpu_block, so that this
+ * thread is removed from its waitqueue and made runnable when there's a timer
+ * interrupt to handle.
+ */
+void kvm_timer_schedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	u64 ns;
+	cycle_t cval, now;
+
+	BUG_ON(timer_is_armed(timer));
+
+	/*
+	 * No need to schedule a background timer if the guest timer has
+	 * already expired, because kvm_vcpu_block will return before putting
+	 * the thread to sleep.
+	 */
+	if (kvm_timer_should_fire(vcpu))
+		return;
+
+	/*
+	 * If the timer is not capable of raising interrupts (disabled or
+	 * masked), then there's no more work for us to do.
+	 */
+	if (!kvm_timer_irq_can_fire(vcpu))
+		return;
+
+	/*  The timer has not yet expired, schedule a background timer */
+	cval = timer->cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	ns = cyclecounter_cyc2ns(timecounter->cc,
+				 cval - now,
+				 timecounter->mask,
+				 &timecounter->frac);
+	timer_arm(timer, ns);
+}
+
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+	timer_disarm(timer);
+}
+
 /**
  * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu
  * @vcpu: The vcpu pointer
  *
- * Disarm any pending soft timers, since the world-switch code will write the
- * virtual timer state back to the physical CPU.
+ * Check if the virtual timer has expired while we were running in the host,
+ * and inject an interrupt if that was the case.
  */
 void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 {
@@ -140,17 +192,6 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	bool phys_active;
 	int ret;
 
-	/*
-	 * We're about to run this vcpu again, so there is no need to
-	 * keep the background timer running, as we're about to
-	 * populate the CPU timer again.
-	 */
-	timer_disarm(timer);
-
-	/*
-	 * If the timer expired while we were not scheduled, now is the time
-	 * to inject it.
-	 */
 	if (kvm_timer_should_fire(vcpu))
 		kvm_timer_inject_irq(vcpu);
 
@@ -176,32 +217,17 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
  * kvm_timer_sync_hwstate - sync timer state from cpu
  * @vcpu: The vcpu pointer
  *
- * Check if the virtual timer was armed and either schedule a corresponding
- * soft timer or inject directly if already expired.
+ * Check if the virtual timer has expired while we were running in the guest,
+ * and inject an interrupt if that was the case.
  */
 void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	cycle_t cval, now;
-	u64 ns;
 
 	BUG_ON(timer_is_armed(timer));
 
-	if (kvm_timer_should_fire(vcpu)) {
-		/*
-		 * Timer has already expired while we were not
-		 * looking. Inject the interrupt and carry on.
-		 */
+	if (kvm_timer_should_fire(vcpu))
 		kvm_timer_inject_irq(vcpu);
-		return;
-	}
-
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask,
-				 &timecounter->frac);
-	timer_arm(timer, ns);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
-- 
cgit v1.2.3


From 9103617df202d74e5c65f8af84a9aa727f812a06 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 25 Aug 2015 22:50:57 +0200
Subject: arm/arm64: KVM: vgic: Factor out level irq processing on guest exit

Currently vgic_process_maintenance() processes dealing with a completed
level-triggered interrupt directly, but we are soon going to reuse this
logic for level-triggered mapped interrupts with the HW bit set, so
move this logic into a separate static function.

Probably the most scary part of this commit is convincing yourself that
the current flow is safe compared to the old one.  In the following I
try to list the changes and why they are harmless:

  Move vgic_irq_clear_queued after kvm_notify_acked_irq:
    Harmless because the only potential effect of clearing the queued
    flag wrt.  kvm_set_irq is that vgic_update_irq_pending does not set
    the pending bit on the emulated CPU interface or in the
    pending_on_cpu bitmask if the function is called with level=1.
    However, the point of kvm_notify_acked_irq is to call kvm_set_irq
    with level=0, and we set the queued flag again in
    __kvm_vgic_sync_hwstate later on if the level is stil high.

  Move vgic_set_lr before kvm_notify_acked_irq:
    Also, harmless because the LR are cpu-local operations and
    kvm_notify_acked only affects the dist

  Move vgic_dist_irq_clear_soft_pend after kvm_notify_acked_irq:
    Also harmless, because now we check the level state in the
    clear_soft_pend function and lower the pending bits if the level is
    low.

Reviewed-by: Eric Auger <eric.auger@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 94 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 56 insertions(+), 38 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 66c66165e712..367a180fb5ac 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -107,6 +107,7 @@ static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
 						int virt_irq);
+static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
 
 static const struct vgic_ops *vgic_ops;
 static const struct vgic_params *vgic;
@@ -357,6 +358,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq)
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 
 	vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0);
+	if (!vgic_dist_irq_get_level(vcpu, irq)) {
+		vgic_dist_irq_clear_pending(vcpu, irq);
+		if (!compute_pending_for_cpu(vcpu))
+			clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
+	}
 }
 
 static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
@@ -1338,12 +1344,56 @@ epilog:
 	}
 }
 
+static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
+{
+	int level_pending = 0;
+
+	vlr.state = 0;
+	vlr.hwirq = 0;
+	vgic_set_lr(vcpu, lr, vlr);
+
+	/*
+	 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
+	 * went from active to non-active (called from vgic_sync_hwirq) it was
+	 * also ACKed and we we therefore assume we can clear the soft pending
+	 * state (should it had been set) for this interrupt.
+	 *
+	 * Note: if the IRQ soft pending state was set after the IRQ was
+	 * acked, it actually shouldn't be cleared, but we have no way of
+	 * knowing that unless we start trapping ACKs when the soft-pending
+	 * state is set.
+	 */
+	vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+	/*
+	 * Tell the gic to start sampling the line of this interrupt again.
+	 */
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
+	/* Any additional pending interrupt? */
+	if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+		vgic_cpu_irq_set(vcpu, vlr.irq);
+		level_pending = 1;
+	} else {
+		vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+		vgic_cpu_irq_clear(vcpu, vlr.irq);
+	}
+
+	/*
+	 * Despite being EOIed, the LR may not have
+	 * been marked as empty.
+	 */
+	vgic_sync_lr_elrsr(vcpu, lr, vlr);
+
+	return level_pending;
+}
+
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
 	u32 status = vgic_get_interrupt_status(vcpu);
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	bool level_pending = false;
 	struct kvm *kvm = vcpu->kvm;
+	int level_pending = 0;
 
 	kvm_debug("STATUS = %08x\n", status);
 
@@ -1358,54 +1408,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 
 		for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) {
 			struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
-			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
-			spin_lock(&dist->lock);
-			vgic_irq_clear_queued(vcpu, vlr.irq);
+			WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 			WARN_ON(vlr.state & LR_STATE_MASK);
-			vlr.state = 0;
-			vgic_set_lr(vcpu, lr, vlr);
 
-			/*
-			 * If the IRQ was EOIed it was also ACKed and we we
-			 * therefore assume we can clear the soft pending
-			 * state (should it had been set) for this interrupt.
-			 *
-			 * Note: if the IRQ soft pending state was set after
-			 * the IRQ was acked, it actually shouldn't be
-			 * cleared, but we have no way of knowing that unless
-			 * we start trapping ACKs when the soft-pending state
-			 * is set.
-			 */
-			vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
 			/*
 			 * kvm_notify_acked_irq calls kvm_set_irq()
-			 * to reset the IRQ level. Need to release the
-			 * lock for kvm_set_irq to grab it.
+			 * to reset the IRQ level, which grabs the dist->lock
+			 * so we call this before taking the dist->lock.
 			 */
-			spin_unlock(&dist->lock);
-
 			kvm_notify_acked_irq(kvm, 0,
 					     vlr.irq - VGIC_NR_PRIVATE_IRQS);
-			spin_lock(&dist->lock);
-
-			/* Any additional pending interrupt? */
-			if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
-				vgic_cpu_irq_set(vcpu, vlr.irq);
-				level_pending = true;
-			} else {
-				vgic_dist_irq_clear_pending(vcpu, vlr.irq);
-				vgic_cpu_irq_clear(vcpu, vlr.irq);
-			}
 
+			spin_lock(&dist->lock);
+			level_pending |= process_level_irq(vcpu, lr, vlr);
 			spin_unlock(&dist->lock);
-
-			/*
-			 * Despite being EOIed, the LR may not have
-			 * been marked as empty.
-			 */
-			vgic_sync_lr_elrsr(vcpu, lr, vlr);
 		}
 	}
 
-- 
cgit v1.2.3


From 8bf9a701e103fd17dbdf0355e43ff5200b4823aa Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 30 Aug 2015 14:42:16 +0200
Subject: arm/arm64: KVM: Implement GICD_ICFGR as RO for PPIs

The GICD_ICFGR allows the bits for the SGIs and PPIs to be read only.
We currently simulate this behavior by writing a hardcoded value to the
register for the SGIs and PPIs on every write of these bits to the
register (ignoring what the guest actually wrote), and by writing the
same value as the reset value to the register.

This is a bit counter-intuitive, as the register is RO for these bits,
and we can just implement it that way, allowing us to control the value
of the bits purely in the reset code.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 367a180fb5ac..f8ca2e9d2f0b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -690,10 +690,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 	vgic_reg_access(mmio, &val, offset,
 			ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
 	if (mmio->is_write) {
-		if (offset < 8) {
-			*reg = ~0U; /* Force PPIs/SGIs to 1 */
+		/* Ignore writes to read-only SGI and PPI bits */
+		if (offset < 8)
 			return false;
-		}
 
 		val = vgic_cfg_compress(val);
 		if (offset & 4) {
-- 
cgit v1.2.3


From 54723bb37feac347a169359536f3dff122cabca3 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 30 Aug 2015 14:45:20 +0200
Subject: arm/arm64: KVM: Use appropriate define in VGIC reset code

We currently initialize the SGIs to be enabled in the VGIC code, but we
use the VGIC_NR_PPIS define for this purpose, instead of the the more
natural VGIC_NR_SGIS.  Change this slightly confusing use of the
defines.

Note: This should have no functional change, as both names are defined
to the number 16.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f8ca2e9d2f0b..a44ecf9eca4e 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2128,8 +2128,12 @@ int vgic_init(struct kvm *kvm)
 			break;
 		}
 
-		for (i = 0; i < dist->nr_irqs; i++) {
-			if (i < VGIC_NR_PPIS)
+		/*
+		 * Enable all SGIs and configure all private IRQs as
+		 * edge-triggered.
+		 */
+		for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+			if (i < VGIC_NR_SGIS)
 				vgic_bitmap_set_irq_val(&dist->irq_enabled,
 							vcpu->vcpu_id, i, 1);
 			if (i < VGIC_NR_PRIVATE_IRQS)
-- 
cgit v1.2.3


From 4b4b4512da2a844b8da2585609b67fae1ce4f4db Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 30 Aug 2015 15:01:27 +0200
Subject: arm/arm64: KVM: Rework the arch timer to use level-triggered
 semantics

The arch timer currently uses edge-triggered semantics in the sense that
the line is never sampled by the vgic and lowering the line from the
timer to the vgic doesn't have any effect on the pending state of
virtual interrupts in the vgic.  This means that we do not support a
guest with the otherwise valid behavior of (1) disable interrupts (2)
enable the timer (3) disable the timer (4) enable interrupts.  Such a
guest would validly not expect to see any interrupts on real hardware,
but will see interrupts on KVM.

This patch fixes this shortcoming through the following series of
changes.

First, we change the flow of the timer/vgic sync/flush operations.  Now
the timer is always flushed/synced before the vgic, because the vgic
samples the state of the timer output.  This has the implication that we
move the timer operations in to non-preempible sections, but that is
fine after the previous commit getting rid of hrtimer schedules on every
entry/exit.

Second, we change the internal behavior of the timer, letting the timer
keep track of its previous output state, and only lower/raise the line
to the vgic when the state changes.  Note that in theory this could have
been accomplished more simply by signalling the vgic every time the
state *potentially* changed, but we don't want to be hitting the vgic
more often than necessary.

Third, we get rid of the use of the map->active field in the vgic and
instead simply set the interrupt as active on the physical distributor
whenever the input to the GIC is asserted and conversely clear the
physical active state when the input to the GIC is deasserted.

Fourth, and finally, we now initialize the timer PPIs (and all the other
unused PPIs for now), to be level-triggered, and modify the sync code to
sample the line state on HW sync and re-inject a new interrupt if it is
still pending at that time.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c |  81 ++++++++++++++++++++++++------------
 virt/kvm/arm/vgic.c       | 102 ++++++++++++----------------------------------
 2 files changed, 82 insertions(+), 101 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 32095fbb5d7c..523816d8c402 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer)
 	}
 }
 
-static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
-{
-	int ret;
-	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-
-	kvm_vgic_set_phys_irq_active(timer->map, true);
-	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
-					 timer->map,
-					 timer->irq->level);
-	WARN_ON(ret);
-}
-
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
 {
 	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
@@ -116,8 +104,7 @@ static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu)
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
-		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
-		!kvm_vgic_get_phys_irq_active(timer->map);
+		(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE);
 }
 
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
@@ -134,6 +121,41 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 	return cval <= now;
 }
 
+static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
+{
+	int ret;
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	BUG_ON(!vgic_initialized(vcpu->kvm));
+
+	timer->irq.level = new_level;
+	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+					 timer->map,
+					 timer->irq.level);
+	WARN_ON(ret);
+}
+
+/*
+ * Check if there was a change in the timer state (should we raise or lower
+ * the line level to the GIC).
+ */
+static void kvm_timer_update_state(struct kvm_vcpu *vcpu)
+{
+	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+	/*
+	 * If userspace modified the timer registers via SET_ONE_REG before
+	 * the vgic was initialized, we mustn't set the timer->irq.level value
+	 * because the guest would never see the interrupt.  Instead wait
+	 * until we call this function from kvm_timer_flush_hwstate.
+	 */
+	if (!vgic_initialized(vcpu->kvm))
+	    return;
+
+	if (kvm_timer_should_fire(vcpu) != timer->irq.level)
+		kvm_timer_update_irq(vcpu, !timer->irq.level);
+}
+
 /*
  * Schedule the background timer before calling kvm_vcpu_block, so that this
  * thread is removed from its waitqueue and made runnable when there's a timer
@@ -192,17 +214,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
 	bool phys_active;
 	int ret;
 
-	if (kvm_timer_should_fire(vcpu))
-		kvm_timer_inject_irq(vcpu);
+	kvm_timer_update_state(vcpu);
 
 	/*
-	 * We keep track of whether the edge-triggered interrupt has been
-	 * signalled to the vgic/guest, and if so, we mask the interrupt and
-	 * the physical distributor to prevent the timer from raising a
-	 * physical interrupt whenever we run a guest, preventing forward
-	 * VCPU progress.
+	 * If we enter the guest with the virtual input level to the VGIC
+	 * asserted, then we have already told the VGIC what we need to, and
+	 * we don't need to exit from the guest until the guest deactivates
+	 * the already injected interrupt, so therefore we should set the
+	 * hardware active state to prevent unnecessary exits from the guest.
+	 *
+	 * Conversely, if the virtual input level is deasserted, then always
+	 * clear the hardware active state to ensure that hardware interrupts
+	 * from the timer triggers a guest exit.
 	 */
-	if (kvm_vgic_get_phys_irq_active(timer->map))
+	if (timer->irq.level)
 		phys_active = true;
 	else
 		phys_active = false;
@@ -226,8 +251,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
 
 	BUG_ON(timer_is_armed(timer));
 
-	if (kvm_timer_should_fire(vcpu))
-		kvm_timer_inject_irq(vcpu);
+	/*
+	 * The guest could have modified the timer registers or the timer
+	 * could have expired, update the timer state.
+	 */
+	kvm_timer_update_state(vcpu);
 }
 
 int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
@@ -242,7 +270,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * kvm_vcpu_set_target(). To handle this, we determine
 	 * vcpu timer irq number when the vcpu is reset.
 	 */
-	timer->irq = irq;
+	timer->irq.irq = irq->irq;
 
 	/*
 	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
@@ -251,6 +279,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	 * the ARMv7 architecture.
 	 */
 	timer->cntv_ctl = 0;
+	kvm_timer_update_state(vcpu);
 
 	/*
 	 * Tell the VGIC that the virtual interrupt is tied to a
@@ -295,6 +324,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 	default:
 		return -1;
 	}
+
+	kvm_timer_update_state(vcpu);
 	return 0;
 }
 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index a44ecf9eca4e..3c2909c1bda3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -537,34 +537,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
 	return false;
 }
 
-/*
- * If a mapped interrupt's state has been modified by the guest such that it
- * is no longer active or pending, without it have gone through the sync path,
- * then the map->active field must be cleared so the interrupt can be taken
- * again.
- */
-static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct list_head *root;
-	struct irq_phys_map_entry *entry;
-	struct irq_phys_map *map;
-
-	rcu_read_lock();
-
-	/* Check for PPIs */
-	root = &vgic_cpu->irq_phys_map_list;
-	list_for_each_entry_rcu(entry, root, entry) {
-		map = &entry->map;
-
-		if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
-		    !vgic_irq_is_active(vcpu, map->virt_irq))
-			map->active = false;
-	}
-
-	rcu_read_unlock();
-}
-
 bool vgic_handle_clear_pending_reg(struct kvm *kvm,
 				   struct kvm_exit_mmio *mmio,
 				   phys_addr_t offset, int vcpu_id)
@@ -595,7 +567,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
 					  vcpu_id, offset);
 		vgic_reg_access(mmio, reg, offset, mode);
 
-		vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
 		vgic_update_state(kvm);
 		return true;
 	}
@@ -633,7 +604,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
 			ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
 
 	if (mmio->is_write) {
-		vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
 		vgic_update_state(kvm);
 		return true;
 	}
@@ -1443,29 +1413,37 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 /*
  * Save the physical active state, and reset it to inactive.
  *
- * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ * Return true if there's a pending level triggered interrupt line to queue.
  */
-static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 {
+	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	struct irq_phys_map *map;
+	bool phys_active;
+	bool level_pending;
 	int ret;
 
 	if (!(vlr.state & LR_HW))
-		return 0;
+		return false;
 
 	map = vgic_irq_map_search(vcpu, vlr.irq);
 	BUG_ON(!map);
 
 	ret = irq_get_irqchip_state(map->irq,
 				    IRQCHIP_STATE_ACTIVE,
-				    &map->active);
+				    &phys_active);
 
 	WARN_ON(ret);
 
-	if (map->active)
+	if (phys_active)
 		return 0;
 
-	return 1;
+	/* Mapped edge-triggered interrupts not yet supported. */
+	WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
+	spin_lock(&dist->lock);
+	level_pending = process_level_irq(vcpu, lr, vlr);
+	spin_unlock(&dist->lock);
+	return level_pending;
 }
 
 /* Sync back the VGIC state after a guest run */
@@ -1490,18 +1468,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 			continue;
 
 		vlr = vgic_get_lr(vcpu, lr);
-		if (vgic_sync_hwirq(vcpu, vlr)) {
-			/*
-			 * So this is a HW interrupt that the guest
-			 * EOI-ed. Clean the LR state and allow the
-			 * interrupt to be sampled again.
-			 */
-			vlr.state = 0;
-			vlr.hwirq = 0;
-			vgic_set_lr(vcpu, lr, vlr);
-			vgic_irq_clear_queued(vcpu, vlr.irq);
-			set_bit(lr, elrsr_ptr);
-		}
+		if (vgic_sync_hwirq(vcpu, lr, vlr))
+			level_pending = true;
 
 		if (!test_bit(lr, elrsr_ptr))
 			continue;
@@ -1880,30 +1848,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
 	kfree(entry);
 }
 
-/**
- * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
- *
- * Return the logical active state of a mapped interrupt. This doesn't
- * necessarily reflects the current HW state.
- */
-bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
-{
-	BUG_ON(!map);
-	return map->active;
-}
-
-/**
- * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
- *
- * Set the logical active state of a mapped interrupt. This doesn't
- * immediately affects the HW state.
- */
-void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
-{
-	BUG_ON(!map);
-	map->active = active;
-}
-
 /**
  * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
  * @vcpu: The VCPU pointer
@@ -2129,17 +2073,23 @@ int vgic_init(struct kvm *kvm)
 		}
 
 		/*
-		 * Enable all SGIs and configure all private IRQs as
-		 * edge-triggered.
+		 * Enable and configure all SGIs to be edge-triggere and
+		 * configure all PPIs as level-triggered.
 		 */
 		for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
-			if (i < VGIC_NR_SGIS)
+			if (i < VGIC_NR_SGIS) {
+				/* SGIs */
 				vgic_bitmap_set_irq_val(&dist->irq_enabled,
 							vcpu->vcpu_id, i, 1);
-			if (i < VGIC_NR_PRIVATE_IRQS)
 				vgic_bitmap_set_irq_val(&dist->irq_cfg,
 							vcpu->vcpu_id, i,
 							VGIC_CFG_EDGE);
+			} else if (i < VGIC_NR_PRIVATE_IRQS) {
+				/* PPIs */
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_LEVEL);
+			}
 		}
 
 		vgic_enable(vcpu);
-- 
cgit v1.2.3


From 8fe2f19e6e6015911bdd4cfcdb23a32e146ba570 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 4 Sep 2015 21:25:12 +0200
Subject: arm/arm64: KVM: Support edge-triggered forwarded interrupts

We mark edge-triggered interrupts with the HW bit set as queued to
prevent the VGIC code from injecting LRs with both the Active and
Pending bits set at the same time while also setting the HW bit,
because the hardware does not support this.

However, this means that we must also clear the queued flag when we sync
back a LR where the state on the physical distributor went from active
to inactive because the guest deactivated the interrupt.  At this point
we must also check if the interrupt is pending on the distributor, and
tell the VGIC to queue it again if it is.

Since these actions on the sync path are extremely close to those for
level-triggered interrupts, rename process_level_irq to
process_queued_irq, allowing it to cater for both cases.

Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 3c2909c1bda3..84abc6f38c1d 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1313,13 +1313,10 @@ epilog:
 	}
 }
 
-static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
+static int process_queued_irq(struct kvm_vcpu *vcpu,
+				   int lr, struct vgic_lr vlr)
 {
-	int level_pending = 0;
-
-	vlr.state = 0;
-	vlr.hwirq = 0;
-	vgic_set_lr(vcpu, lr, vlr);
+	int pending = 0;
 
 	/*
 	 * If the IRQ was EOIed (called from vgic_process_maintenance) or it
@@ -1335,26 +1332,35 @@ static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 	vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
 	/*
-	 * Tell the gic to start sampling the line of this interrupt again.
+	 * Tell the gic to start sampling this interrupt again.
 	 */
 	vgic_irq_clear_queued(vcpu, vlr.irq);
 
 	/* Any additional pending interrupt? */
-	if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
-		vgic_cpu_irq_set(vcpu, vlr.irq);
-		level_pending = 1;
+	if (vgic_irq_is_edge(vcpu, vlr.irq)) {
+		BUG_ON(!(vlr.state & LR_HW));
+		pending = vgic_dist_irq_is_pending(vcpu, vlr.irq);
 	} else {
-		vgic_dist_irq_clear_pending(vcpu, vlr.irq);
-		vgic_cpu_irq_clear(vcpu, vlr.irq);
+		if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+			vgic_cpu_irq_set(vcpu, vlr.irq);
+			pending = 1;
+		} else {
+			vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+			vgic_cpu_irq_clear(vcpu, vlr.irq);
+		}
 	}
 
 	/*
 	 * Despite being EOIed, the LR may not have
 	 * been marked as empty.
 	 */
+	vlr.state = 0;
+	vlr.hwirq = 0;
+	vgic_set_lr(vcpu, lr, vlr);
+
 	vgic_sync_lr_elrsr(vcpu, lr, vlr);
 
-	return level_pending;
+	return pending;
 }
 
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
@@ -1391,7 +1397,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 					     vlr.irq - VGIC_NR_PRIVATE_IRQS);
 
 			spin_lock(&dist->lock);
-			level_pending |= process_level_irq(vcpu, lr, vlr);
+			level_pending |= process_queued_irq(vcpu, lr, vlr);
 			spin_unlock(&dist->lock);
 		}
 	}
@@ -1413,7 +1419,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 /*
  * Save the physical active state, and reset it to inactive.
  *
- * Return true if there's a pending level triggered interrupt line to queue.
+ * Return true if there's a pending forwarded interrupt to queue.
  */
 static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 {
@@ -1438,10 +1444,8 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 	if (phys_active)
 		return 0;
 
-	/* Mapped edge-triggered interrupts not yet supported. */
-	WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 	spin_lock(&dist->lock);
-	level_pending = process_level_irq(vcpu, lr, vlr);
+	level_pending = process_queued_irq(vcpu, lr, vlr);
 	spin_unlock(&dist->lock);
 	return level_pending;
 }
-- 
cgit v1.2.3


From e21f09108754dfdfbb30e547f4edbd3b6884eedb Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sun, 30 Aug 2015 13:57:20 +0200
Subject: arm/arm64: KVM: Add tracepoints for vgic and timer

The VGIC and timer code for KVM arm/arm64 doesn't have any tracepoints
or tracepoint infrastructure defined.  Rewriting some of the timer code
handling showed me how much we need this, so let's add these simple
trace points once and for all and we can easily expand with additional
trace points in these files as we go along.

Cc: Wei Huang <wei@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c |  4 +++
 virt/kvm/arm/trace.h      | 63 +++++++++++++++++++++++++++++++++++++++++++++++
 virt/kvm/arm/vgic.c       |  5 ++++
 3 files changed, 72 insertions(+)
 create mode 100644 virt/kvm/arm/trace.h

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 523816d8c402..21a0ab2d8919 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
+#include "trace.h"
+
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
@@ -129,6 +131,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
 	BUG_ON(!vgic_initialized(vcpu->kvm));
 
 	timer->irq.level = new_level;
+	trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
+				   timer->irq.level);
 	ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
 					 timer->map,
 					 timer->irq.level);
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000000000000..37d8b98867d5
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,63 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for vgic
+ */
+TRACE_EVENT(vgic_update_irq_pending,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	bool,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level: %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+/*
+ * Tracepoints for arch_timer
+ */
+TRACE_EVENT(kvm_timer_update_irq,
+	TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
+	TP_ARGS(vcpu_id, irq, level),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	vcpu_id	)
+		__field(	__u32,		irq	)
+		__field(	int,		level	)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu_id;
+		__entry->irq		= irq;
+		__entry->level		= level;
+	),
+
+	TP_printk("VCPU: %ld, IRQ %d, level %d",
+		  __entry->vcpu_id, __entry->irq, __entry->level)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 84abc6f38c1d..d4669eb29b77 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@
 #include <asm/kvm.h>
 #include <kvm/iodev.h>
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
  *
@@ -1574,6 +1577,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 	int enabled;
 	bool ret = true, can_inject = true;
 
+	trace_vgic_update_irq_pending(cpuid, irq_num, level);
+
 	if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
 		return -EINVAL;
 
-- 
cgit v1.2.3


From c4cd4c168b81dad53e659d18cdae653bc0ec2384 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Tue, 27 Oct 2015 11:37:29 +0300
Subject: KVM: arm/arm64: Optimize away redundant LR tracking

Currently we use vgic_irq_lr_map in order to track which LRs hold which
IRQs, and lr_used bitmap in order to track which LRs are used or free.

vgic_irq_lr_map is actually used only for piggy-back optimization, and
can be easily replaced by iteration over lr_used. This is good because in
future, when LPI support is introduced, number of IRQs will grow up to at
least 16384, while numbers from 1024 to 8192 are never going to be used.
This would be a huge memory waste.

In its turn, lr_used is also completely redundant since
ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM: Keep elrsr/aisr
in sync with software model"), because together with lr_used we also update
elrsr. This allows to easily replace lr_used with elrsr, inverting all
conditions (because in elrsr '1' means 'free').

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v2.c |  1 +
 virt/kvm/arm/vgic-v3.c |  1 +
 virt/kvm/arm/vgic.c    | 53 ++++++++++++++------------------------------------
 3 files changed, 17 insertions(+), 38 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 8d7b04db8471..c0f5d7fad9ea 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -158,6 +158,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+	vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
 
 	/* Get the show on the road... */
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 7dd5d62f10a1..92003cb61a0a 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -193,6 +193,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 	 * anyway.
 	 */
 	vgic_v3->vgic_vmcr = 0;
+	vgic_v3->vgic_elrsr = ~0;
 
 	/*
 	 * If we are emulating a GICv3, we do it in an non-GICv2-compatible
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index d4669eb29b77..265a41035728 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -108,6 +108,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
 static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
 						int virt_irq);
 static int compute_pending_for_cpu(struct kvm_vcpu *vcpu);
@@ -691,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
 void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int i;
 
-	for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+	for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
 		struct vgic_lr lr = vgic_get_lr(vcpu, i);
 
 		/*
@@ -1098,7 +1101,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
 	/*
@@ -1112,8 +1114,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
 
 	vlr.state = 0;
 	vgic_set_lr(vcpu, lr_nr, vlr);
-	clear_bit(lr_nr, vgic_cpu->lr_used);
-	vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
 	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
@@ -1128,10 +1128,11 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
  */
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	int lr;
 
-	for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) {
+	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
 		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
@@ -1188,8 +1189,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
  */
 bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+	u64 elrsr = vgic_get_elrsr(vcpu);
+	unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
 	struct vgic_lr vlr;
 	int lr;
 
@@ -1200,28 +1202,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq)
 
 	kvm_debug("Queue IRQ%d\n", irq);
 
-	lr = vgic_cpu->vgic_irq_lr_map[irq];
-
 	/* Do we have an active interrupt for the same CPUID? */
-	if (lr != LR_EMPTY) {
+	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		vlr = vgic_get_lr(vcpu, lr);
-		if (vlr.source == sgi_source_id) {
+		if (vlr.irq == irq && vlr.source == sgi_source_id) {
 			kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
-			BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
 			vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
 			return true;
 		}
 	}
 
 	/* Try to use another LR for this interrupt */
-	lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used,
-			       vgic->nr_lr);
+	lr = find_first_bit(elrsr_ptr, vgic->nr_lr);
 	if (lr >= vgic->nr_lr)
 		return false;
 
 	kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id);
-	vgic_cpu->vgic_irq_lr_map[irq] = lr;
-	set_bit(lr, vgic_cpu->lr_used);
 
 	vlr.irq = irq;
 	vlr.source = sgi_source_id;
@@ -1456,7 +1452,6 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
 /* Sync back the VGIC state after a guest run */
 static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	u64 elrsr;
 	unsigned long *elrsr_ptr;
@@ -1469,22 +1464,10 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 
 	/* Deal with HW interrupts, and clear mappings for empty LRs */
 	for (lr = 0; lr < vgic->nr_lr; lr++) {
-		struct vgic_lr vlr;
-
-		if (!test_bit(lr, vgic_cpu->lr_used))
-			continue;
-
-		vlr = vgic_get_lr(vcpu, lr);
-		if (vgic_sync_hwirq(vcpu, lr, vlr))
-			level_pending = true;
-
-		if (!test_bit(lr, elrsr_ptr))
-			continue;
-
-		clear_bit(lr, vgic_cpu->lr_used);
+		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
+		level_pending |= vgic_sync_hwirq(vcpu, lr, vlr);
 		BUG_ON(vlr.irq >= dist->nr_irqs);
-		vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
 	}
 
 	/* Check if we still have something up our sleeve... */
@@ -1912,12 +1895,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
 	kfree(vgic_cpu->pending_shared);
 	kfree(vgic_cpu->active_shared);
 	kfree(vgic_cpu->pend_act_shared);
-	kfree(vgic_cpu->vgic_irq_lr_map);
 	vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
 	vgic_cpu->pending_shared = NULL;
 	vgic_cpu->active_shared = NULL;
 	vgic_cpu->pend_act_shared = NULL;
-	vgic_cpu->vgic_irq_lr_map = NULL;
 }
 
 static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
@@ -1928,18 +1909,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL);
 	vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared
 		|| !vgic_cpu->active_shared
-		|| !vgic_cpu->pend_act_shared
-		|| !vgic_cpu->vgic_irq_lr_map) {
+		|| !vgic_cpu->pend_act_shared) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
-
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
 	 * all the way to the distributor structure to find out. Only
-- 
cgit v1.2.3


From 212c76545dde8370ebde2a170e4f8e1ed8441dc0 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Tue, 27 Oct 2015 11:37:30 +0300
Subject: KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings

1. Remove unnecessary 'irq' argument, because irq number can be retrieved
   from the LR.
2. Since cff9211eb1a1f58ce7f5a2d596b617928fd4be0e
   ("arm/arm64: KVM: Fix arch timer behavior for disabled interrupts ")
   LR_STATE_PENDING is queued back by vgic_retire_lr() itself. Also, it
   clears vlr.state itself. Therefore, we remove the same, now duplicated,
   check with all accompanying bit manipulations from vgic_unqueue_irqs().
3. vgic_retire_lr() is always accompanied by vgic_irq_clear_queued(). Since
   it already does more than just clearing the LR, move
   vgic_irq_clear_queued() inside of it.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic.c | 37 ++++++++++---------------------------
 1 file changed, 10 insertions(+), 27 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 265a41035728..96e45f3da534 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -105,7 +105,7 @@
 #include "vgic.h"
 
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu);
 static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
 static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
 static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu);
@@ -717,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
 		 * interrupt then move the active state to the
 		 * distributor tracking bit.
 		 */
-		if (lr.state & LR_STATE_ACTIVE) {
+		if (lr.state & LR_STATE_ACTIVE)
 			vgic_irq_set_active(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_ACTIVE;
-		}
 
 		/*
 		 * Reestablish the pending state on the distributor and the
-		 * CPU interface.  It may have already been pending, but that
-		 * is fine, then we are only setting a few bits that were
-		 * already set.
+		 * CPU interface and mark the LR as free for other use.
 		 */
-		if (lr.state & LR_STATE_PENDING) {
-			vgic_dist_irq_set_pending(vcpu, lr.irq);
-			lr.state &= ~LR_STATE_PENDING;
-		}
-
-		vgic_set_lr(vcpu, i, lr);
-
-		/*
-		 * Mark the LR as free for other use.
-		 */
-		BUG_ON(lr.state & LR_STATE_MASK);
-		vgic_retire_lr(i, lr.irq, vcpu);
-		vgic_irq_clear_queued(vcpu, lr.irq);
+		vgic_retire_lr(i, vcpu);
 
 		/* Finally update the VGIC state. */
 		vgic_update_state(vcpu->kvm);
@@ -1099,16 +1083,18 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu)
 	vgic_ops->enable(vcpu);
 }
 
-static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
+static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
 {
 	struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
 
+	vgic_irq_clear_queued(vcpu, vlr.irq);
+
 	/*
 	 * We must transfer the pending state back to the distributor before
 	 * retiring the LR, otherwise we may loose edge-triggered interrupts.
 	 */
 	if (vlr.state & LR_STATE_PENDING) {
-		vgic_dist_irq_set_pending(vcpu, irq);
+		vgic_dist_irq_set_pending(vcpu, vlr.irq);
 		vlr.hwirq = 0;
 	}
 
@@ -1135,11 +1121,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
 	for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) {
 		struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
 
-		if (!vgic_irq_is_enabled(vcpu, vlr.irq)) {
-			vgic_retire_lr(lr, vlr.irq, vcpu);
-			if (vgic_irq_is_queued(vcpu, vlr.irq))
-				vgic_irq_clear_queued(vcpu, vlr.irq);
-		}
+		if (!vgic_irq_is_enabled(vcpu, vlr.irq))
+			vgic_retire_lr(lr, vcpu);
 	}
 }
 
-- 
cgit v1.2.3


From 26caea7693cb99833fe4ecc544c842289d6b3f69 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Tue, 27 Oct 2015 11:37:31 +0300
Subject: KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr()

Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used
together. Merge them into one function, saving from second vgic_ops
dereferencing every time.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/vgic-v2.c |  5 -----
 virt/kvm/arm/vgic-v3.c |  5 -----
 virt/kvm/arm/vgic.c    | 14 ++------------
 3 files changed, 2 insertions(+), 22 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index c0f5d7fad9ea..ff02f08df74d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
 		lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
 
 	vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
-}
 
-static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr);
 	else
@@ -167,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v2_ops = {
 	.get_lr			= vgic_v2_get_lr,
 	.set_lr			= vgic_v2_set_lr,
-	.sync_lr_elrsr		= vgic_v2_sync_lr_elrsr,
 	.get_elrsr		= vgic_v2_get_elrsr,
 	.get_eisr		= vgic_v2_get_eisr,
 	.clear_eisr		= vgic_v2_clear_eisr,
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 92003cb61a0a..487d6357b7e7 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
 	}
 
 	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
-}
 
-static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-				  struct vgic_lr lr_desc)
-{
 	if (!(lr_desc.state & LR_STATE_MASK))
 		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
 	else
@@ -212,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu)
 static const struct vgic_ops vgic_v3_ops = {
 	.get_lr			= vgic_v3_get_lr,
 	.set_lr			= vgic_v3_set_lr,
-	.sync_lr_elrsr		= vgic_v3_sync_lr_elrsr,
 	.get_elrsr		= vgic_v3_get_elrsr,
 	.get_eisr		= vgic_v3_get_eisr,
 	.clear_eisr		= vgic_v3_clear_eisr,
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 96e45f3da534..fe451d4885ae 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1032,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr,
 	vgic_ops->set_lr(vcpu, lr, vlr);
 }
 
-static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr,
-			       struct vgic_lr vlr)
-{
-	vgic_ops->sync_lr_elrsr(vcpu, lr, vlr);
-}
-
 static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu)
 {
 	return vgic_ops->get_elrsr(vcpu);
@@ -1100,7 +1094,6 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu)
 
 	vlr.state = 0;
 	vgic_set_lr(vcpu, lr_nr, vlr);
-	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -1162,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
 	}
 
 	vgic_set_lr(vcpu, lr_nr, vlr);
-	vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
 }
 
 /*
@@ -1340,8 +1332,6 @@ static int process_queued_irq(struct kvm_vcpu *vcpu,
 	vlr.hwirq = 0;
 	vgic_set_lr(vcpu, lr, vlr);
 
-	vgic_sync_lr_elrsr(vcpu, lr, vlr);
-
 	return pending;
 }
 
@@ -1442,8 +1432,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	bool level_pending;
 
 	level_pending = vgic_process_maintenance(vcpu);
-	elrsr = vgic_get_elrsr(vcpu);
-	elrsr_ptr = u64_to_bitmask(&elrsr);
 
 	/* Deal with HW interrupts, and clear mappings for empty LRs */
 	for (lr = 0; lr < vgic->nr_lr; lr++) {
@@ -1454,6 +1442,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 	}
 
 	/* Check if we still have something up our sleeve... */
+	elrsr = vgic_get_elrsr(vcpu);
+	elrsr_ptr = u64_to_bitmask(&elrsr);
 	pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr);
 	if (level_pending || pending < vgic->nr_lr)
 		set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu);
-- 
cgit v1.2.3


From 6956d8946d5d1cb2ac913caa8d4259a4d0e00c48 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 19 Oct 2015 04:37:18 -0600
Subject: KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs

The symbol was missing a KVM dependency.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 9f8014dda2cf..7a79b6853583 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -46,7 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 
 config KVM_COMPAT
        def_bool y
-       depends on COMPAT && !S390
+       depends on KVM && COMPAT && !S390
 
 config HAVE_KVM_IRQ_BYPASS
        bool
-- 
cgit v1.2.3


From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 28 Oct 2015 19:16:47 +0100
Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We do not want to do too much work in atomic context, in particular
not walking all the VCPUs of the virtual machine.  So we want
to distinguish the architecture-specific injection function for irqfd
from kvm_set_msi.  Since it's still empty, reuse the newly added
kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/eventfd.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'virt/kvm')

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e29fd2640709..46dbc0a7dfc1 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
 	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
 }
 
-int __attribute__((weak)) kvm_arch_set_irq(
+int __attribute__((weak)) kvm_arch_set_irq_inatomic(
 				struct kvm_kernel_irq_routing_entry *irq,
 				struct kvm *kvm, int irq_source_id,
 				int level,
@@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 			irq = irqfd->irq_entry;
 		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
 		/* An event has been signaled, inject an interrupt */
-		if (irq.type == KVM_IRQ_ROUTING_MSI)
-			kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
-					false);
-		else if (kvm_arch_set_irq(&irq, kvm,
-					  KVM_USERSPACE_IRQ_SOURCE_ID, 1,
-					  false) == -EWOULDBLOCK)
+		if (kvm_arch_set_irq_inatomic(&irq, kvm,
+					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
+					      false) == -EWOULDBLOCK)
 			schedule_work(&irqfd->inject);
 		srcu_read_unlock(&kvm->irq_srcu, idx);
 	}
-- 
cgit v1.2.3