From d9565a7399d665fa7313122504778cb3d5ef3e19 Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 22 Jul 2016 16:20:40 +0000
Subject: KVM: Move kvm_setup_default/empty_irq_routing declaration in arch
 specific header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

kvm_setup_default_irq_routing and kvm_setup_empty_irq_routing are
not used by generic code. So let's move the declarations in x86 irq.h
header instead of kvm_host.h.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Suggested-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/x86/kvm/irq.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 61ebdc13a29a..035731eb3897 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
 #endif
-- 
cgit v1.2.3


From 180ae7b1182344ca617d8b5200306b02a6b5075d Mon Sep 17 00:00:00 2001
From: Eric Auger <eric.auger@redhat.com>
Date: Fri, 22 Jul 2016 16:20:41 +0000
Subject: KVM: arm/arm64: Enable irqchip routing

This patch adds compilation and link against irqchip.

Main motivation behind using irqchip code is to enable MSI
routing code. In the future irqchip routing may also be useful
when targeting multiple irqchips.

Routing standard callbacks now are implemented in vgic-irqfd:
- kvm_set_routing_entry
- kvm_set_irq
- kvm_set_msi

They only are supported with new_vgic code.

Both HAVE_KVM_IRQCHIP and HAVE_KVM_IRQ_ROUTING are defined.
KVM_CAP_IRQ_ROUTING is advertised and KVM_SET_GSI_ROUTING is allowed.

So from now on IRQCHIP routing is enabled and a routing table entry
must exist for irqfd injection to succeed for a given SPI. This patch
builds a default flat irqchip routing table (gsi=irqchip.pin) covering
all the VGIC SPI indexes. This routing table is overwritten by the
first first user-space call to KVM_SET_GSI_ROUTING ioctl.

MSI routing setup is not yet allowed.

Signed-off-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/Kconfig    |  2 ++
 arch/arm/kvm/Makefile   |  1 +
 arch/arm/kvm/irq.h      | 19 +++++++++++++++++++
 arch/arm64/kvm/Kconfig  |  2 ++
 arch/arm64/kvm/Makefile |  1 +
 arch/arm64/kvm/irq.h    | 19 +++++++++++++++++++
 6 files changed, 44 insertions(+)
 create mode 100644 arch/arm/kvm/irq.h
 create mode 100644 arch/arm64/kvm/irq.h

(limited to 'arch')

diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 95a000515e43..3e1cd0452d67 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -32,6 +32,8 @@ config KVM
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
 	  Support hosting virtualized guest machines.
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 5e28df80dca7..10d77a66cad5 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
+obj-y += $(KVM)/irqchip.o
 obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 9d2eff0b3ad3..9c9edc98d271 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -37,6 +37,8 @@ config KVM
 	select KVM_ARM_VGIC_V3
 	select KVM_ARM_PMU if HW_PERF_EVENTS
 	select HAVE_KVM_MSI
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index a5b96642a9cb..695eb3c7ef41 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h
new file mode 100644
index 000000000000..b74099b905fd
--- /dev/null
+++ b/arch/arm64/kvm/irq.h
@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif
-- 
cgit v1.2.3


From 89581f06b2bc225f0c9822fa52e714aa2e3810dd Mon Sep 17 00:00:00 2001
From: Andrew Jones <drjones@redhat.com>
Date: Fri, 22 Jul 2016 10:38:46 -0400
Subject: arm64: KVM: Set cpsr before spsr on fault injection

We need to set cpsr before determining the spsr bank, as the bank
depends on the target exception level of the injection, not the
current mode of the vcpu. Normally this is one in the same (EL1),
but not when we manage to trap an EL0 fault. It still doesn't really
matter for the 64-bit EL0 case though, as vcpu_spsr() unconditionally
uses the EL1 bank for that. However the 32-bit EL0 case gets fun, as
that path will lead to the BUG() in vcpu_spsr32().

This patch fixes the assignment order and also modifies some white
space in order to better group pairs of lines that have strict order.

Cc: stable@vger.kernel.org # v4.5
Signed-off-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/inject_fault.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index e9e0e6db73f6..898c0e6aedd4 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
-	bool is_aarch32;
+	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 	u32 esr = 0;
 
-	is_aarch32 = vcpu_mode_is_32bit(vcpu);
-
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
@@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 	/*
 	 * Build an unknown exception, depending on the instruction
-- 
cgit v1.2.3


From 3aed64f6d341cdb62bb2d6232589fb13448ce063 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 Jun 2016 13:06:08 +0200
Subject: pvclock: introduce seqcount-like API

The version field in struct pvclock_vcpu_time_info basically implements
a seqcount.  Wrap it with the usual read_begin and read_retry functions,
and use these APIs instead of peppering the code with smp_rmb()s.
While at it, change it to the more pedantically correct virt_rmb().

With this change, __pvclock_read_cycles can be simplified noticeably.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/entry/vdso/vclock_gettime.c |  9 ++-------
 arch/x86/include/asm/pvclock.h       | 39 +++++++++++++++++++++---------------
 arch/x86/kernel/pvclock.c            | 17 ++++++----------
 3 files changed, 31 insertions(+), 34 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..db1e3b4c3693 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -123,9 +123,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 	 */
 
 	do {
-		version = pvti->version;
-
-		smp_rmb();
+		version = pvclock_read_begin(pvti);
 
 		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
 			*mode = VCLOCK_NONE;
@@ -137,10 +135,7 @@ static notrace cycle_t vread_pvclock(int *mode)
 		pvti_tsc_shift = pvti->tsc_shift;
 		pvti_system_time = pvti->system_time;
 		pvti_tsc = pvti->tsc_timestamp;
-
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while (unlikely((version & 1) || version != pvti->version));
+	} while (pvclock_read_retry(pvti, version));
 
 	delta = tsc - pvti_tsc;
 	ret = pvti_system_time +
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
 
 void pvclock_touch_watchdogs(void);
 
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+	unsigned version = src->version & ~1;
+	/* Make sure that the version is read before the data. */
+	virt_rmb();
+	return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+			unsigned version)
+{
+	/* Make sure that the version is re-read after the data. */
+	virt_rmb();
+	return unlikely(version != src->version);
+}
+
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 
 static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
-			       cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
 {
-	unsigned version;
-	cycle_t offset;
-	u64 delta;
-
-	version = src->version;
-	/* Make the latest version visible */
-	smp_rmb();
-
-	delta = rdtsc_ordered() - src->tsc_timestamp;
-	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-	*cycles = src->system_time + offset;
-	*flags = src->flags;
-	return version;
+	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+	cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+					     src->tsc_shift);
+	return src->system_time + offset;
 }
 
 struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 
 	do {
-		version = src->version;
-		/* Make the latest version visible */
-		smp_rmb();
-
+		version = pvclock_read_begin(src);
 		flags = src->flags;
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+	} while (pvclock_read_retry(src, version));
 
 	return flags & valid_flags;
 }
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 
 	do {
-		version = __pvclock_read_cycles(src, &ret, &flags);
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+		version = pvclock_read_begin(src);
+		ret = __pvclock_read_cycles(src);
+		flags = src->flags;
+	} while (pvclock_read_retry(src, version));
 
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;
-- 
cgit v1.2.3


From abe9efa79be02cf2ba27f643b214b07877bb050b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 9 Jun 2016 20:12:34 +0200
Subject: x86: vdso: use __pvclock_read_cycles

The new simplified __pvclock_read_cycles does the same computation
as vread_pvclock, except that (because it takes the pvclock_vcpu_time_info
pointer) it has to be moved inside the loop.  Since the loop is expected to
never roll, this makes no difference.

Acked-by: Andy Lutomirski <luto@kernel.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/entry/vdso/vclock_gettime.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index db1e3b4c3693..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
 	cycle_t ret;
-	u64 tsc, pvti_tsc;
-	u64 last, delta, pvti_system_time;
-	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+	u64 last;
+	u32 version;
 
 	/*
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
@@ -130,18 +129,9 @@ static notrace cycle_t vread_pvclock(int *mode)
 			return 0;
 		}
 
-		tsc = rdtsc_ordered();
-		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
-		pvti_tsc_shift = pvti->tsc_shift;
-		pvti_system_time = pvti->system_time;
-		pvti_tsc = pvti->tsc_timestamp;
+		ret = __pvclock_read_cycles(pvti);
 	} while (pvclock_read_retry(pvti, version));
 
-	delta = tsc - pvti_tsc;
-	ret = pvti_system_time +
-		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
-				    pvti_tsc_shift);
-
 	/* refer to vread_tsc() comment for rationale */
 	last = gtod->cycle_last;
 
-- 
cgit v1.2.3


From 910053002ef1fdc1d42b7015d5b2400172f43e42 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Wed, 3 Aug 2016 12:04:12 +0800
Subject: KVM: lapic: fix access preemption timer stuff even if
 kernel_irqchip=off
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BUG: unable to handle kernel NULL pointer dereference at 000000000000008c
IP: [<ffffffffc04e0180>] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm]
PGD 0
Oops: 0000 [#1] SMP
Call Trace:
 kvm_arch_vcpu_load+0x86/0x260 [kvm]
 vcpu_load+0x46/0x60 [kvm]
 kvm_vcpu_ioctl+0x79/0x7c0 [kvm]
 ? __lock_is_held+0x54/0x70
 do_vfs_ioctl+0x96/0x6a0
 ? __fget_light+0x2a/0x90
 SyS_ioctl+0x79/0x90
 do_syscall_64+0x7c/0x1e0
 entry_SYSCALL64_slow_path+0x25/0x25
RIP  [<ffffffffc04e0180>] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm]
 RSP <ffff8800db1f3d70>
CR2: 000000000000008c
---[ end trace a55fb79d2b3b4ee8 ]---

This can be reproduced steadily by kernel_irqchip=off.

We should not access preemption timer stuff if lapic is emulated in userspace.
This patch fix it by avoiding access preemption timer stuff when kernel_irqchip=off.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Yunhong Jiang <yunhong.jiang@intel.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 730cf174090a..b62c85229711 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 {
+	if (!lapic_in_kernel(vcpu))
+		return false;
+
 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-- 
cgit v1.2.3


From 03331b4b8bd1f0fef34539afa6bb6c45ec25c47b Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 2 Aug 2016 16:32:35 -0400
Subject: nvmx: remove comment about missing nested vpid support

Nested vpid is already supported and both single/global
modes are advertised to the guest

Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bc354f003ce1..b26c222ebeef 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2945,7 +2945,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 			vmx->nested.nested_vmx_secondary_ctls_high);
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
-		/* Currently, no nested vpid support */
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
-- 
cgit v1.2.3


From 45e11817d5703eceb65a673927a8bc74dc1286d6 Mon Sep 17 00:00:00 2001
From: Bandan Das <bsd@redhat.com>
Date: Tue, 2 Aug 2016 16:32:36 -0400
Subject: nvmx: mark ept single context invalidation as supported

Commit 4b855078601f ("KVM: nVMX: Don't advertise single
context invalidation for invept") removed advertising
single context invalidation since the spec does not mandate it.
However, some hypervisors (such as ESX) require it to be present
before willing to use ept in a nested environment. Advertise it
and fallback to the global case.

Signed-off-by: Bandan Das <bsd@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b26c222ebeef..a45d8580f91e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 			vmx->nested.nested_vmx_ept_caps |=
 				VMX_EPT_EXECUTE_ONLY_BIT;
 		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
-		/*
-		 * For nested guests, we don't do anything specific
-		 * for single context invalidation. Hence, only advertise
-		 * support for global context invalidation.
-		 */
-		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT;
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -7608,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 	switch (type) {
 	case VMX_EPT_EXTENT_GLOBAL:
+	/*
+	 * TODO: track mappings and invalidate
+	 * single context requests appropriately
+	 */
+	case VMX_EPT_EXTENT_CONTEXT:
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		nested_vmx_succeed(vcpu);
 		break;
 	default:
-		/* Trap single context invalidation invept calls */
 		BUG_ON(1);
 		break;
 	}
-- 
cgit v1.2.3