From 9e6dabeffd1d0ec2aa19aa076c4886067238d442 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 17 Nov 2011 11:00:41 +0100 Subject: KVM: s390: Fix RUNNING flag misinterpretation CPUSTAT_RUNNING was implemented signifying that a vcpu is not stopped. This is not, however, what the architecture says: RUNNING should be set when the host is acting on the behalf of the guest operating system. CPUSTAT_RUNNING has been changed to be set in kvm_arch_vcpu_load() and to be unset in kvm_arch_vcpu_put(). For signifying stopped state of a vcpu, a host-controlled bit has been used and is set/unset basically on the reverse as the old CPUSTAT_RUNNING bit (including pushing it down into stop handling proper in handle_stop()). Cc: stable@kernel.org Signed-off-by: Cornelia Huck Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/diag.c | 2 +- arch/s390/kvm/intercept.c | 3 ++- arch/s390/kvm/interrupt.c | 1 + arch/s390/kvm/kvm-s390.c | 10 +++++++--- arch/s390/kvm/sigp.c | 6 +++--- 6 files changed, 15 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 24e18473d926..20f9068cd6b5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -47,7 +47,7 @@ struct sca_block { #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) -#define CPUSTAT_HOST 0x80000000 +#define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 #define CPUSTAT_STOP_INT 0x04000000 diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 87cedd61be04..8943e82cd4d9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -70,7 +70,7 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index c7c51898984e..02434543eabb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -132,7 +132,6 @@ static int handle_stop(struct kvm_vcpu *vcpu) int rc = 0; vcpu->stat.exit_stop_request++; - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); spin_lock_bh(&vcpu->arch.local_int.lock); if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; @@ -149,6 +148,8 @@ static int handle_stop(struct kvm_vcpu *vcpu) } if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + atomic_set_mask(CPUSTAT_STOPPED, + &vcpu->arch.sie_block->cpuflags); vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); rc = -EOPNOTSUPP; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 87c16705b381..278ee009ce65 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -252,6 +252,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); if (rc == -EFAULT) exception = 1; + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); break; case KVM_S390_PROGRAM_INT: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 
0bd3bea1e4cd..630ab65b488b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -270,10 +270,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) restore_fp_regs(&vcpu->arch.guest_fpregs); restore_access_regs(vcpu->arch.guest_acrs); gmap_enable(vcpu->arch.gmap); + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); save_fp_regs(&vcpu->arch.guest_fpregs); save_access_regs(vcpu->arch.guest_acrs); @@ -301,7 +303,9 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { - atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | CPUSTAT_SM); + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH | + CPUSTAT_SM | + CPUSTAT_STOPPED); vcpu->arch.sie_block->ecb = 6; vcpu->arch.sie_block->eca = 0xC1002001U; vcpu->arch.sie_block->fac = (int) (long) facilities; @@ -428,7 +432,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) { int rc = 0; - if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED)) rc = -EBUSY; else { vcpu->run->psw_mask = psw.mask; @@ -501,7 +505,7 @@ rerun_vcpu: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags); BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index f815118835f3..4a488a98d7f8 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -57,8 +57,8 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, spin_lock(&fi->lock); if (fi->local_int[cpu_addr] == NULL) rc = 3; /* not operational */ - else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) - & CPUSTAT_RUNNING) { + else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_STOPPED)) { *reg &= 0xffffffff00000000UL; rc = 1; /* status stored */ } else { @@ -251,7 +251,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, spin_lock_bh(&li->lock); /* cpu must be in stopped state */ - if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { rc = 1; /* incorrect state */ *reg &= SIGP_STAT_INCORRECT_STATE; kfree(inti); -- cgit v1.2.3 From bd59d3a4444e5f941c863db1a7715edf7289e29a Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 17 Nov 2011 11:00:42 +0100 Subject: KVM: s390: handle SIGP sense running intercepts SIGP sense running may cause an intercept on higher level virtualization, so handle it by checking the CPUSTAT_RUNNING flag. 
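[Editor's note: the hunks below add a __sigp_sense_running() helper and wire it into kvm_s390_handle_sigp(). As a reading aid, the check it performs condenses to roughly the sketch that follows; the helper name here is invented, the constants and return values mirror the patch below, and the real code's floating-interrupt lookup and locking are omitted.]

    /* Condensed sketch of the sense-running check added by this patch. */
    static int sense_running_sketch(atomic_t *cpuflags, unsigned long *reg)
    {
    	if (atomic_read(cpuflags) & CPUSTAT_RUNNING)
    		return 1;			/* running */

    	*reg &= 0xffffffff00000000UL;		/* clear the status field */
    	*reg |= SIGP_STAT_NOT_RUNNING;		/* report "not running" */
    	return 0;
    }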
Signed-off-by: Cornelia Huck Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/include/asm/kvm_host.h | 1 + arch/s390/kvm/kvm-s390.c | 1 + arch/s390/kvm/sigp.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 20f9068cd6b5..b0c235cb6ad5 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -139,6 +139,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_sigp_sense; + u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; u32 instruction_sigp_emergency; u32 instruction_sigp_stop; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 630ab65b488b..0891926ea953 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -65,6 +65,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 4a488a98d7f8..0a7941d74bc6 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -31,9 +31,11 @@ #define SIGP_SET_PREFIX 0x0d #define SIGP_STORE_STATUS_ADDR 0x0e #define SIGP_SET_ARCH 0x12 +#define SIGP_SENSE_RUNNING 0x15 /* cpu status bits */ #define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_NOT_RUNNING 0x00000400UL #define SIGP_STAT_INCORRECT_STATE 0x00000200UL #define SIGP_STAT_INVALID_PARAMETER 0x00000100UL #define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL @@ -275,6 +277,38 @@ out_fi: return rc; } +static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, + unsigned long *reg) +{ + int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else { + if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + /* running */ + rc = 1; + } else { + /* not running */ + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_NOT_RUNNING; + rc = 0; + } + } + spin_unlock(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr, + rc); + + return rc; +} + int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) { int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; @@ -331,6 +365,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->arch.guest_gprs[r1]); break; + case SIGP_SENSE_RUNNING: + vcpu->stat.instruction_sigp_sense_running++; + rc = __sigp_sense_running(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; /* user space must know about restart */ -- cgit v1.2.3 From 1eddb85f889a5a9fe59ec184e06844716a00aded Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 17 Nov 2011 11:00:43 +0100 Subject: KVM: s390: Fix tprot locking There is a potential host deadlock in the tprot intercept handling. 
We must not hold the mmap semaphore while resolving the guest address. If userspace is remapping, then the memory detection in the guest is broken anyway so we can safely separate the address translation from walking the vmas. Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/priv.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 391626361084..d02638959922 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -336,6 +336,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) u64 address1 = disp1 + base1 ? vcpu->arch.guest_gprs[base1] : 0; u64 address2 = disp2 + base2 ? vcpu->arch.guest_gprs[base2] : 0; struct vm_area_struct *vma; + unsigned long user_address; vcpu->stat.instruction_tprot++; @@ -349,9 +350,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; + /* we must resolve the address without holding the mmap semaphore. + * This is ok since the userspace hypervisor is not supposed to change + * the mapping while the guest queries the memory. Otherwise the guest + * might crash or get wrong info anyway. */ + user_address = (unsigned long) __guestaddr_to_user(vcpu, address1); + down_read(¤t->mm->mmap_sem); - vma = find_vma(current->mm, - (unsigned long) __guestaddr_to_user(vcpu, address1)); + vma = find_vma(current->mm, user_address); if (!vma) { up_read(¤t->mm->mmap_sem); return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); -- cgit v1.2.3 From 52e16b185fdd0aeb0522c203e944873e38ac1575 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 17 Nov 2011 11:00:44 +0100 Subject: KVM: s390: announce SYNC_MMU KVM on s390 always had a sync mmu. Any mapping change in userspace mapping was always reflected immediately in the guest mapping. - In older code the guest mapping was just an offset - In newer code the last level page table is shared Signed-off-by: Christian Borntraeger Signed-off-by: Carsten Otte Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0891926ea953..d1c445732451 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -128,6 +128,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: + case KVM_CAP_SYNC_MMU: r = 1; break; default: -- cgit v1.2.3 From 8bf00a529967dafbbb210b377c38a15834d1e979 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 5 Oct 2011 14:01:22 +0200 Subject: KVM: VMX: add support for switching of PERF_GLOBAL_CTRL Some cpus have special support for switching PERF_GLOBAL_CTRL msr. Add logic to detect if such support exists and works properly and extend msr switching code to use it if available. Also extend number of generic msr switching entries to 8. 
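[Editor's note: the vmx.c changes below fold the choice between the two switching mechanisms into add_atomic_switch_msr()/clear_atomic_switch_msr(). The idea reduces to roughly the sketch that follows; the function name is invented here, the sketch assumes the vmcs_write64()/vmcs_set_bits() helpers and the VMCS field constants already present in vmx.c, and the generic-array fallback is only hinted at.]

    /*
     * Sketch: prefer the dedicated VM-entry/VM-exit controls for
     * IA32_PERF_GLOBAL_CTRL when the CPU supports them; otherwise fall
     * back to the generic MSR autoload/autostore arrays (now 8 entries).
     */
    static void switch_perf_global_ctrl_sketch(u64 guest_val, u64 host_val)
    {
    	if (cpu_has_load_perf_global_ctrl) {
    		vmcs_write64(GUEST_IA32_PERF_GLOBAL_CTRL, guest_val);
    		vmcs_write64(HOST_IA32_PERF_GLOBAL_CTRL, host_val);
    		vmcs_set_bits(VM_ENTRY_CONTROLS,
    			      VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
    		vmcs_set_bits(VM_EXIT_CONTROLS,
    			      VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
    		return;
    	}
    	/* else: add MSR_CORE_PERF_GLOBAL_CTRL to the msr_autoload arrays */
    }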
Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 93 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a0d6bd9ad442..55e849b52d9e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -118,7 +118,7 @@ module_param(ple_gap, int, S_IRUGO); static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, int, S_IRUGO); -#define NR_AUTOLOAD_MSRS 1 +#define NR_AUTOLOAD_MSRS 8 #define VMCS02_POOL_SIZE 1 struct vmcs { @@ -622,6 +622,7 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static bool cpu_has_load_ia32_efer; +static bool cpu_has_load_perf_global_ctrl; static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); static DEFINE_SPINLOCK(vmx_vpid_lock); @@ -1191,15 +1192,34 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } +static void clear_atomic_switch_msr_special(unsigned long entry, + unsigned long exit) +{ + vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); + vmcs_clear_bits(VM_EXIT_CONTROLS, exit); +} + static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) { unsigned i; struct msr_autoload *m = &vmx->msr_autoload; - if (msr == MSR_EFER && cpu_has_load_ia32_efer) { - vmcs_clear_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); - vmcs_clear_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); - return; + switch (msr) { + case MSR_EFER: + if (cpu_has_load_ia32_efer) { + clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + VM_EXIT_LOAD_IA32_EFER); + return; + } + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if (cpu_has_load_perf_global_ctrl) { + clear_atomic_switch_msr_special( + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); + return; + } + break; } for (i = 0; i < m->nr; ++i) @@ -1215,18 +1235,44 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); } +static void add_atomic_switch_msr_special(unsigned long entry, + unsigned long exit, unsigned long guest_val_vmcs, + unsigned long host_val_vmcs, u64 guest_val, u64 host_val) +{ + vmcs_write64(guest_val_vmcs, guest_val); + vmcs_write64(host_val_vmcs, host_val); + vmcs_set_bits(VM_ENTRY_CONTROLS, entry); + vmcs_set_bits(VM_EXIT_CONTROLS, exit); +} + static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, u64 guest_val, u64 host_val) { unsigned i; struct msr_autoload *m = &vmx->msr_autoload; - if (msr == MSR_EFER && cpu_has_load_ia32_efer) { - vmcs_write64(GUEST_IA32_EFER, guest_val); - vmcs_write64(HOST_IA32_EFER, host_val); - vmcs_set_bits(VM_ENTRY_CONTROLS, VM_ENTRY_LOAD_IA32_EFER); - vmcs_set_bits(VM_EXIT_CONTROLS, VM_EXIT_LOAD_IA32_EFER); - return; + switch (msr) { + case MSR_EFER: + if (cpu_has_load_ia32_efer) { + add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + VM_EXIT_LOAD_IA32_EFER, + GUEST_IA32_EFER, + HOST_IA32_EFER, + guest_val, host_val); + return; + } + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + if (cpu_has_load_perf_global_ctrl) { + add_atomic_switch_msr_special( + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, + GUEST_IA32_PERF_GLOBAL_CTRL, + HOST_IA32_PERF_GLOBAL_CTRL, + guest_val, host_val); + return; + } + break; } for (i = 0; i < m->nr; ++i) @@ -2455,6 +2501,42 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, VM_EXIT_LOAD_IA32_EFER); + 
cpu_has_load_perf_global_ctrl = + allow_1_setting(MSR_IA32_VMX_ENTRY_CTLS, + VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) + && allow_1_setting(MSR_IA32_VMX_EXIT_CTLS, + VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); + + /* + * Some cpus support VM_ENTRY_(LOAD|SAVE)_IA32_PERF_GLOBAL_CTRL + * but due to arrata below it can't be used. Workaround is to use + * msr load mechanism to switch IA32_PERF_GLOBAL_CTRL. + * + * VM Exit May Incorrectly Clear IA32_PERF_GLOBAL_CTRL [34:32] + * + * AAK155 (model 26) + * AAP115 (model 30) + * AAT100 (model 37) + * BC86,AAY89,BD102 (model 44) + * BA97 (model 46) + * + */ + if (cpu_has_load_perf_global_ctrl && boot_cpu_data.x86 == 0x6) { + switch (boot_cpu_data.x86_model) { + case 26: + case 30: + case 37: + case 44: + case 46: + cpu_has_load_perf_global_ctrl = false; + printk_once(KERN_WARNING"kvm: VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL " + "does not work properly. Using workaround\n"); + break; + default: + break; + } + } + return 0; } -- cgit v1.2.3 From d7cd97964ba6d70c558348bd2c87290dce885583 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 5 Oct 2011 14:01:23 +0200 Subject: KVM: VMX: Add support for guest/host-only profiling Support guest/host-only profiling by switch perf msrs on a guest entry if needed. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 55e849b52d9e..98f4b0bcc660 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "trace.h" @@ -6050,6 +6051,24 @@ static void vmx_cancel_injection(struct kvm_vcpu *vcpu) vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); } +static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx) +{ + int i, nr_msrs; + struct perf_guest_switch_msr *msrs; + + msrs = perf_guest_get_msrs(&nr_msrs); + + if (!msrs) + return; + + for (i = 0; i < nr_msrs; i++) + if (msrs[i].host == msrs[i].guest) + clear_atomic_switch_msr(vmx, msrs[i].msr); + else + add_atomic_switch_msr(vmx, msrs[i].msr, msrs[i].guest, + msrs[i].host); +} + #ifdef CONFIG_X86_64 #define R "r" #define Q "q" @@ -6099,6 +6118,8 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) vmx_set_interrupt_shadow(vcpu, 0); + atomic_switch_perf_msrs(vmx); + vmx->__launched = vmx->loaded_vmcs->launched; asm( /* Store host registers */ -- cgit v1.2.3 From e7fc6f93b4242b2b566f0070709e27257d6da8a2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 5 Oct 2011 14:01:24 +0200 Subject: KVM: VMX: Check for automatic switch msr table overflow Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98f4b0bcc660..579a0b51696a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1280,7 +1280,11 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, if (m->guest[i].index == msr) break; - if (i == m->nr) { + if (i == NR_AUTOLOAD_MSRS) { + printk_once(KERN_WARNING"Not enough mst switch entries. 
" + "Can't add msr %x\n", msr); + return; + } else if (i == m->nr) { ++m->nr; vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, m->nr); vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); -- cgit v1.2.3 From bb75c627fb0dfb8c0ab75d3033709ff928896e16 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 17 Nov 2011 15:26:35 +0100 Subject: Revert "KVM: PPC: Add support for explicit HIOR setting" This reverts commit a15bd354f083f20f257db450488db52ac27df439. It exceeded the padding on the SREGS struct, rendering the ABI backwards-incompatible. Conflicts: arch/powerpc/kvm/powerpc.c include/linux/kvm.h Signed-off-by: Avi Kivity --- arch/powerpc/include/asm/kvm.h | 8 -------- arch/powerpc/include/asm/kvm_book3s.h | 2 -- arch/powerpc/kvm/book3s_pr.c | 14 ++------------ arch/powerpc/kvm/powerpc.c | 1 - 4 files changed, 2 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h index 08fe69edcd10..0ad432bc81d6 100644 --- a/arch/powerpc/include/asm/kvm.h +++ b/arch/powerpc/include/asm/kvm.h @@ -148,12 +148,6 @@ struct kvm_regs { #define KVM_SREGS_E_UPDATE_DEC (1 << 2) #define KVM_SREGS_E_UPDATE_DBSR (1 << 3) -/* - * Book3S special bits to indicate contents in the struct by maintaining - * backwards compatibility with older structs. If adding a new field, - * please make sure to add a flag for that new field */ -#define KVM_SREGS_S_HIOR (1 << 0) - /* * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a * previous KVM_GET_REGS. @@ -179,8 +173,6 @@ struct kvm_sregs { __u64 ibat[8]; __u64 dbat[8]; } ppc32; - __u64 flags; /* KVM_SREGS_S_ */ - __u64 hior; } s; struct { union { diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a384ffdf33de..d4df013ad779 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -90,8 +90,6 @@ struct kvmppc_vcpu_book3s { #endif int context_id[SID_CONTEXTS]; - bool hior_sregs; /* HIOR is set by SREGS, not PVR */ - struct hlist_head hpte_hash_pte[HPTEG_HASH_NUM_PTE]; struct hlist_head hpte_hash_pte_long[HPTEG_HASH_NUM_PTE_LONG]; struct hlist_head hpte_hash_vpte[HPTEG_HASH_NUM_VPTE]; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index bc4d50dec78b..3c791e1eb675 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -151,16 +151,14 @@ void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u32 pvr) #ifdef CONFIG_PPC_BOOK3S_64 if ((pvr >= 0x330000) && (pvr < 0x70330000)) { kvmppc_mmu_book3s_64_init(vcpu); - if (!to_book3s(vcpu)->hior_sregs) - to_book3s(vcpu)->hior = 0xfff00000; + to_book3s(vcpu)->hior = 0xfff00000; to_book3s(vcpu)->msr_mask = 0xffffffffffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_64; } else #endif { kvmppc_mmu_book3s_32_init(vcpu); - if (!to_book3s(vcpu)->hior_sregs) - to_book3s(vcpu)->hior = 0; + to_book3s(vcpu)->hior = 0; to_book3s(vcpu)->msr_mask = 0xffffffffULL; vcpu->arch.cpu_type = KVM_CPU_3S_32; } @@ -797,9 +795,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, } } - if (sregs->u.s.flags & KVM_SREGS_S_HIOR) - sregs->u.s.hior = to_book3s(vcpu)->hior; - return 0; } @@ -836,11 +831,6 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, /* Flush the MMU after messing with the segments */ kvmppc_mmu_pte_flush(vcpu, 0, 0); - if (sregs->u.s.flags & KVM_SREGS_S_HIOR) { - to_book3s(vcpu)->hior_sregs = true; - to_book3s(vcpu)->hior = sregs->u.s.hior; - } - return 0; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 
efbf9ad87203..607fbdf24b84 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -208,7 +208,6 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_BOOKE_SREGS:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
-	case KVM_CAP_PPC_HIOR:
 	case KVM_CAP_PPC_PAPR:
 #endif
 	case KVM_CAP_PPC_UNSET_IRQ:
--
cgit v1.2.3


From 95ef1e52922cf75b1ea2eae54ef886f2cc47eecb Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Tue, 15 Nov 2011 14:59:07 +0200
Subject: KVM guest: prevent tracing recursion with kvmclock

Prevent tracing of preempt_disable() in get_cpu_var() in
kvm_clock_read(). When CONFIG_DEBUG_PREEMPT is enabled,
preempt_disable/enable() are traced and this causes the function_graph
tracer to go into an infinite recursion. By open coding the
preempt_disable() around the get_cpu_var(), we can use the notrace
version which prevents preempt_disable/enable() from being traced and
prevents the recursion.

Based on a similar patch for Xen from Jeremy Fitzhardinge.

Tested-by: Gleb Natapov
Acked-by: Steven Rostedt
Signed-off-by: Avi Kivity
---
 arch/x86/kernel/kvmclock.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index c1a0188e29ae..44842d756b29 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -74,9 +74,10 @@ static cycle_t kvm_clock_read(void)
 	struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
 
-	src = &get_cpu_var(hv_clock);
+	preempt_disable_notrace();
+	src = &__get_cpu_var(hv_clock);
 	ret = pvclock_clocksource_read(src);
-	put_cpu_var(hv_clock);
+	preempt_enable_notrace();
 	return ret;
 }
--
cgit v1.2.3
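[Editor's note on the kvmclock patch above: the recursion arises because get_cpu_var()/put_cpu_var() contain preempt_disable()/preempt_enable(), which are themselves traceable under CONFIG_DEBUG_PREEMPT, so the function_graph tracer re-enters the clock read it is timing. In kernels of that era the macros expanded to approximately the following; the exact form is quoted from memory of include/linux/percpu.h and should be treated as an assumption.]

    /* Approximate expansion; the preempt calls are the traced part. */
    #define get_cpu_var(var) (*({		\
    	preempt_disable();			\
    	&__get_cpu_var(var); }))

    #define put_cpu_var(var) do {		\
    	(void)&(var);				\
    	preempt_enable();			\
    } while (0)

[Open-coding preempt_disable_notrace()/preempt_enable_notrace() around __get_cpu_var(), as the patch does, keeps the fast clock-read path out of the tracer.]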