From e13dcc1ab593452b99c39de2cb588c39f6d6a7e9 Mon Sep 17 00:00:00 2001
From: Stuart Yoder <stuart.yoder@freescale.com>
Date: Tue, 3 Jul 2012 05:48:49 +0000
Subject: PPC: epapr: create define for return code value of success

Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/epapr_hcalls.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index bf2c06c33871..c0c7adcd21e3 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -88,7 +88,8 @@
 #define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
 #define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
 
-/* epapr error codes */
+/* epapr return codes */
+#define EV_SUCCESS		0
 #define EV_EPERM		1	/* Operation not permitted */
 #define EV_ENOENT		2	/*  Entry Not Found */
 #define EV_EIO			3	/* I/O error occured */
-- 
cgit v1.2.3


From fdcf8bd7e711d4c0fe3ef624cfb5e3808149ff7f Mon Sep 17 00:00:00 2001
From: Stuart Yoder <stuart.yoder@freescale.com>
Date: Tue, 3 Jul 2012 05:48:50 +0000
Subject: KVM: PPC: use definitions in epapr header for hcalls

Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_para.h | 21 +++++++++++----------
 arch/powerpc/kernel/kvm.c           |  2 +-
 arch/powerpc/kvm/powerpc.c          | 10 +++++-----
 3 files changed, 17 insertions(+), 16 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index c18916bff689..a168ce37d85c 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -75,9 +75,10 @@ struct kvm_vcpu_arch_shared {
 };
 
 #define KVM_SC_MAGIC_R0		0x4b564d21 /* "KVM!" */
-#define HC_VENDOR_KVM		(42 << 16)
-#define HC_EV_SUCCESS		0
-#define HC_EV_UNIMPLEMENTED	12
+
+#define KVM_HCALL_TOKEN(num)     _EV_HCALL_TOKEN(EV_KVM_VENDOR_ID, num)
+
+#include <asm/epapr_hcalls.h>
 
 #define KVM_FEATURE_MAGIC_PAGE	1
 
@@ -121,7 +122,7 @@ static unsigned long kvm_hypercall(unsigned long *in,
 				   unsigned long *out,
 				   unsigned long nr)
 {
-	return HC_EV_UNIMPLEMENTED;
+	return EV_UNIMPLEMENTED;
 }
 
 #endif
@@ -132,7 +133,7 @@ static inline long kvm_hypercall0_1(unsigned int nr, unsigned long *r2)
 	unsigned long out[8];
 	unsigned long r;
 
-	r = kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	r = kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 	*r2 = out[0];
 
 	return r;
@@ -143,7 +144,7 @@ static inline long kvm_hypercall0(unsigned int nr)
 	unsigned long in[8];
 	unsigned long out[8];
 
-	return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 }
 
 static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
@@ -152,7 +153,7 @@ static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
 	unsigned long out[8];
 
 	in[0] = p1;
-	return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 }
 
 static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
@@ -163,7 +164,7 @@ static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
 
 	in[0] = p1;
 	in[1] = p2;
-	return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 }
 
 static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
@@ -175,7 +176,7 @@ static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
 	in[0] = p1;
 	in[1] = p2;
 	in[2] = p3;
-	return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 }
 
 static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
@@ -189,7 +190,7 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 	in[1] = p2;
 	in[2] = p3;
 	in[3] = p4;
-	return kvm_hypercall(in, out, nr | HC_VENDOR_KVM);
+	return kvm_hypercall(in, out, KVM_HCALL_TOKEN(nr));
 }
 
 
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index 867db1de8949..a61b133c4f99 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -419,7 +419,7 @@ static void kvm_map_magic_page(void *data)
 	in[0] = KVM_MAGIC_PAGE;
 	in[1] = KVM_MAGIC_PAGE;
 
-	kvm_hypercall(in, out, HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE);
+	kvm_hypercall(in, out, KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE));
 
 	*features = out[0];
 }
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 4d213b8b0fb5..0368a9391b21 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -67,18 +67,18 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 	}
 
 	switch (nr) {
-	case HC_VENDOR_KVM | KVM_HC_PPC_MAP_MAGIC_PAGE:
+	case KVM_HCALL_TOKEN(KVM_HC_PPC_MAP_MAGIC_PAGE):
 	{
 		vcpu->arch.magic_page_pa = param1;
 		vcpu->arch.magic_page_ea = param2;
 
 		r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7;
 
-		r = HC_EV_SUCCESS;
+		r = EV_SUCCESS;
 		break;
 	}
-	case HC_VENDOR_KVM | KVM_HC_FEATURES:
-		r = HC_EV_SUCCESS;
+	case KVM_HCALL_TOKEN(KVM_HC_FEATURES):
+		r = EV_SUCCESS;
 #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2)
 		/* XXX Missing magic page on 44x */
 		r2 |= (1 << KVM_FEATURE_MAGIC_PAGE);
@@ -87,7 +87,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 		/* Second return value is in r4 */
 		break;
 	default:
-		r = HC_EV_UNIMPLEMENTED;
+		r = EV_UNIMPLEMENTED;
 		break;
 	}
 
-- 
cgit v1.2.3


From 784bafac79e7646e56f40998a6dde0e1ed5595f8 Mon Sep 17 00:00:00 2001
From: Stuart Yoder <stuart.yoder@freescale.com>
Date: Tue, 3 Jul 2012 05:48:51 +0000
Subject: KVM: PPC: add pvinfo for hcall opcodes on e500mc/e5500

Signed-off-by: Liu Yu <yu.liu@freescale.com>
[stuart: factored this out from idle hcall support in host patch]
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/powerpc.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0368a9391b21..a478e662b2bc 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -751,9 +751,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 
 static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 {
+	u32 inst_nop = 0x60000000;
+#ifdef CONFIG_KVM_BOOKE_HV
+	u32 inst_sc1 = 0x44000022;
+	pvinfo->hcall[0] = inst_sc1;
+	pvinfo->hcall[1] = inst_nop;
+	pvinfo->hcall[2] = inst_nop;
+	pvinfo->hcall[3] = inst_nop;
+#else
 	u32 inst_lis = 0x3c000000;
 	u32 inst_ori = 0x60000000;
-	u32 inst_nop = 0x60000000;
 	u32 inst_sc = 0x44000002;
 	u32 inst_imm_mask = 0xffff;
 
@@ -770,6 +777,7 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	pvinfo->hcall[1] = inst_ori | (KVM_SC_MAGIC_R0 & inst_imm_mask);
 	pvinfo->hcall[2] = inst_sc;
 	pvinfo->hcall[3] = inst_nop;
+#endif
 
 	return 0;
 }
-- 
cgit v1.2.3


From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001
From: Liu Yu-B13201 <Yu.Liu@freescale.com>
Date: Tue, 3 Jul 2012 05:48:52 +0000
Subject: KVM: PPC: Add support for ePAPR idle hcall in host kernel

And add a new flag definition in kvm_ppc_pvinfo to indicate
whether the host supports the EV_IDLE hcall.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
[stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle]
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
[agraf: fix typo]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/Kbuild |  1 +
 arch/powerpc/kvm/powerpc.c      | 10 ++++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1ed183..13d6b7bf3b69 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -34,5 +34,6 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
+header-y += epapr_hcalls.h
 
 generic-y += rwsem.h
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a478e662b2bc..dbf56e173c25 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -38,8 +38,7 @@
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
 {
-	return !(v->arch.shared->msr & MSR_WE) ||
-	       !!(v->arch.pending_exceptions) ||
+	return !!(v->arch.pending_exceptions) ||
 	       v->requests;
 }
 
@@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 
 		/* Second return value is in r4 */
 		break;
+	case EV_HCALL_TOKEN(EV_IDLE):
+		r = EV_SUCCESS;
+		kvm_vcpu_block(vcpu);
+		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+		break;
 	default:
 		r = EV_UNIMPLEMENTED;
 		break;
@@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	pvinfo->hcall[3] = inst_nop;
 #endif
 
+	pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE;
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 2f979de8a716bdbdc9f4db532652fbca08ed710c Mon Sep 17 00:00:00 2001
From: Liu Yu-B13201 <Yu.Liu@freescale.com>
Date: Tue, 3 Jul 2012 05:48:53 +0000
Subject: KVM: PPC: ev_idle hcall support for e500 guests

Signed-off-by: Liu Yu <yu.liu@freescale.com>
[varun: 64-bit changes]
Signed-off-by: Varun Sethi <Varun.Sethi@freescale.com>
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/epapr_hcalls.h | 11 ++++++-----
 arch/powerpc/kernel/epapr_hcalls.S      | 28 ++++++++++++++++++++++++++++
 arch/powerpc/kernel/epapr_paravirt.c    | 11 ++++++++++-
 3 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index c0c7adcd21e3..833ce2c2d505 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -50,10 +50,6 @@
 #ifndef _EPAPR_HCALLS_H
 #define _EPAPR_HCALLS_H
 
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <asm/byteorder.h>
-
 #define EV_BYTE_CHANNEL_SEND		1
 #define EV_BYTE_CHANNEL_RECEIVE		2
 #define EV_BYTE_CHANNEL_POLL		3
@@ -109,6 +105,11 @@
 #define EV_UNIMPLEMENTED	12	/* Unimplemented hypercall */
 #define EV_BUFFER_OVERFLOW	13	/* Caller-supplied buffer too small */
 
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+
 /*
  * Hypercall register clobber list
  *
@@ -506,5 +507,5 @@ static inline unsigned int ev_idle(void)
 
 	return r3;
 }
-
+#endif /* !__ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S
index 697b390ebfd8..62c0dc237826 100644
--- a/arch/powerpc/kernel/epapr_hcalls.S
+++ b/arch/powerpc/kernel/epapr_hcalls.S
@@ -8,13 +8,41 @@
  */
 
 #include <linux/threads.h>
+#include <asm/epapr_hcalls.h>
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/cputable.h>
 #include <asm/thread_info.h>
 #include <asm/ppc_asm.h>
+#include <asm/asm-compat.h>
 #include <asm/asm-offsets.h>
 
+/* epapr_ev_idle() was derived from e500_idle() */
+_GLOBAL(epapr_ev_idle)
+	CURRENT_THREAD_INFO(r3, r1)
+	PPC_LL	r4, TI_LOCAL_FLAGS(r3)	/* set napping bit */
+	ori	r4, r4,_TLF_NAPPING	/* so when we take an exception */
+	PPC_STL	r4, TI_LOCAL_FLAGS(r3)	/* it will return to our caller */
+
+	wrteei	1
+
+idle_loop:
+	LOAD_REG_IMMEDIATE(r11, EV_HCALL_TOKEN(EV_IDLE))
+
+.global epapr_ev_idle_start
+epapr_ev_idle_start:
+	li	r3, -1
+	nop
+	nop
+	nop
+
+	/*
+	 * Guard against spurious wakeups from a hypervisor --
+	 * only interrupt will cause us to return to LR due to
+	 * _TLF_NAPPING.
+	 */
+	b	idle_loop
+
 /* Hypercall entry point. Will be patched with device tree instructions. */
 .global epapr_hypercall_start
 epapr_hypercall_start:
diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 028aeae370b6..f3eab8594d9f 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -21,6 +21,10 @@
 #include <asm/epapr_hcalls.h>
 #include <asm/cacheflush.h>
 #include <asm/code-patching.h>
+#include <asm/machdep.h>
+
+extern void epapr_ev_idle(void);
+extern u32 epapr_ev_idle_start[];
 
 bool epapr_paravirt_enabled;
 
@@ -41,8 +45,13 @@ static int __init epapr_paravirt_init(void)
 	if (len % 4 || len > (4 * 4))
 		return -ENODEV;
 
-	for (i = 0; i < (len / 4); i++)
+	for (i = 0; i < (len / 4); i++) {
 		patch_instruction(epapr_hypercall_start + i, insts[i]);
+		patch_instruction(epapr_ev_idle_start + i, insts[i]);
+	}
+
+	if (of_get_property(hyper_node, "has-idle", NULL))
+		ppc_md.power_save = epapr_ev_idle;
 
 	epapr_paravirt_enabled = true;
 
-- 
cgit v1.2.3


From 40656397241860bb21f2802af17ac1de607fb7a9 Mon Sep 17 00:00:00 2001
From: Stuart Yoder <stuart.yoder@freescale.com>
Date: Tue, 3 Jul 2012 05:48:54 +0000
Subject: PPC: select EPAPR_PARAVIRT for all users of epapr hcalls

Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/platforms/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index e7a896acd982..48a920d51489 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -90,6 +90,7 @@ config MPIC
 config PPC_EPAPR_HV_PIC
 	bool
 	default n
+	select EPAPR_PARAVIRT
 
 config MPIC_WEIRD
 	bool
-- 
cgit v1.2.3


From 305bcf26128e380bb1296d2802387659ab8b038e Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Tue, 3 Jul 2012 05:48:55 +0000
Subject: powerpc/fsl-soc: use CONFIG_EPAPR_PARAVIRT for hcalls

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/sysdev/fsl_msi.c | 9 +++++++--
 arch/powerpc/sysdev/fsl_soc.c | 2 ++
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 6e097de00e09..7e2b2f2e3ecd 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -236,7 +236,6 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	u32 intr_index;
 	u32 have_shift = 0;
 	struct fsl_msi_cascade_data *cascade_data;
-	unsigned int ret;
 
 	cascade_data = irq_get_handler_data(irq);
 	msi_data = cascade_data->msi_data;
@@ -268,7 +267,9 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 	case FSL_PIC_IP_IPIC:
 		msir_value = fsl_msi_read(msi_data->msi_regs, msir_index * 0x4);
 		break;
-	case FSL_PIC_IP_VMPIC:
+#ifdef CONFIG_EPAPR_PARAVIRT
+	case FSL_PIC_IP_VMPIC: {
+		unsigned int ret;
 		ret = fh_vmpic_get_msir(virq_to_hw(irq), &msir_value);
 		if (ret) {
 			pr_err("fsl-msi: fh_vmpic_get_msir() failed for "
@@ -277,6 +278,8 @@ static void fsl_msi_cascade(unsigned int irq, struct irq_desc *desc)
 		}
 		break;
 	}
+#endif
+	}
 
 	while (msir_value) {
 		intr_index = ffs(msir_value) - 1;
@@ -508,10 +511,12 @@ static const struct of_device_id fsl_of_msi_ids[] = {
 		.compatible = "fsl,ipic-msi",
 		.data = (void *)&ipic_msi_feature,
 	},
+#ifdef CONFIG_EPAPR_PARAVIRT
 	{
 		.compatible = "fsl,vmpic-msi",
 		.data = (void *)&vmpic_msi_feature,
 	},
+#endif
 	{}
 };
 
diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c
index c449dbd1c938..97118dc3d285 100644
--- a/arch/powerpc/sysdev/fsl_soc.c
+++ b/arch/powerpc/sysdev/fsl_soc.c
@@ -253,6 +253,7 @@ struct platform_diu_data_ops diu_ops;
 EXPORT_SYMBOL(diu_ops);
 #endif
 
+#ifdef CONFIG_EPAPR_PARAVIRT
 /*
  * Restart the current partition
  *
@@ -278,3 +279,4 @@ void fsl_hv_halt(void)
 	pr_info("hv exit\n");
 	fh_partition_stop(-1);
 }
+#endif
-- 
cgit v1.2.3


From 8e525d59d024f54b88a038faac38f76b9094774e Mon Sep 17 00:00:00 2001
From: Liu Yu-B13201 <Yu.Liu@freescale.com>
Date: Tue, 3 Jul 2012 05:48:56 +0000
Subject: PPC: Don't use hardcoded opcode for ePAPR hcall invocation

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Stuart Yoder <stuart.yoder@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/epapr_hcalls.h | 22 ++++++++++----------
 arch/powerpc/include/asm/fsl_hcalls.h   | 36 ++++++++++++++++-----------------
 2 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index 833ce2c2d505..b8d94459a929 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -195,7 +195,7 @@ static inline unsigned int ev_int_set_config(unsigned int interrupt,
 	r5  = priority;
 	r6  = destination;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
 		: : EV_HCALL_CLOBBERS4
 	);
@@ -224,7 +224,7 @@ static inline unsigned int ev_int_get_config(unsigned int interrupt,
 	r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
 		: : EV_HCALL_CLOBBERS4
 	);
@@ -254,7 +254,7 @@ static inline unsigned int ev_int_set_mask(unsigned int interrupt,
 	r3 = interrupt;
 	r4 = mask;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -279,7 +279,7 @@ static inline unsigned int ev_int_get_mask(unsigned int interrupt,
 	r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -307,7 +307,7 @@ static inline unsigned int ev_int_eoi(unsigned int interrupt)
 	r11 = EV_HCALL_TOKEN(EV_INT_EOI);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -346,7 +346,7 @@ static inline unsigned int ev_byte_channel_send(unsigned int handle,
 	r7 = be32_to_cpu(p[2]);
 	r8 = be32_to_cpu(p[3]);
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3),
 		  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
 		: : EV_HCALL_CLOBBERS6
@@ -385,7 +385,7 @@ static inline unsigned int ev_byte_channel_receive(unsigned int handle,
 	r3 = handle;
 	r4 = *count;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4),
 		  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
 		: : EV_HCALL_CLOBBERS6
@@ -423,7 +423,7 @@ static inline unsigned int ev_byte_channel_poll(unsigned int handle,
 	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
 		: : EV_HCALL_CLOBBERS3
 	);
@@ -456,7 +456,7 @@ static inline unsigned int ev_int_iack(unsigned int handle,
 	r11 = EV_HCALL_TOKEN(EV_INT_IACK);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -480,7 +480,7 @@ static inline unsigned int ev_doorbell_send(unsigned int handle)
 	r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -500,7 +500,7 @@ static inline unsigned int ev_idle(void)
 
 	r11 = EV_HCALL_TOKEN(EV_IDLE);
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "=r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
diff --git a/arch/powerpc/include/asm/fsl_hcalls.h b/arch/powerpc/include/asm/fsl_hcalls.h
index 922d9b5fe3d5..3abb58394da4 100644
--- a/arch/powerpc/include/asm/fsl_hcalls.h
+++ b/arch/powerpc/include/asm/fsl_hcalls.h
@@ -96,7 +96,7 @@ static inline unsigned int fh_send_nmi(unsigned int vcpu_mask)
 	r11 = FH_HCALL_TOKEN(FH_SEND_NMI);
 	r3 = vcpu_mask;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -151,7 +151,7 @@ static inline unsigned int fh_partition_get_dtprop(int handle,
 	r9 = (uint32_t)propvalue_addr;
 	r10 = *propvalue_len;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11),
 		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
 		  "+r" (r8), "+r" (r9), "+r" (r10)
@@ -205,7 +205,7 @@ static inline unsigned int fh_partition_set_dtprop(int handle,
 	r9 = (uint32_t)propvalue_addr;
 	r10 = propvalue_len;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11),
 		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7),
 		  "+r" (r8), "+r" (r9), "+r" (r10)
@@ -229,7 +229,7 @@ static inline unsigned int fh_partition_restart(unsigned int partition)
 	r11 = FH_HCALL_TOKEN(FH_PARTITION_RESTART);
 	r3 = partition;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -262,7 +262,7 @@ static inline unsigned int fh_partition_get_status(unsigned int partition,
 	r11 = FH_HCALL_TOKEN(FH_PARTITION_GET_STATUS);
 	r3 = partition;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -295,7 +295,7 @@ static inline unsigned int fh_partition_start(unsigned int partition,
 	r4 = entry_point;
 	r5 = load;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5)
 		: : EV_HCALL_CLOBBERS3
 	);
@@ -317,7 +317,7 @@ static inline unsigned int fh_partition_stop(unsigned int partition)
 	r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP);
 	r3 = partition;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -376,7 +376,7 @@ static inline unsigned int fh_partition_memcpy(unsigned int source,
 #endif
 	r7 = count;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11),
 		  "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7)
 		: : EV_HCALL_CLOBBERS5
@@ -399,7 +399,7 @@ static inline unsigned int fh_dma_enable(unsigned int liodn)
 	r11 = FH_HCALL_TOKEN(FH_DMA_ENABLE);
 	r3 = liodn;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -421,7 +421,7 @@ static inline unsigned int fh_dma_disable(unsigned int liodn)
 	r11 = FH_HCALL_TOKEN(FH_DMA_DISABLE);
 	r3 = liodn;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -447,7 +447,7 @@ static inline unsigned int fh_vmpic_get_msir(unsigned int interrupt,
 	r11 = FH_HCALL_TOKEN(FH_VMPIC_GET_MSIR);
 	r3 = interrupt;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "=r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -469,7 +469,7 @@ static inline unsigned int fh_system_reset(void)
 
 	r11 = FH_HCALL_TOKEN(FH_SYSTEM_RESET);
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "=r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -506,7 +506,7 @@ static inline unsigned int fh_err_get_info(int queue, uint32_t *bufsize,
 	r6 = addr_lo;
 	r7 = peek;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6),
 		  "+r" (r7)
 		: : EV_HCALL_CLOBBERS5
@@ -542,7 +542,7 @@ static inline unsigned int fh_get_core_state(unsigned int handle,
 	r3 = handle;
 	r4 = vcpu;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -572,7 +572,7 @@ static inline unsigned int fh_enter_nap(unsigned int handle, unsigned int vcpu)
 	r3 = handle;
 	r4 = vcpu;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -597,7 +597,7 @@ static inline unsigned int fh_exit_nap(unsigned int handle, unsigned int vcpu)
 	r3 = handle;
 	r4 = vcpu;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3), "+r" (r4)
 		: : EV_HCALL_CLOBBERS2
 	);
@@ -618,7 +618,7 @@ static inline unsigned int fh_claim_device(unsigned int handle)
 	r11 = FH_HCALL_TOKEN(FH_CLAIM_DEVICE);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
@@ -645,7 +645,7 @@ static inline unsigned int fh_partition_stop_dma(unsigned int handle)
 	r11 = FH_HCALL_TOKEN(FH_PARTITION_STOP_DMA);
 	r3 = handle;
 
-	__asm__ __volatile__ ("sc 1"
+	asm volatile("bl	epapr_hypercall_start"
 		: "+r" (r11), "+r" (r3)
 		: : EV_HCALL_CLOBBERS1
 	);
-- 
cgit v1.2.3


From 97c95059848358f1577f471ec47cf68690f996e4 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 2 Aug 2012 15:10:00 +0200
Subject: KVM: PPC: PR: Use generic tracepoint for guest exit

We want to have tracing information on guest exits for booke as well
as book3s. Since most information is identical, use a common trace point.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c |  2 +-
 arch/powerpc/kvm/booke.c     |  3 ++
 arch/powerpc/kvm/trace.h     | 79 ++++++++++++++++++++++++++++----------------
 3 files changed, 55 insertions(+), 29 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 05c28f59f77f..7f0fe6f9e297 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -549,7 +549,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	/* We get here with MSR.EE=0, so enable it to be a nice citizen */
 	__hard_irq_enable();
 
-	trace_kvm_book3s_exit(exit_nr, vcpu);
+	trace_kvm_exit(exit_nr, vcpu);
 	preempt_enable();
 	kvm_resched(vcpu);
 	switch (exit_nr) {
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097c852b..7ce2ed07831f 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -39,6 +39,7 @@
 
 #include "timing.h"
 #include "booke.h"
+#include "trace.h"
 
 unsigned long kvmppc_booke_handlers;
 
@@ -677,6 +678,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	local_irq_enable();
 
+	trace_kvm_exit(exit_nr, vcpu);
+
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 877186b7b1c3..9fab6eddc7e4 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,6 +31,57 @@ TRACE_EVENT(kvm_ppc_instr,
 		  __entry->inst, __entry->pc, __entry->emulate)
 );
 
+TRACE_EVENT(kvm_exit,
+	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
+	TP_ARGS(exit_nr, vcpu),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	exit_nr		)
+		__field(	unsigned long,	pc		)
+		__field(	unsigned long,	msr		)
+		__field(	unsigned long,	dar		)
+#ifdef CONFIG_KVM_BOOK3S_PR
+		__field(	unsigned long,	srr1		)
+#endif
+		__field(	unsigned long,	last_inst	)
+	),
+
+	TP_fast_assign(
+#ifdef CONFIG_KVM_BOOK3S_PR
+		struct kvmppc_book3s_shadow_vcpu *svcpu;
+#endif
+		__entry->exit_nr	= exit_nr;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->dar		= kvmppc_get_fault_dar(vcpu);
+		__entry->msr		= vcpu->arch.shared->msr;
+#ifdef CONFIG_KVM_BOOK3S_PR
+		svcpu = svcpu_get(vcpu);
+		__entry->srr1		= svcpu->shadow_srr1;
+		svcpu_put(svcpu);
+#endif
+		__entry->last_inst	= vcpu->arch.last_inst;
+	),
+
+	TP_printk("exit=0x%x"
+		" | pc=0x%lx"
+		" | msr=0x%lx"
+		" | dar=0x%lx"
+#ifdef CONFIG_KVM_BOOK3S_PR
+		" | srr1=0x%lx"
+#endif
+		" | last_inst=0x%lx"
+		,
+		__entry->exit_nr,
+		__entry->pc,
+		__entry->msr,
+		__entry->dar,
+#ifdef CONFIG_KVM_BOOK3S_PR
+		__entry->srr1,
+#endif
+		__entry->last_inst
+		)
+);
+
 TRACE_EVENT(kvm_stlb_inval,
 	TP_PROTO(unsigned int stlb_index),
 	TP_ARGS(stlb_index),
@@ -105,34 +156,6 @@ TRACE_EVENT(kvm_gtlb_write,
 
 #ifdef CONFIG_KVM_BOOK3S_PR
 
-TRACE_EVENT(kvm_book3s_exit,
-	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
-	TP_ARGS(exit_nr, vcpu),
-
-	TP_STRUCT__entry(
-		__field(	unsigned int,	exit_nr		)
-		__field(	unsigned long,	pc		)
-		__field(	unsigned long,	msr		)
-		__field(	unsigned long,	dar		)
-		__field(	unsigned long,	srr1		)
-	),
-
-	TP_fast_assign(
-		struct kvmppc_book3s_shadow_vcpu *svcpu;
-		__entry->exit_nr	= exit_nr;
-		__entry->pc		= kvmppc_get_pc(vcpu);
-		__entry->dar		= kvmppc_get_fault_dar(vcpu);
-		__entry->msr		= vcpu->arch.shared->msr;
-		svcpu = svcpu_get(vcpu);
-		__entry->srr1		= svcpu->shadow_srr1;
-		svcpu_put(svcpu);
-	),
-
-	TP_printk("exit=0x%x | pc=0x%lx | msr=0x%lx | dar=0x%lx | srr1=0x%lx",
-		  __entry->exit_nr, __entry->pc, __entry->msr, __entry->dar,
-		  __entry->srr1)
-);
-
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
 	TP_ARGS(r, vcpu),
-- 
cgit v1.2.3


From f4800b1f4d23156e9080a08d6114e5d8bb767964 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 7 Aug 2012 10:24:14 +0200
Subject: KVM: PPC: Expose SYNC cap based on mmu notifiers

Semantically, the "SYNC" cap means that we have mmu notifiers available.
Express this in our #ifdef'ery around the feature, so that we can be sure
we don't miss out on ppc targets when they get their implementation.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/powerpc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index dbf56e173c25..45fe433316ea 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -264,10 +264,16 @@ int kvm_dev_ioctl_check_extension(long ext)
 		if (cpu_has_feature(CPU_FTR_ARCH_201))
 			r = 2;
 		break;
+#endif
 	case KVM_CAP_SYNC_MMU:
+#ifdef CONFIG_KVM_BOOK3S_64_HV
 		r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-		break;
+#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+		r = 1;
+#else
+		r = 0;
 #endif
+		break;
 	case KVM_CAP_NR_VCPUS:
 		/*
 		 * Recommending a number of CPUs is somewhat arbitrary; we
-- 
cgit v1.2.3


From cf1c5ca47319d9eb49166859921822fea354d4b3 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 1 Aug 2012 12:56:51 +0200
Subject: KVM: PPC: BookE: Expose remote TLB flushes in debugfs

We're already counting remote TLB flushes in a variable, but don't export
it to user space yet. Do so, so we know what's going on.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7ce2ed07831f..1d4ce9a80f55 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "doorbell", VCPU_STAT(dbell_exits) },
 	{ "guest doorbell", VCPU_STAT(gdbell_exits) },
+	{ "remote_tlb_flush", VM_STAT(remote_tlb_flush) },
 	{ NULL }
 };
 
-- 
cgit v1.2.3


From 2bb890f5ee79c85b9d3b7df37ecb639d8d4b961e Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 2 Aug 2012 13:38:49 +0200
Subject: KVM: PPC: E500: Fix clear_tlb_refs

Our mapping code assumes that TLB0 entries are always mapped. However, after
calling clear_tlb_refs() this is no longer the case.

Map them dynamically if we find an entry unmapped in TLB0.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500_tlb.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index ff38b664195d..b56b6e14df6c 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1039,8 +1039,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 		sesel = 0; /* unused */
 		priv = &vcpu_e500->gtlb_priv[tlbsel][esel];
 
-		kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
-					&priv->ref, eaddr, &stlbe);
+		/* Only triggers after clear_tlb_refs */
+		if (unlikely(!(priv->ref.flags & E500_TLB_VALID)))
+			kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe);
+		else
+			kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K,
+						&priv->ref, eaddr, &stlbe);
 		break;
 
 	case 1: {
-- 
cgit v1.2.3


From 1340f3e8871b9f35b39c33d0140383c6c6c1f005 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 6 Aug 2012 00:04:14 +0000
Subject: KVM: PPC: Quieten message about allocating linear regions

This is printed once for every RMA or HPT region that get
preallocated.  If one preallocates hundreds of such regions
(in order to run hundreds of KVM guests), that gets rather
painful, so make it a bit quieter.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index fb4eac290fef..ec0a9e5de100 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -157,8 +157,8 @@ static void __init kvm_linear_init_one(ulong size, int count, int type)
 	linear_info = alloc_bootmem(count * sizeof(struct kvmppc_linear_info));
 	for (i = 0; i < count; ++i) {
 		linear = alloc_bootmem_align(size, size);
-		pr_info("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
-			size >> 20);
+		pr_debug("Allocated KVM %s at %p (%ld MB)\n", typestr, linear,
+			 size >> 20);
 		linear_info[i].base_virt = linear;
 		linear_info[i].base_pfn = __pa(linear) >> PAGE_SHIFT;
 		linear_info[i].npages = npages;
-- 
cgit v1.2.3


From 8043e494da644ec174f7df0b67f88ccf8777a1ce Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 10 Aug 2012 12:21:21 +0000
Subject: powerpc/epapr: export epapr_hypercall_start

This fixes breakage introduced by the following commit:

  commit 6d2d82627f4f1e96a33664ace494fa363e0495cb
  Author: Liu Yu-B13201 <Yu.Liu@freescale.com>
  Date:   Tue Jul 3 05:48:56 2012 +0000

    PPC: Don't use hardcoded opcode for ePAPR hcall invocation

when a driver that uses ePAPR hypercalls is built as a module.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kernel/ppc_ksyms.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 3e4031581c65..e597dde124e8 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -43,6 +43,7 @@
 #include <asm/dcr.h>
 #include <asm/ftrace.h>
 #include <asm/switch_to.h>
+#include <asm/epapr_hcalls.h>
 
 #ifdef CONFIG_PPC32
 extern void transfer_to_handler(void);
@@ -192,3 +193,7 @@ EXPORT_SYMBOL(__arch_hweight64);
 #ifdef CONFIG_PPC_BOOK3S_64
 EXPORT_SYMBOL_GPL(mmu_psize_defs);
 #endif
+
+#ifdef CONFIG_EPAPR_PARAVIRT
+EXPORT_SYMBOL(epapr_hypercall_start);
+#endif
-- 
cgit v1.2.3


From 4ffc6356ec690f77f65b7b78e0047a3fe8316371 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 8 Aug 2012 20:31:13 +0200
Subject: KVM: PPC: BookE: Add check_requests helper function

We need a central place to check for pending requests in. Add one that
only does the timer check we already do in a different place.

Later, this central function can be extended by more checks.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1d4ce9a80f55..bcf87fe89179 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -419,13 +419,6 @@ static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
 	unsigned long *pending = &vcpu->arch.pending_exceptions;
 	unsigned int priority;
 
-	if (vcpu->requests) {
-		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu)) {
-			smp_mb();
-			update_timer_ints(vcpu);
-		}
-	}
-
 	priority = __ffs(*pending);
 	while (priority < BOOKE_IRQPRIO_MAX) {
 		if (kvmppc_booke_irqprio_deliver(vcpu, priority))
@@ -461,6 +454,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
+{
+	if (vcpu->requests) {
+		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+			update_timer_ints(vcpu);
+	}
+}
+
 /*
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
@@ -485,6 +486,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			break;
 		}
 
+		smp_mb();
+		if (vcpu->requests) {
+			/* Make sure we process requests preemptable */
+			local_irq_enable();
+			kvmppc_check_requests(vcpu);
+			local_irq_disable();
+			continue;
+		}
+
 		if (kvmppc_core_prepare_to_enter(vcpu)) {
 			/* interrupts got enabled in between, so we
 			   are back at square 1 */
-- 
cgit v1.2.3


From d69c6436443c05a64452054f51a79316297755f4 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 8 Aug 2012 20:44:20 +0200
Subject: KVM: PPC: BookE: Add support for vcpu->mode

Generic KVM code might want to know whether we are inside guest context
or outside. It also wants to be able to push us out of guest context.

Add support to the BookE code for the generic vcpu->mode field that describes
the above states.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index bcf87fe89179..70a86c0a9d85 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -501,6 +501,15 @@ static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
+		if (vcpu->mode == EXITING_GUEST_MODE) {
+			r = 1;
+			break;
+		}
+
+		/* Going into guest context! Yay! */
+		vcpu->mode = IN_GUEST_MODE;
+		smp_wmb();
+
 		break;
 	}
 
@@ -572,6 +581,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvm_guest_exit();
 
 out:
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	smp_wmb();
 	local_irq_enable();
 	return ret;
 }
-- 
cgit v1.2.3


From 862d31f788f9a249f7656d02d8d4006e306108ce Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 31 Jul 2012 00:19:50 +0200
Subject: KVM: PPC: E500: Implement MMU notifiers

The e500 target has lived without mmu notifiers ever since it got
introduced, but fails for the user space check on them with hugetlbfs.

So in order to get that one working, implement mmu notifiers in a
reasonably dumb fashion and be happy. On embedded hardware, we almost
never end up with mmu notifier calls, since most people don't overcommit.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |  3 +-
 arch/powerpc/include/asm/kvm_ppc.h  |  1 +
 arch/powerpc/kvm/Kconfig            |  2 ++
 arch/powerpc/kvm/booke.c            |  6 ++++
 arch/powerpc/kvm/e500_tlb.c         | 60 +++++++++++++++++++++++++++++++++----
 5 files changed, 65 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28e8f5e5c63e..cea9d3aab71c 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,7 +46,8 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \
+    defined(CONFIG_KVM_E500MC)
 #include <linux/mmu_notifier.h>
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index e006f0bdea95..88de3146838b 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -104,6 +104,7 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
                                        struct kvm_interrupt *irq);
 extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
                                          struct kvm_interrupt *irq);
+extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                                   unsigned int op, int *advance);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index f4dacb9c57fa..40cad8c8bd0e 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -123,6 +123,7 @@ config KVM_E500V2
 	depends on EXPERIMENTAL && E500 && !PPC_E500MC
 	select KVM
 	select KVM_MMIO
+	select MMU_NOTIFIER
 	---help---
 	  Support running unmodified E500 guest kernels in virtual machines on
 	  E500v2 host processors.
@@ -138,6 +139,7 @@ config KVM_E500MC
 	select KVM
 	select KVM_MMIO
 	select KVM_BOOKE_HV
+	select MMU_NOTIFIER
 	---help---
 	  Support running unmodified E500MC/E5500 (32-bit) guest kernels in
 	  virtual machines on E500MC/E5500 host processors.
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 70a86c0a9d85..52f6cbb4923e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -459,6 +459,10 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
 	if (vcpu->requests) {
 		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
 			update_timer_ints(vcpu);
+#if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
+		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+			kvmppc_core_flush_tlb(vcpu);
+#endif
 	}
 }
 
@@ -579,6 +583,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 
 	kvm_guest_exit();
+	vcpu->mode = OUTSIDE_GUEST_MODE;
+	smp_wmb();
 
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index b56b6e14df6c..de8ea29409f2 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -303,18 +303,15 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 	ref->pfn = pfn;
 	ref->flags = E500_TLB_VALID;
 
-	if (tlbe_is_writable(gtlbe))
+	if (tlbe_is_writable(gtlbe)) {
 		ref->flags |= E500_TLB_DIRTY;
+		kvm_set_pfn_dirty(pfn);
+	}
 }
 
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
 	if (ref->flags & E500_TLB_VALID) {
-		if (ref->flags & E500_TLB_DIRTY)
-			kvm_release_pfn_dirty(ref->pfn);
-		else
-			kvm_release_pfn_clean(ref->pfn);
-
 		ref->flags = 0;
 	}
 }
@@ -357,6 +354,13 @@ static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500)
 	clear_tlb_privs(vcpu_e500);
 }
 
+void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
+	clear_tlb_refs(vcpu_e500);
+	clear_tlb1_bitmap(vcpu_e500);
+}
+
 static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu,
 		unsigned int eaddr, int as)
 {
@@ -541,6 +545,9 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	/* Clear i-cache for new pages */
 	kvmppc_mmu_flush_icache(pfn);
+
+	/* Drop refcount on page, so that mmu notifiers can clear it */
+	kvm_release_pfn_clean(pfn);
 }
 
 /* XXX only map the one-one case, for now use TLB0 */
@@ -1064,6 +1071,47 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 	write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel);
 }
 
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	/*
+	 * Flush all shadow tlb entries everywhere. This is slow, but
+	 * we are 100% sure that we catch the to be unmapped page
+	 */
+	kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	/* kvm_unmap_hva flushes everything anyways */
+	kvm_unmap_hva(kvm, start);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	/* The page will get remapped properly on its next fault */
+	kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
 static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
 {
 	int i;
-- 
cgit v1.2.3


From 6346046c3a69edc9149311473b940f3af7c93752 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 8 Aug 2012 00:44:52 +0200
Subject: KVM: PPC: BookE: Add some more trace points

Without trace points, debugging what exactly is going on inside guest
code can be very tricky. Add a few more trace points at places that
hopefully tell us more when things go wrong.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c    |  3 ++
 arch/powerpc/kvm/e500_tlb.c |  3 ++
 arch/powerpc/kvm/trace.h    | 71 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 52f6cbb4923e..00bcc57428c7 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -143,6 +143,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
                                        unsigned int priority)
 {
+	trace_kvm_booke_queue_irqprio(vcpu, priority);
 	set_bit(priority, &vcpu->arch.pending_exceptions);
 }
 
@@ -457,6 +458,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->requests) {
+		trace_kvm_check_requests(vcpu);
+
 		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
 			update_timer_ints(vcpu);
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index de8ea29409f2..1af6fab58995 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -312,6 +312,7 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
 {
 	if (ref->flags & E500_TLB_VALID) {
+		trace_kvm_booke206_ref_release(ref->pfn, ref->flags);
 		ref->flags = 0;
 	}
 }
@@ -1075,6 +1076,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr,
 
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
+	trace_kvm_unmap_hva(hva);
+
 	/*
 	 * Flush all shadow tlb entries everywhere. This is slow, but
 	 * we are 100% sure that we catch the to be unmapped page
diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index 9fab6eddc7e4..cb2780a42fd8 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -82,6 +82,21 @@ TRACE_EVENT(kvm_exit,
 		)
 );
 
+TRACE_EVENT(kvm_unmap_hva,
+	TP_PROTO(unsigned long hva),
+	TP_ARGS(hva),
+
+	TP_STRUCT__entry(
+		__field(	unsigned long,	hva		)
+	),
+
+	TP_fast_assign(
+		__entry->hva		= hva;
+	),
+
+	TP_printk("unmap hva 0x%lx\n", __entry->hva)
+);
+
 TRACE_EVENT(kvm_stlb_inval,
 	TP_PROTO(unsigned int stlb_index),
 	TP_ARGS(stlb_index),
@@ -149,6 +164,24 @@ TRACE_EVENT(kvm_gtlb_write,
 		__entry->word1, __entry->word2)
 );
 
+TRACE_EVENT(kvm_check_requests,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	cpu_nr		)
+		__field(	__u32,	requests	)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr		= vcpu->vcpu_id;
+		__entry->requests	= vcpu->requests;
+	),
+
+	TP_printk("vcpu=%x requests=%x",
+		__entry->cpu_nr, __entry->requests)
+);
+
 
 /*************************************************************************
  *                         Book3S trace points                           *
@@ -418,6 +451,44 @@ TRACE_EVENT(kvm_booke206_gtlb_write,
 		__entry->mas2, __entry->mas7_3)
 );
 
+TRACE_EVENT(kvm_booke206_ref_release,
+	TP_PROTO(__u64 pfn, __u32 flags),
+	TP_ARGS(pfn, flags),
+
+	TP_STRUCT__entry(
+		__field(	__u64,	pfn		)
+		__field(	__u32,	flags		)
+	),
+
+	TP_fast_assign(
+		__entry->pfn		= pfn;
+		__entry->flags		= flags;
+	),
+
+	TP_printk("pfn=%llx flags=%x",
+		__entry->pfn, __entry->flags)
+);
+
+TRACE_EVENT(kvm_booke_queue_irqprio,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
+	TP_ARGS(vcpu, priority),
+
+	TP_STRUCT__entry(
+		__field(	__u32,	cpu_nr		)
+		__field(	__u32,	priority		)
+		__field(	unsigned long,	pending		)
+	),
+
+	TP_fast_assign(
+		__entry->cpu_nr		= vcpu->vcpu_id;
+		__entry->priority	= priority;
+		__entry->pending	= vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("vcpu=%x prio=%x pending=%lx",
+		__entry->cpu_nr, __entry->priority, __entry->pending)
+);
+
 #endif
 
 #endif /* _TRACE_KVM_H */
-- 
cgit v1.2.3


From 2d8185d4ee22f425001d28d1817fc8d478e6fa02 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 10 Aug 2012 12:31:12 +0200
Subject: KVM: PPC: BookE: No duplicate request != 0 check

We only call kvmppc_check_requests() when vcpu->requests != 0, so drop
the redundant check in the function itself

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 00bcc57428c7..683cbd686d01 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -457,16 +457,14 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 
 static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
 {
-	if (vcpu->requests) {
-		trace_kvm_check_requests(vcpu);
+	trace_kvm_check_requests(vcpu);
 
-		if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
-			update_timer_ints(vcpu);
+	if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
+		update_timer_ints(vcpu);
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
-		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
-			kvmppc_core_flush_tlb(vcpu);
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+		kvmppc_core_flush_tlb(vcpu);
 #endif
-	}
 }
 
 /*
-- 
cgit v1.2.3


From 03d25c5bd5c3125055bd36f4813ddb817def19dd Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 10 Aug 2012 12:28:50 +0200
Subject: KVM: PPC: Use same kvmppc_prepare_to_enter code for booke and
 book3s_pr

We need to do the same things when preparing to enter a guest for booke and
book3s_pr cores. Fold the generic code into a generic function that both call.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h |  3 ++
 arch/powerpc/kvm/book3s_pr.c       | 22 ++++-----------
 arch/powerpc/kvm/booke.c           | 58 +-------------------------------------
 arch/powerpc/kvm/powerpc.c         | 57 +++++++++++++++++++++++++++++++++++++
 4 files changed, 67 insertions(+), 73 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 88de3146838b..59b7c87e47f7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -112,6 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
 				     ulong val);
 extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
 				     ulong *val);
+extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_booke_init(void);
 extern void kvmppc_booke_exit(void);
@@ -150,6 +151,8 @@ extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
 extern int kvmppc_bookehv_init(void);
 extern void kvmppc_bookehv_exit(void);
 
+extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7f0fe6f9e297..cae2defd1462 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -88,6 +88,10 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
+void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+{
+}
+
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
 	ulong smsr = vcpu->arch.shared->msr;
@@ -815,19 +819,9 @@ program_interrupt:
 		 * again due to a host external interrupt.
 		 */
 		__hard_irq_disable();
-		if (signal_pending(current)) {
-			__hard_irq_enable();
-#ifdef EXIT_DEBUG
-			printk(KERN_EMERG "KVM: Going back to host\n");
-#endif
-			vcpu->stat.signal_exits++;
+		if (kvmppc_prepare_to_enter(vcpu)) {
 			run->exit_reason = KVM_EXIT_INTR;
 			r = -EINTR;
-		} else {
-			/* In case an interrupt came in that was triggered
-			 * from userspace (like DEC), we need to check what
-			 * to inject now! */
-			kvmppc_core_prepare_to_enter(vcpu);
 		}
 	}
 
@@ -1029,8 +1023,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	}
 
-	kvmppc_core_prepare_to_enter(vcpu);
-
 	/*
 	 * Interrupts could be timers for the guest which we have to inject
 	 * again, so let's postpone them until we're in the guest and if we
@@ -1038,9 +1030,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * a host external interrupt.
 	 */
 	__hard_irq_disable();
-
-	/* No need to go into the guest when all we do is going out */
-	if (signal_pending(current)) {
+	if (kvmppc_prepare_to_enter(vcpu)) {
 		__hard_irq_enable();
 		kvm_run->exit_reason = KVM_EXIT_INTR;
 		ret = -EINTR;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 683cbd686d01..4652e0bfa781 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -455,10 +455,8 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 	return r;
 }
 
-static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
+void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
-	trace_kvm_check_requests(vcpu);
-
 	if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
 		update_timer_ints(vcpu);
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
@@ -467,60 +465,6 @@ static void kvmppc_check_requests(struct kvm_vcpu *vcpu)
 #endif
 }
 
-/*
- * Common checks before entering the guest world.  Call with interrupts
- * disabled.
- *
- * returns !0 if a signal is pending and check_signal is true
- */
-static int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
-{
-	int r = 0;
-
-	WARN_ON_ONCE(!irqs_disabled());
-	while (true) {
-		if (need_resched()) {
-			local_irq_enable();
-			cond_resched();
-			local_irq_disable();
-			continue;
-		}
-
-		if (signal_pending(current)) {
-			r = 1;
-			break;
-		}
-
-		smp_mb();
-		if (vcpu->requests) {
-			/* Make sure we process requests preemptable */
-			local_irq_enable();
-			kvmppc_check_requests(vcpu);
-			local_irq_disable();
-			continue;
-		}
-
-		if (kvmppc_core_prepare_to_enter(vcpu)) {
-			/* interrupts got enabled in between, so we
-			   are back at square 1 */
-			continue;
-		}
-
-		if (vcpu->mode == EXITING_GUEST_MODE) {
-			r = 1;
-			break;
-		}
-
-		/* Going into guest context! Yay! */
-		vcpu->mode = IN_GUEST_MODE;
-		smp_wmb();
-
-		break;
-	}
-
-	return r;
-}
-
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 45fe433316ea..153a26abc915 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -47,6 +47,63 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+#ifndef CONFIG_KVM_BOOK3S_64_HV
+/*
+ * Common checks before entering the guest world.  Call with interrupts
+ * disabled.
+ *
+ * returns !0 if a signal is pending and check_signal is true
+ */
+int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
+{
+	int r = 0;
+
+	WARN_ON_ONCE(!irqs_disabled());
+	while (true) {
+		if (need_resched()) {
+			local_irq_enable();
+			cond_resched();
+			local_irq_disable();
+			continue;
+		}
+
+		if (signal_pending(current)) {
+			r = 1;
+			break;
+		}
+
+		smp_mb();
+		if (vcpu->requests) {
+			/* Make sure we process requests preemptable */
+			local_irq_enable();
+			trace_kvm_check_requests(vcpu);
+			kvmppc_core_check_requests(vcpu);
+			local_irq_disable();
+			continue;
+		}
+
+		if (kvmppc_core_prepare_to_enter(vcpu)) {
+			/* interrupts got enabled in between, so we
+			   are back at square 1 */
+			continue;
+		}
+
+		if (vcpu->mode == EXITING_GUEST_MODE) {
+			r = 1;
+			break;
+		}
+
+		/* Going into guest context! Yay! */
+		vcpu->mode = IN_GUEST_MODE;
+		smp_wmb();
+
+		break;
+	}
+
+	return r;
+}
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
 {
 	int nr = kvmppc_get_gpr(vcpu, 11);
-- 
cgit v1.2.3


From 9b0cb3c808fef0d75d6f79ab9684246e6879f9c1 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Fri, 10 Aug 2012 13:23:55 +0200
Subject: KVM: PPC: Book3s: PR: Add (dumb) MMU Notifier support

Now that we have very simple MMU Notifier support for e500 in place,
also add the same simple support to book3s. It gets us one step closer
to actual fast support.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h   |  3 +--
 arch/powerpc/kvm/Kconfig              |  1 +
 arch/powerpc/kvm/book3s_32_mmu_host.c |  1 +
 arch/powerpc/kvm/book3s_64_mmu_host.c |  1 +
 arch/powerpc/kvm/book3s_mmu_hpte.c    |  5 ----
 arch/powerpc/kvm/book3s_pr.c          | 47 +++++++++++++++++++++++++++++++++++
 6 files changed, 51 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index cea9d3aab71c..4a5ec8f573c7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -46,8 +46,7 @@
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
 #endif
 
-#if defined(CONFIG_KVM_BOOK3S_64_HV) || defined(CONFIG_KVM_E500V2) || \
-    defined(CONFIG_KVM_E500MC)
+#if !defined(CONFIG_KVM_440)
 #include <linux/mmu_notifier.h>
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 40cad8c8bd0e..71f0cd9edf33 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -36,6 +36,7 @@ config KVM_BOOK3S_64_HANDLER
 config KVM_BOOK3S_PR
 	bool
 	select KVM_MMIO
+	select MMU_NOTIFIER
 
 config KVM_BOOK3S_32
 	tristate "KVM support for PowerPC book3s_32 processors"
diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index 837f13e7b6bf..9fac0101ffb9 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -254,6 +254,7 @@ next_pteg:
 
 	kvmppc_mmu_hpte_cache_map(vcpu, pte);
 
+	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
 out:
 	return r;
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index 0688b6b39585..6b2c80e49681 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -168,6 +168,7 @@ map_again:
 
 		kvmppc_mmu_hpte_cache_map(vcpu, pte);
 	}
+	kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
 
 out:
 	return r;
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index 41cb0017e757..2c86b0d63714 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -114,11 +114,6 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
 	hlist_del_init_rcu(&pte->list_vpte);
 	hlist_del_init_rcu(&pte->list_vpte_long);
 
-	if (pte->pte.may_write)
-		kvm_release_pfn_dirty(pte->pfn);
-	else
-		kvm_release_pfn_clean(pte->pfn);
-
 	spin_unlock(&vcpu3s->mmu_lock);
 
 	vcpu3s->hpte_cache_count--;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cae2defd1462..10f8217b8c38 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -90,8 +90,55 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 
 void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
+	/* We misuse TLB_FLUSH to indicate that we want to clear
+	   all shadow cache entries */
+	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
+		kvmppc_mmu_pte_flush(vcpu, 0, 0);
 }
 
+/************* MMU Notifiers *************/
+
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
+{
+	trace_kvm_unmap_hva(hva);
+
+	/*
+	 * Flush all shadow tlb entries everywhere. This is slow, but
+	 * we are 100% sure that we catch the to be unmapped page
+	 */
+	kvm_flush_remote_tlbs(kvm);
+
+	return 0;
+}
+
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	/* kvm_unmap_hva flushes everything anyways */
+	kvm_unmap_hva(kvm, start);
+
+	return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+	/* XXX could be more clever ;) */
+	return 0;
+}
+
+void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
+{
+	/* The page will get remapped properly on its next fault */
+	kvm_unmap_hva(kvm, hva);
+}
+
+/*****************************************/
+
 static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
 	ulong smsr = vcpu->arch.shared->msr;
-- 
cgit v1.2.3


From e85ad380c6bf6dcd4776d313c81d16a6293db136 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 12 Aug 2012 11:13:25 +0200
Subject: KVM: PPC: BookE: Drop redundant vcpu->mode set

We only need to set vcpu->mode to outside once.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 4652e0bfa781..492c343f598e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -528,8 +528,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 
 	kvm_guest_exit();
-	vcpu->mode = OUTSIDE_GUEST_MODE;
-	smp_wmb();
 
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
-- 
cgit v1.2.3


From c63ddcb4540db95e5a4223cfa8cdbe6efbd5e386 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 12 Aug 2012 11:27:49 +0200
Subject: KVM: PPC: Book3S: PR: Only do resched check once per exit

Now that we use our generic exit helper, we can safely drop our previous
kvm_resched that we used to trigger at the beginning of the exit handler
function.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 10f8217b8c38..2c268a15b20f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -602,7 +602,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	trace_kvm_exit(exit_nr, vcpu);
 	preempt_enable();
-	kvm_resched(vcpu);
+
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
 	{
-- 
cgit v1.2.3


From 706fb730cb4f9db2e3de33391475dd0616c2c935 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 12 Aug 2012 11:29:09 +0200
Subject: KVM: PPC: Exit guest context while handling exit

The x86 implementation of KVM accounts for host time while processing
guest exits. Do the same for us.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 2 ++
 arch/powerpc/kvm/booke.c     | 3 +++
 2 files changed, 5 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 2c268a15b20f..b4ae11ec068f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -601,6 +601,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	__hard_irq_enable();
 
 	trace_kvm_exit(exit_nr, vcpu);
+	kvm_guest_exit();
 	preempt_enable();
 
 	switch (exit_nr) {
@@ -872,6 +873,7 @@ program_interrupt:
 		}
 	}
 
+	kvm_guest_enter();
 	trace_kvm_book3s_reenter(r, vcpu);
 
 	return r;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 492c343f598e..887c7cc02146 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -650,6 +650,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	local_irq_enable();
 
 	trace_kvm_exit(exit_nr, vcpu);
+	kvm_guest_exit();
 
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
@@ -952,6 +953,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
+	kvm_guest_enter();
+
 	return r;
 }
 
-- 
cgit v1.2.3


From 0652eaaebea0995b3236e51dec727d62264f4248 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 12 Aug 2012 11:34:21 +0200
Subject: KVM: PPC: Book3S: PR: Indicate we're out of guest mode

When going out of guest mode, indicate that we are in vcpu->mode. That way
requests from other CPUs don't needlessly need to kick us to process them,
because it'll just happen next time we enter the guest.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b4ae11ec068f..9430a362e5a3 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1152,6 +1152,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 
 out:
+	vcpu->mode = OUTSIDE_GUEST_MODE;
 	preempt_enable();
 	return ret;
 }
-- 
cgit v1.2.3


From 24afa37b9c8f035d2fe2028e4824bc4e49bafe73 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sun, 12 Aug 2012 12:42:30 +0200
Subject: KVM: PPC: Consistentify vcpu exit path

When getting out of __vcpu_run, let's be consistent about the state we
return in. We want to always

  * have IRQs enabled
  * have called kvm_guest_exit before

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c |  8 ++++++--
 arch/powerpc/kvm/booke.c     | 13 ++++++++-----
 2 files changed, 14 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 9430a362e5a3..3dec346c4b93 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -868,12 +868,15 @@ program_interrupt:
 		 */
 		__hard_irq_disable();
 		if (kvmppc_prepare_to_enter(vcpu)) {
+			/* local_irq_enable(); */
 			run->exit_reason = KVM_EXIT_INTR;
 			r = -EINTR;
+		} else {
+			/* Going back to guest */
+			kvm_guest_enter();
 		}
 	}
 
-	kvm_guest_enter();
 	trace_kvm_book3s_reenter(r, vcpu);
 
 	return r;
@@ -1123,7 +1126,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
-	kvm_guest_exit();
+	/* No need for kvm_guest_exit. It's done in handle_exit.
+	   We also get here with interrupts enabled. */
 
 	current->thread.regs->msr = ext_msr;
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 887c7cc02146..aae535f6d9de 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -481,6 +481,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	local_irq_disable();
 	if (kvmppc_prepare_to_enter(vcpu)) {
+		local_irq_enable();
 		kvm_run->exit_reason = KVM_EXIT_INTR;
 		ret = -EINTR;
 		goto out;
@@ -512,6 +513,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
+	/* No need for kvm_guest_exit. It's done in handle_exit.
+	   We also get here with interrupts enabled. */
+
 #ifdef CONFIG_PPC_FPU
 	kvmppc_save_guest_fp(vcpu);
 
@@ -527,12 +531,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	current->thread.fpexc_mode = fpexc_mode;
 #endif
 
-	kvm_guest_exit();
-
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
-	local_irq_enable();
 	return ret;
 }
 
@@ -947,14 +948,16 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	if (!(r & RESUME_HOST)) {
 		local_irq_disable();
 		if (kvmppc_prepare_to_enter(vcpu)) {
+			local_irq_enable();
 			run->exit_reason = KVM_EXIT_INTR;
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+		} else {
+			/* Going back to guest */
+			kvm_guest_enter();
 		}
 	}
 
-	kvm_guest_enter();
-
 	return r;
 }
 
-- 
cgit v1.2.3


From bd2be6836ee493d41fe42367a2b129aa771185c1 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 01:04:19 +0200
Subject: KVM: PPC: Book3S: PR: Rework irq disabling

Today, we disable preemption while inside guest context, because we need
to expose to the world that we are not in a preemptible context. However,
during that time we already have interrupts disabled, which would indicate
that we are in a non-preemptible context.

The reason the checks for irqs_disabled() fail for us though is that we
manually control hard IRQs and ignore all the lazy EE framework. Let's
stop doing that. Instead, let's always use lazy EE to indicate when we
want to disable IRQs, but do a special final switch that gets us into
EE disabled, but soft enabled state. That way when we get back out of
guest state, we are immediately ready to process interrupts.

This simplifies the code drastically and reduces the time that we appear
as preempt disabled.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h   | 10 ++++++++++
 arch/powerpc/kvm/book3s_pr.c         | 21 +++++++--------------
 arch/powerpc/kvm/book3s_rmhandlers.S | 15 ++++++++-------
 arch/powerpc/kvm/booke.c             |  2 ++
 arch/powerpc/kvm/powerpc.c           | 14 ++++++++++++++
 5 files changed, 41 insertions(+), 21 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 59b7c87e47f7..545936428bf6 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -234,5 +234,15 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 	}
 }
 
+/* Please call after prepare_to_enter. This function puts the lazy ee state
+   back to normal mode, without actually enabling interrupts. */
+static inline void kvmppc_lazy_ee_enable(void)
+{
+#ifdef CONFIG_PPC64
+	/* Only need to enable IRQs by hard enabling them after this */
+	local_paca->irq_happened = 0;
+	local_paca->soft_enabled = 1;
+#endif
+}
 
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 3dec346c4b93..e737db8a5ca7 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -52,8 +52,6 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 #define MSR_USER32 MSR_USER
 #define MSR_USER64 MSR_USER
 #define HW_PAGE_SIZE PAGE_SIZE
-#define __hard_irq_disable local_irq_disable
-#define __hard_irq_enable local_irq_enable
 #endif
 
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -597,12 +595,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	run->exit_reason = KVM_EXIT_UNKNOWN;
 	run->ready_for_interrupt_injection = 1;
 
-	/* We get here with MSR.EE=0, so enable it to be a nice citizen */
-	__hard_irq_enable();
+	/* We get here with MSR.EE=1 */
 
 	trace_kvm_exit(exit_nr, vcpu);
 	kvm_guest_exit();
-	preempt_enable();
 
 	switch (exit_nr) {
 	case BOOK3S_INTERRUPT_INST_STORAGE:
@@ -854,7 +850,6 @@ program_interrupt:
 	}
 	}
 
-	preempt_disable();
 	if (!(r & RESUME_HOST)) {
 		/* To avoid clobbering exit_reason, only check for signals if
 		 * we aren't already exiting to userspace for some other
@@ -866,14 +861,15 @@ program_interrupt:
 		 * and if we really did time things so badly, then we just exit
 		 * again due to a host external interrupt.
 		 */
-		__hard_irq_disable();
+		local_irq_disable();
 		if (kvmppc_prepare_to_enter(vcpu)) {
-			/* local_irq_enable(); */
+			local_irq_enable();
 			run->exit_reason = KVM_EXIT_INTR;
 			r = -EINTR;
 		} else {
 			/* Going back to guest */
 			kvm_guest_enter();
+			kvmppc_lazy_ee_enable();
 		}
 	}
 
@@ -1066,8 +1062,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 #endif
 	ulong ext_msr;
 
-	preempt_disable();
-
 	/* Check if we can run the vcpu at all */
 	if (!vcpu->arch.sane) {
 		kvm_run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1081,9 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * really did time things so badly, then we just exit again due to
 	 * a host external interrupt.
 	 */
-	__hard_irq_disable();
+	local_irq_disable();
 	if (kvmppc_prepare_to_enter(vcpu)) {
-		__hard_irq_enable();
+		local_irq_enable();
 		kvm_run->exit_reason = KVM_EXIT_INTR;
 		ret = -EINTR;
 		goto out;
@@ -1122,7 +1116,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	if (vcpu->arch.shared->msr & MSR_FP)
 		kvmppc_handle_ext(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, MSR_FP);
 
-	kvm_guest_enter();
+	kvmppc_lazy_ee_enable();
 
 	ret = __kvmppc_vcpu_run(kvm_run, vcpu);
 
@@ -1157,7 +1151,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
-	preempt_enable();
 	return ret;
 }
 
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index 9ecf6e35cd8d..b2f8258b545a 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -170,20 +170,21 @@ kvmppc_handler_skip_ins:
  * Call kvmppc_handler_trampoline_enter in real mode
  *
  * On entry, r4 contains the guest shadow MSR
+ * MSR.EE has to be 0 when calling this function
  */
 _GLOBAL(kvmppc_entry_trampoline)
 	mfmsr	r5
 	LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter)
 	toreal(r7)
 
-	li	r9, MSR_RI
-	ori	r9, r9, MSR_EE
-	andc	r9, r5, r9	/* Clear EE and RI in MSR value */
 	li	r6, MSR_IR | MSR_DR
-	ori	r6, r6, MSR_EE
-	andc	r6, r5, r6	/* Clear EE, DR and IR in MSR value */
-	MTMSR_EERI(r9)		/* Clear EE and RI in MSR */
-	mtsrr0	r7		/* before we set srr0/1 */
+	andc	r6, r5, r6	/* Clear DR and IR in MSR value */
+	/*
+	 * Set EE in HOST_MSR so that it's enabled when we get into our
+	 * C exit handler function
+	 */
+	ori	r5, r5, MSR_EE
+	mtsrr0	r7
 	mtsrr1	r6
 	RFI
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index aae535f6d9de..2bd190c488ef 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -486,6 +486,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		ret = -EINTR;
 		goto out;
 	}
+	kvmppc_lazy_ee_enable();
 
 	kvm_guest_enter();
 
@@ -955,6 +956,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		} else {
 			/* Going back to guest */
 			kvm_guest_enter();
+			kvmppc_lazy_ee_enable();
 		}
 	}
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 153a26abc915..266549979e9f 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -30,6 +30,7 @@
 #include <asm/kvm_ppc.h>
 #include <asm/tlbflush.h>
 #include <asm/cputhreads.h>
+#include <asm/irqflags.h>
 #include "timing.h"
 #include "../mm/mmu_decl.h"
 
@@ -93,6 +94,19 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			break;
 		}
 
+#ifdef CONFIG_PPC64
+		/* lazy EE magic */
+		hard_irq_disable();
+		if (lazy_irq_pending()) {
+			/* Got an interrupt in between, try again */
+			local_irq_enable();
+			local_irq_disable();
+			continue;
+		}
+
+		trace_hardirqs_on();
+#endif
+
 		/* Going into guest context! Yay! */
 		vcpu->mode = IN_GUEST_MODE;
 		smp_wmb();
-- 
cgit v1.2.3


From 3766a4c693358cff33441310413e3776dbbf8ef0 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 01:24:01 +0200
Subject: KVM: PPC: Move kvm_guest_enter call into generic code

We need to call kvm_guest_enter in booke and book3s, so move its
call to generic code.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 2 --
 arch/powerpc/kvm/booke.c     | 2 --
 arch/powerpc/kvm/powerpc.c   | 3 +++
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index e737db8a5ca7..1ff0d6ccc589 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -867,8 +867,6 @@ program_interrupt:
 			run->exit_reason = KVM_EXIT_INTR;
 			r = -EINTR;
 		} else {
-			/* Going back to guest */
-			kvm_guest_enter();
 			kvmppc_lazy_ee_enable();
 		}
 	}
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 2bd190c488ef..5e8dc1909130 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -954,8 +954,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
 		} else {
-			/* Going back to guest */
-			kvm_guest_enter();
 			kvmppc_lazy_ee_enable();
 		}
 	}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 266549979e9f..6646574bf930 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -101,12 +101,15 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			/* Got an interrupt in between, try again */
 			local_irq_enable();
 			local_irq_disable();
+			kvm_guest_exit();
 			continue;
 		}
 
 		trace_hardirqs_on();
 #endif
 
+		kvm_guest_enter();
+
 		/* Going into guest context! Yay! */
 		vcpu->mode = IN_GUEST_MODE;
 		smp_wmb();
-- 
cgit v1.2.3


From 206c2ed7f1ea55222bde2954ee3d65c2e9cfb750 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 12:43:33 +0200
Subject: KVM: PPC: Ignore EXITING_GUEST_MODE mode

We don't need to do anything when mode is EXITING_GUEST_MODE, because
we essentially are outside of guest mode and did everything it asked
us to do by the time we check it.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/powerpc.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6646574bf930..dc86371b9953 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -89,11 +89,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			continue;
 		}
 
-		if (vcpu->mode == EXITING_GUEST_MODE) {
-			r = 1;
-			break;
-		}
-
 #ifdef CONFIG_PPC64
 		/* lazy EE magic */
 		hard_irq_disable();
-- 
cgit v1.2.3


From 7ee788556bf395a8ef413bea33494df29a3409e0 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 12:44:41 +0200
Subject: KVM: PPC: Add return value in prepare_to_enter

Our prepare_to_enter helper wants to be able to return in more circumstances
to the host than only when an interrupt is pending. Broaden the interface a
bit and move even more generic code to the generic helper.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 12 ++++++------
 arch/powerpc/kvm/booke.c     | 16 ++++++++--------
 arch/powerpc/kvm/powerpc.c   | 11 ++++++++---
 3 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 1ff0d6ccc589..71fa0f1873b3 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -589,6 +589,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int exit_nr)
 {
 	int r = RESUME_HOST;
+	int s;
 
 	vcpu->stat.sum_exits++;
 
@@ -862,10 +863,10 @@ program_interrupt:
 		 * again due to a host external interrupt.
 		 */
 		local_irq_disable();
-		if (kvmppc_prepare_to_enter(vcpu)) {
+		s = kvmppc_prepare_to_enter(vcpu);
+		if (s <= 0) {
 			local_irq_enable();
-			run->exit_reason = KVM_EXIT_INTR;
-			r = -EINTR;
+			r = s;
 		} else {
 			kvmppc_lazy_ee_enable();
 		}
@@ -1074,10 +1075,9 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * a host external interrupt.
 	 */
 	local_irq_disable();
-	if (kvmppc_prepare_to_enter(vcpu)) {
+	ret = kvmppc_prepare_to_enter(vcpu);
+	if (ret <= 0) {
 		local_irq_enable();
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		ret = -EINTR;
 		goto out;
 	}
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 5e8dc1909130..1917802463f5 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -467,7 +467,7 @@ void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-	int ret;
+	int ret, s;
 #ifdef CONFIG_PPC_FPU
 	unsigned int fpscr;
 	int fpexc_mode;
@@ -480,10 +480,10 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	local_irq_disable();
-	if (kvmppc_prepare_to_enter(vcpu)) {
+	s = kvmppc_prepare_to_enter(vcpu);
+	if (s <= 0) {
 		local_irq_enable();
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		ret = -EINTR;
+		ret = s;
 		goto out;
 	}
 	kvmppc_lazy_ee_enable();
@@ -642,6 +642,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
                        unsigned int exit_nr)
 {
 	int r = RESUME_HOST;
+	int s;
 
 	/* update before a new last_exit_type is rewritten */
 	kvmppc_update_timing_stats(vcpu);
@@ -948,11 +949,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 */
 	if (!(r & RESUME_HOST)) {
 		local_irq_disable();
-		if (kvmppc_prepare_to_enter(vcpu)) {
+		s = kvmppc_prepare_to_enter(vcpu);
+		if (s <= 0) {
 			local_irq_enable();
-			run->exit_reason = KVM_EXIT_INTR;
-			r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
-			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+			r = (s << 2) | RESUME_HOST | (r & RESUME_FLAG_NV);
 		} else {
 			kvmppc_lazy_ee_enable();
 		}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index dc86371b9953..0e2a98ab6a77 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -53,11 +53,14 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
  * Common checks before entering the guest world.  Call with interrupts
  * disabled.
  *
- * returns !0 if a signal is pending and check_signal is true
+ * returns:
+ *
+ * == 1 if we're ready to go into guest state
+ * <= 0 if we need to go back to the host with return value
  */
 int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 {
-	int r = 0;
+	int r = 1;
 
 	WARN_ON_ONCE(!irqs_disabled());
 	while (true) {
@@ -69,7 +72,9 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 		}
 
 		if (signal_pending(current)) {
-			r = 1;
+			kvmppc_account_exit(vcpu, SIGNAL_EXITS);
+			vcpu->run->exit_reason = KVM_EXIT_INTR;
+			r = -EINTR;
 			break;
 		}
 
-- 
cgit v1.2.3


From 7c973a2ebb8fb9c8ee2ae9647f9ad7b0ad58a3e6 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 12:50:35 +0200
Subject: KVM: PPC: Add return value to core_check_requests

Requests may want to tell us that we need to go back into host state,
so add a return value for the checks.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h | 2 +-
 arch/powerpc/kvm/book3s_pr.c       | 6 +++++-
 arch/powerpc/kvm/booke.c           | 6 +++++-
 arch/powerpc/kvm/powerpc.c         | 6 ++++--
 4 files changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 545936428bf6..3dfc437fb9d9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -112,7 +112,7 @@ extern int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn,
 				     ulong val);
 extern int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn,
 				     ulong *val);
-extern void kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
+extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu);
 
 extern int kvmppc_booke_init(void);
 extern void kvmppc_booke_exit(void);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 71fa0f1873b3..b3c584f94cb3 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -86,12 +86,16 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
 }
 
-void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
+	int r = 1; /* Indicate we want to get back into the guest */
+
 	/* We misuse TLB_FLUSH to indicate that we want to clear
 	   all shadow cache entries */
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 		kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+	return r;
 }
 
 /************* MMU Notifiers *************/
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 1917802463f5..c36493087dbf 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -455,14 +455,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
 	return r;
 }
 
-void kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
+int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 {
+	int r = 1; /* Indicate we want to get back into the guest */
+
 	if (kvm_check_request(KVM_REQ_PENDING_TIMER, vcpu))
 		update_timer_ints(vcpu);
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 		kvmppc_core_flush_tlb(vcpu);
 #endif
+
+	return r;
 }
 
 int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0e2a98ab6a77..54b12af577d0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -83,9 +83,11 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			/* Make sure we process requests preemptable */
 			local_irq_enable();
 			trace_kvm_check_requests(vcpu);
-			kvmppc_core_check_requests(vcpu);
+			r = kvmppc_core_check_requests(vcpu);
 			local_irq_disable();
-			continue;
+			if (r > 0)
+				continue;
+			break;
 		}
 
 		if (kvmppc_core_prepare_to_enter(vcpu)) {
-- 
cgit v1.2.3


From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Wed, 8 Aug 2012 20:38:19 +0000
Subject: KVM: PPC: booke: Add watchdog emulation

This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.

Signed-off-by: Liu Yu <yu.liu@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
[bharat.bhushan@freescale.com: reworked patch]
Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
[agraf: adjust to new request framework]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h  |   3 +
 arch/powerpc/include/asm/kvm_ppc.h   |   2 +
 arch/powerpc/include/asm/reg_booke.h |   7 ++
 arch/powerpc/kvm/book3s.c            |   9 ++
 arch/powerpc/kvm/booke.c             | 155 +++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke_emulate.c     |   8 ++
 arch/powerpc/kvm/powerpc.c           |  14 +++-
 7 files changed, 196 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 4a5ec8f573c7..51b0ccd56769 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -471,6 +471,8 @@ struct kvm_vcpu_arch {
 	ulong fault_esr;
 	ulong queued_dear;
 	ulong queued_esr;
+	spinlock_t wdt_lock;
+	struct timer_list wdt_timer;
 	u32 tlbcfg[4];
 	u32 mmucfg;
 	u32 epr;
@@ -486,6 +488,7 @@ struct kvm_vcpu_arch {
 	u8 osi_needed;
 	u8 osi_enabled;
 	u8 papr_enabled;
+	u8 watchdog_enabled;
 	u8 sane;
 	u8 cpu_type;
 	u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3dfc437fb9d9..c06a64b53362 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4982c5..e07e6af5e1ff 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
 #define TCR_FIE		0x00800000	/* FIT Interrupt Enable */
 #define TCR_ARE		0x00400000	/* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+			      (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW		0x80000000	/* Enable Next Watchdog */
 #define TSR_WIS		0x40000000	/* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a8360c857..e94666566fa9 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index c36493087dbf..09e8bf33a8c9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
 	clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+	kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+	clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 		msr_mask = MSR_CE | MSR_ME | MSR_DE;
 		int_class = INT_CLASS_NONCRIT;
 		break;
+	case BOOKE_IRQPRIO_WATCHDOG:
 	case BOOKE_IRQPRIO_CRITICAL:
 	case BOOKE_IRQPRIO_DBELL_CRIT:
 		allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+	u64 tb, wdt_tb, wdt_ticks = 0;
+	u64 nr_jiffies = 0;
+	u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+	wdt_tb = 1ULL << (63 - period);
+	tb = get_tb();
+	/*
+	 * The watchdog timeout will hapeen when TB bit corresponding
+	 * to watchdog will toggle from 0 to 1.
+	 */
+	if (tb & wdt_tb)
+		wdt_ticks = wdt_tb;
+
+	wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+	/* Convert timebase ticks to jiffies */
+	nr_jiffies = wdt_ticks;
+
+	if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+		nr_jiffies++;
+
+	return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+	unsigned long nr_jiffies;
+	unsigned long flags;
+
+	/*
+	 * If TSR_ENW and TSR_WIS are not set then no need to exit to
+	 * userspace, so clear the KVM_REQ_WATCHDOG request.
+	 */
+	if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
+		clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+
+	spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+	nr_jiffies = watchdog_next_timeout(vcpu);
+	/*
+	 * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+	 * then do not run the watchdog timer as this can break timer APIs.
+	 */
+	if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+		mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+	else
+		del_timer(&vcpu->arch.wdt_timer);
+	spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+	u32 tsr, new_tsr;
+	int final;
+
+	do {
+		new_tsr = tsr = vcpu->arch.tsr;
+		final = 0;
+
+		/* Time out event */
+		if (tsr & TSR_ENW) {
+			if (tsr & TSR_WIS)
+				final = 1;
+			else
+				new_tsr = tsr | TSR_WIS;
+		} else {
+			new_tsr = tsr | TSR_ENW;
+		}
+	} while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+	if (new_tsr & TSR_WIS) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * If this is final watchdog expiry and some action is required
+	 * then exit to userspace.
+	 */
+	if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+	    vcpu->arch.watchdog_enabled) {
+		smp_wmb();
+		kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+		kvm_vcpu_kick(vcpu);
+	}
+
+	/*
+	 * Stop running the watchdog timer after final expiration to
+	 * prevent the host from being flooded with timers if the
+	 * guest sets a short period.
+	 * Timers will resume when TSR/TCR is updated next time.
+	 */
+	if (!final)
+		arm_next_watchdog(vcpu);
+}
+
 static void update_timer_ints(struct kvm_vcpu *vcpu)
 {
 	if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
 		kvmppc_core_queue_dec(vcpu);
 	else
 		kvmppc_core_dequeue_dec(vcpu);
+
+	if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+		kvmppc_core_queue_watchdog(vcpu);
+	else
+		kvmppc_core_dequeue_watchdog(vcpu);
 }
 
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
 		kvmppc_core_flush_tlb(vcpu);
 #endif
 
+	if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+		vcpu->run->exit_reason = KVM_EXIT_WATCHDOG;
+		r = 0;
+	}
+
 	return r;
 }
 
@@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	/* setup watchdog timer once */
+	spin_lock_init(&vcpu->arch.wdt_lock);
+	setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+		    (unsigned long)vcpu);
+
+	return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
 	int i;
@@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
 	}
 
 	if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+		u32 old_tsr = vcpu->arch.tsr;
+
 		vcpu->arch.tsr = sregs->u.e.tsr;
+
+		if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+			arm_next_watchdog(vcpu);
+
 		update_timer_ints(vcpu);
 	}
 
@@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
 	vcpu->arch.tcr = new_tcr;
+	arm_next_watchdog(vcpu);
 	update_timer_ints(vcpu);
 }
 
@@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 {
 	clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+	/*
+	 * We may have stopped the watchdog due to
+	 * being stuck on final expiration.
+	 */
+	if (tsr_bits & (TSR_ENW | TSR_WIS))
+		arm_next_watchdog(vcpu);
+
 	update_timer_ints(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb608ab..5a66ade7fd17 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		kvmppc_clr_tsr_bits(vcpu, spr_val);
 		break;
 	case SPRN_TCR:
+		/*
+		 * WRC is a 2-bit field that is supposed to preserve its
+		 * value once written to non-zero.
+		 */
+		if (vcpu->arch.tcr & TCR_WRC_MASK) {
+			spr_val &= ~TCR_WRC_MASK;
+			spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+		}
 		kvmppc_set_tcr(vcpu, spr_val);
 		break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 54b12af577d0..0ffd7d17adc7 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	switch (ext) {
 #ifdef CONFIG_BOOKE
 	case KVM_CAP_PPC_BOOKE_SREGS:
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
 #else
 	case KVM_CAP_PPC_SEGSTATE:
 	case KVM_CAP_PPC_HIOR:
@@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
 	tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
 	vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_EXIT_TIMING
 	mutex_init(&vcpu->arch.exit_timing_lock);
 #endif
-
-	return 0;
+	ret = kvmppc_subarch_vcpu_init(vcpu);
+	return ret;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
 	kvmppc_mmu_destroy(vcpu);
+	kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		r = 0;
 		vcpu->arch.papr_enabled = true;
 		break;
+#ifdef CONFIG_BOOKE
+	case KVM_CAP_PPC_BOOKE_WATCHDOG:
+		r = 0;
+		vcpu->arch.watchdog_enabled = true;
+		break;
+#endif
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
 	case KVM_CAP_SW_TLB: {
 		struct kvm_config_tlb cfg;
-- 
cgit v1.2.3


From 6df8d3fc58dde84fc82a9ec2581440e54dfd3d14 Mon Sep 17 00:00:00 2001
From: Bharat Bhushan <r65777@freescale.com>
Date: Wed, 8 Aug 2012 21:17:55 +0000
Subject: booke: Added ONE_REG interface for IAC/DAC debug registers

IAC/DAC are defined as 32 bit while they are 64 bit wide. So ONE_REG
interface is added to set/get them.

Signed-off-by: Bharat Bhushan <bharat.bhushan@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm.h      | 12 ++++++++++
 arch/powerpc/include/asm/kvm_host.h | 24 +++++++++++++++++--
 arch/powerpc/kvm/booke.c            | 48 +++++++++++++++++++++++++++++++++++--
 arch/powerpc/kvm/booke_emulate.c    |  8 +++----
 4 files changed, 84 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 1bea4d8ea6f4..3c14202a3c84 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -221,6 +221,12 @@ struct kvm_sregs {
 
 			__u32 dbsr;	/* KVM_SREGS_E_UPDATE_DBSR */
 			__u32 dbcr[3];
+			/*
+			 * iac/dac registers are 64bit wide, while this API
+			 * interface provides only lower 32 bits on 64 bit
+			 * processors. ONE_REG interface is added for 64bit
+			 * iac/dac registers.
+			 */
 			__u32 iac[4];
 			__u32 dac[2];
 			__u32 dvc[2];
@@ -326,5 +332,11 @@ struct kvm_book3e_206_tlb_params {
 };
 
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 51b0ccd56769..f20a5ef1c7e8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -346,6 +346,27 @@ struct kvmppc_slb {
 	bool class	: 1;
 };
 
+# ifdef CONFIG_PPC_FSL_BOOK3E
+#define KVMPPC_BOOKE_IAC_NUM	2
+#define KVMPPC_BOOKE_DAC_NUM	2
+# else
+#define KVMPPC_BOOKE_IAC_NUM	4
+#define KVMPPC_BOOKE_DAC_NUM	2
+# endif
+#define KVMPPC_BOOKE_MAX_IAC	4
+#define KVMPPC_BOOKE_MAX_DAC	2
+
+struct kvmppc_booke_debug_reg {
+	u32 dbcr0;
+	u32 dbcr1;
+	u32 dbcr2;
+#ifdef CONFIG_KVM_E500MC
+	u32 dbcr4;
+#endif
+	u64 iac[KVMPPC_BOOKE_MAX_IAC];
+	u64 dac[KVMPPC_BOOKE_MAX_DAC];
+};
+
 struct kvm_vcpu_arch {
 	ulong host_stack;
 	u32 host_pid;
@@ -440,8 +461,6 @@ struct kvm_vcpu_arch {
 
 	u32 ccr0;
 	u32 ccr1;
-	u32 dbcr0;
-	u32 dbcr1;
 	u32 dbsr;
 
 	u64 mmcr[3];
@@ -476,6 +495,7 @@ struct kvm_vcpu_arch {
 	u32 tlbcfg[4];
 	u32 mmucfg;
 	u32 epr;
+	struct kvmppc_booke_debug_reg dbg_reg;
 #endif
 	gpa_t paddr_accessed;
 	gva_t vaddr_accessed;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 09e8bf33a8c9..959aae96469c 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1351,12 +1351,56 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	return -EINVAL;
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_PPC_IAC1:
+	case KVM_REG_PPC_IAC2:
+	case KVM_REG_PPC_IAC3:
+	case KVM_REG_PPC_IAC4: {
+		int iac = reg->id - KVM_REG_PPC_IAC1;
+		r = copy_to_user((u64 __user *)(long)reg->addr,
+				 &vcpu->arch.dbg_reg.iac[iac], sizeof(u64));
+		break;
+	}
+	case KVM_REG_PPC_DAC1:
+	case KVM_REG_PPC_DAC2: {
+		int dac = reg->id - KVM_REG_PPC_DAC1;
+		r = copy_to_user((u64 __user *)(long)reg->addr,
+				 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
+		break;
+	}
+	default:
+		break;
+	}
+	return r;
 }
 
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 {
-	return -EINVAL;
+	int r = -EINVAL;
+
+	switch (reg->id) {
+	case KVM_REG_PPC_IAC1:
+	case KVM_REG_PPC_IAC2:
+	case KVM_REG_PPC_IAC3:
+	case KVM_REG_PPC_IAC4: {
+		int iac = reg->id - KVM_REG_PPC_IAC1;
+		r = copy_from_user(&vcpu->arch.dbg_reg.iac[iac],
+			     (u64 __user *)(long)reg->addr, sizeof(u64));
+		break;
+	}
+	case KVM_REG_PPC_DAC1:
+	case KVM_REG_PPC_DAC2: {
+		int dac = reg->id - KVM_REG_PPC_DAC1;
+		r = copy_from_user(&vcpu->arch.dbg_reg.dac[dac],
+			     (u64 __user *)(long)reg->addr, sizeof(u64));
+		break;
+	}
+	default:
+		break;
+	}
+	return r;
 }
 
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 5a66ade7fd17..cc99a0b3d202 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -133,10 +133,10 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		vcpu->arch.csrr1 = spr_val;
 		break;
 	case SPRN_DBCR0:
-		vcpu->arch.dbcr0 = spr_val;
+		vcpu->arch.dbg_reg.dbcr0 = spr_val;
 		break;
 	case SPRN_DBCR1:
-		vcpu->arch.dbcr1 = spr_val;
+		vcpu->arch.dbg_reg.dbcr1 = spr_val;
 		break;
 	case SPRN_DBSR:
 		vcpu->arch.dbsr &= ~spr_val;
@@ -266,10 +266,10 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 		*spr_val = vcpu->arch.csrr1;
 		break;
 	case SPRN_DBCR0:
-		*spr_val = vcpu->arch.dbcr0;
+		*spr_val = vcpu->arch.dbg_reg.dbcr0;
 		break;
 	case SPRN_DBCR1:
-		*spr_val = vcpu->arch.dbcr1;
+		*spr_val = vcpu->arch.dbg_reg.dbcr1;
 		break;
 	case SPRN_DBSR:
 		*spr_val = vcpu->arch.dbsr;
-- 
cgit v1.2.3


From 491dd5b8a4926393308172da80c73faf242a4057 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 14:40:29 +0200
Subject: KVM: PPC: 44x: Initialize PVR

We need to make sure that vcpu->arch.pvr is initialized to a sane value,
so let's just take the host PVR.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/44x.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c
index 50e7dbc7356c..3d7fd21c65f9 100644
--- a/arch/powerpc/kvm/44x.c
+++ b/arch/powerpc/kvm/44x.c
@@ -83,6 +83,7 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
 		vcpu_44x->shadow_refs[i].gtlb_index = -1;
 
 	vcpu->arch.cpu_type = KVM_CPU_440;
+	vcpu->arch.pvr = mfspr(SPRN_PVR);
 
 	return 0;
 }
-- 
cgit v1.2.3


From 50c871edf59b4585fd2c17acfe4e7cd3752418b7 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Mon, 13 Aug 2012 14:50:54 +0200
Subject: KVM: PPC: BookE: Add MCSR SPR support

Add support for the MCSR SPR. This only implements the SPR storage
bits, not actual machine checks.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke_emulate.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index cc99a0b3d202..514790f41aba 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -237,6 +237,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_IVOR15:
 		vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG] = spr_val;
 		break;
+	case SPRN_MCSR:
+		vcpu->arch.mcsr &= ~spr_val;
+		break;
 
 	default:
 		emulated = EMULATE_FAIL;
@@ -329,6 +332,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_IVOR15:
 		*spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_DEBUG];
 		break;
+	case SPRN_MCSR:
+		*spr_val = vcpu->arch.mcsr;
+		break;
 
 	default:
 		emulated = EMULATE_FAIL;
-- 
cgit v1.2.3


From 166a2b7000c388aee81168987ce2eddb6783f550 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 15 Aug 2012 01:38:43 +0200
Subject: KVM: PPC: Use symbols for exit trace

Exit traces are a lot easier to read when you don't have to remember
cryptic numbers for guest exit reasons. Symbolify them in our trace
output.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/trace.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h
index cb2780a42fd8..519aba8bb3d3 100644
--- a/arch/powerpc/kvm/trace.h
+++ b/arch/powerpc/kvm/trace.h
@@ -31,6 +31,60 @@ TRACE_EVENT(kvm_ppc_instr,
 		  __entry->inst, __entry->pc, __entry->emulate)
 );
 
+#ifdef CONFIG_PPC_BOOK3S
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+#else
+#define kvm_trace_symbol_exit \
+	{0, "CRITICAL"}, \
+	{1, "MACHINE_CHECK"}, \
+	{2, "DATA_STORAGE"}, \
+	{3, "INST_STORAGE"}, \
+	{4, "EXTERNAL"}, \
+	{5, "ALIGNMENT"}, \
+	{6, "PROGRAM"}, \
+	{7, "FP_UNAVAIL"}, \
+	{8, "SYSCALL"}, \
+	{9, "AP_UNAVAIL"}, \
+	{10, "DECREMENTER"}, \
+	{11, "FIT"}, \
+	{12, "WATCHDOG"}, \
+	{13, "DTLB_MISS"}, \
+	{14, "ITLB_MISS"}, \
+	{15, "DEBUG"}, \
+	{32, "SPE_UNAVAIL"}, \
+	{33, "SPE_FP_DATA"}, \
+	{34, "SPE_FP_ROUND"}, \
+	{35, "PERFORMANCE_MONITOR"}, \
+	{36, "DOORBELL"}, \
+	{37, "DOORBELL_CRITICAL"}, \
+	{38, "GUEST_DBELL"}, \
+	{39, "GUEST_DBELL_CRIT"}, \
+	{40, "HV_SYSCALL"}, \
+	{41, "HV_PRIV"}
+#endif
+
 TRACE_EVENT(kvm_exit,
 	TP_PROTO(unsigned int exit_nr, struct kvm_vcpu *vcpu),
 	TP_ARGS(exit_nr, vcpu),
@@ -62,7 +116,7 @@ TRACE_EVENT(kvm_exit,
 		__entry->last_inst	= vcpu->arch.last_inst;
 	),
 
-	TP_printk("exit=0x%x"
+	TP_printk("exit=%s"
 		" | pc=0x%lx"
 		" | msr=0x%lx"
 		" | dar=0x%lx"
@@ -71,7 +125,7 @@ TRACE_EVENT(kvm_exit,
 #endif
 		" | last_inst=0x%lx"
 		,
-		__entry->exit_nr,
+		__print_symbolic(__entry->exit_nr, kvm_trace_symbol_exit),
 		__entry->pc,
 		__entry->msr,
 		__entry->dar,
-- 
cgit v1.2.3


From 430c7ff52ffb902e1e08b255b93c28fcad8cb9ef Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 15 Aug 2012 11:42:07 +0200
Subject: KVM: PPC: E500: Remove E500_TLB_DIRTY flag

Since we always mark pages as dirty immediately when mapping them read/write
now, there's no need for the dirty flag in our cache.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500.h     | 3 +--
 arch/powerpc/kvm/e500_tlb.c | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index aa8b81428bf4..d1622864549e 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -27,8 +27,7 @@
 #define E500_TLB_NUM   2
 
 #define E500_TLB_VALID 1
-#define E500_TLB_DIRTY 2
-#define E500_TLB_BITMAP 4
+#define E500_TLB_BITMAP 2
 
 struct tlbe_ref {
 	pfn_t pfn;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 1af6fab58995..43489a8fa985 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -303,10 +303,8 @@ static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref,
 	ref->pfn = pfn;
 	ref->flags = E500_TLB_VALID;
 
-	if (tlbe_is_writable(gtlbe)) {
-		ref->flags |= E500_TLB_DIRTY;
+	if (tlbe_is_writable(gtlbe))
 		kvm_set_pfn_dirty(pfn);
-	}
 }
 
 static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref)
-- 
cgit v1.2.3


From e4dcfe88fb30bcedda80c151018086fffb8280e6 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 16 Aug 2012 00:28:09 +0200
Subject: KVM: PPC: 440: Implement mtdcrx

We need mtdcrx to execute properly on 460 cores.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/44x_emulate.c | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index c8c61578fdfc..3843a75e3e98 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -28,11 +28,29 @@
 #include "44x_tlb.h"
 
 #define XOP_MFDCR   323
+#define XOP_MTDCRX  387
 #define XOP_MTDCR   451
 #define XOP_TLBSX   914
 #define XOP_ICCCI   966
 #define XOP_TLBWE   978
 
+static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
+{
+	/* emulate some access in kernel */
+	switch (dcrn) {
+	case DCRN_CPR0_CONFIG_ADDR:
+		vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
+		return EMULATE_DONE;
+	default:
+		vcpu->run->dcr.dcrn = dcrn;
+		vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
+		vcpu->run->dcr.is_write = 1;
+		vcpu->arch.dcr_needed = 1;
+		kvmppc_account_exit(vcpu, DCR_EXITS);
+		return EMULATE_DO_DCR;
+	}
+}
+
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -85,20 +103,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 
 		case XOP_MTDCR:
-			/* emulate some access in kernel */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs);
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data = kvmppc_get_gpr(vcpu, rs);
-				run->dcr.is_write = 1;
-				vcpu->arch.dcr_needed = 1;
-				kvmppc_account_exit(vcpu, DCR_EXITS);
-				emulated = EMULATE_DO_DCR;
-			}
+			emulated = emulate_mtdcr(vcpu, rs, dcrn);
+			break;
 
+		case XOP_MTDCRX:
+			emulated = emulate_mtdcr(vcpu, rs,
+					kvmppc_get_gpr(vcpu, ra));
 			break;
 
 		case XOP_TLBWE:
-- 
cgit v1.2.3


From ceb985f9d18cba2efdef08b8d31751c2c2b20d77 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 16 Aug 2012 00:34:58 +0200
Subject: KVM: PPC: 440: Implement mfdcrx

We need mfdcrx to execute properly on 460 cores.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/44x_emulate.c | 74 ++++++++++++++++++++++++------------------
 1 file changed, 43 insertions(+), 31 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 3843a75e3e98..1a793c4c4a67 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -27,6 +27,7 @@
 #include "booke.h"
 #include "44x_tlb.h"
 
+#define XOP_MFDCRX  259
 #define XOP_MFDCR   323
 #define XOP_MTDCRX  387
 #define XOP_MTDCR   451
@@ -51,6 +52,43 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
 	}
 }
 
+static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
+{
+	/* The guest may access CPR0 registers to determine the timebase
+	 * frequency, and it must know the real host frequency because it
+	 * can directly access the timebase registers.
+	 *
+	 * It would be possible to emulate those accesses in userspace,
+	 * but userspace can really only figure out the end frequency.
+	 * We could decompose that into the factors that compute it, but
+	 * that's tricky math, and it's easier to just report the real
+	 * CPR0 values.
+	 */
+	switch (dcrn) {
+	case DCRN_CPR0_CONFIG_ADDR:
+		kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
+		break;
+	case DCRN_CPR0_CONFIG_DATA:
+		local_irq_disable();
+		mtdcr(DCRN_CPR0_CONFIG_ADDR,
+			  vcpu->arch.cpr0_cfgaddr);
+		kvmppc_set_gpr(vcpu, rt,
+			       mfdcr(DCRN_CPR0_CONFIG_DATA));
+		local_irq_enable();
+		break;
+	default:
+		vcpu->run->dcr.dcrn = dcrn;
+		vcpu->run->dcr.data =  0;
+		vcpu->run->dcr.is_write = 0;
+		vcpu->arch.io_gpr = rt;
+		vcpu->arch.dcr_needed = 1;
+		kvmppc_account_exit(vcpu, DCR_EXITS);
+		return EMULATE_DO_DCR;
+	}
+
+	return EMULATE_DONE;
+}
+
 int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
                            unsigned int inst, int *advance)
 {
@@ -68,38 +106,12 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		switch (get_xop(inst)) {
 
 		case XOP_MFDCR:
-			/* The guest may access CPR0 registers to determine the timebase
-			 * frequency, and it must know the real host frequency because it
-			 * can directly access the timebase registers.
-			 *
-			 * It would be possible to emulate those accesses in userspace,
-			 * but userspace can really only figure out the end frequency.
-			 * We could decompose that into the factors that compute it, but
-			 * that's tricky math, and it's easier to just report the real
-			 * CPR0 values.
-			 */
-			switch (dcrn) {
-			case DCRN_CPR0_CONFIG_ADDR:
-				kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr);
-				break;
-			case DCRN_CPR0_CONFIG_DATA:
-				local_irq_disable();
-				mtdcr(DCRN_CPR0_CONFIG_ADDR,
-					  vcpu->arch.cpr0_cfgaddr);
-				kvmppc_set_gpr(vcpu, rt,
-					       mfdcr(DCRN_CPR0_CONFIG_DATA));
-				local_irq_enable();
-				break;
-			default:
-				run->dcr.dcrn = dcrn;
-				run->dcr.data =  0;
-				run->dcr.is_write = 0;
-				vcpu->arch.io_gpr = rt;
-				vcpu->arch.dcr_needed = 1;
-				kvmppc_account_exit(vcpu, DCR_EXITS);
-				emulated = EMULATE_DO_DCR;
-			}
+			emulated = emulate_mfdcr(vcpu, rt, dcrn);
+			break;
 
+		case XOP_MFDCRX:
+			emulated = emulate_mfdcr(vcpu, rt,
+					kvmppc_get_gpr(vcpu, ra));
 			break;
 
 		case XOP_MTDCR:
-- 
cgit v1.2.3


From 7a08c2740f07fb8c3769d1f137721835ead7652f Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Thu, 16 Aug 2012 13:10:16 +0200
Subject: KVM: PPC: BookE: Support FPU on non-hv systems

When running on HV aware hosts, we can not trap when the guest sets the FP
bit, so we just let it do so when it wants to, because it has full access to
MSR.

For non-HV aware hosts with an FPU (like 440), we need to also adjust the
shadow MSR though. Otherwise the guest gets an FP unavailable trap even when
it really enabled the FP bit in MSR.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 959aae96469c..5f0476a602d8 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -122,6 +122,16 @@ static void kvmppc_vcpu_sync_spe(struct kvm_vcpu *vcpu)
 }
 #endif
 
+static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
+{
+#if defined(CONFIG_PPC_FPU) && !defined(CONFIG_KVM_BOOKE_HV)
+	/* We always treat the FP bit as enabled from the host
+	   perspective, so only need to adjust the shadow MSR */
+	vcpu->arch.shadow_msr &= ~MSR_FP;
+	vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_FP;
+#endif
+}
+
 /*
  * Helper function for "full" MSR writes.  No need to call this if only
  * EE/CE/ME/DE/RI are changing.
@@ -138,6 +148,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
 
 	kvmppc_mmu_msr_notify(vcpu, old_msr);
 	kvmppc_vcpu_sync_spe(vcpu);
+	kvmppc_vcpu_sync_fpu(vcpu);
 }
 
 static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
-- 
cgit v1.2.3


From d61966fc08b84857b697ebae4489c652dd87e48a Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Wed, 12 Sep 2012 03:18:14 +0000
Subject: KVM: PPC: bookehv: Allow duplicate calls of DO_KVM macro

The current form of DO_KVM macro restricts its use to one call per input
parameter set. This is caused by kvmppc_resume_\intno\()_\srr1 symbol
definition.
Duplicate calls of DO_KVM are required by distinct implementations of
exeption handlers which are delegated at runtime. Use a rare label number
to avoid conflicts with the calling contexts.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_booke_hv_asm.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index 30a600fa1b6a..a37a12a9a7d7 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -38,9 +38,9 @@
 #ifdef CONFIG_KVM_BOOKE_HV
 BEGIN_FTR_SECTION
 	mtocrf	0x80, r11	/* check MSR[GS] without clobbering reg */
-	bf	3, kvmppc_resume_\intno\()_\srr1
+	bf	3, 1975f
 	b	kvmppc_handler_\intno\()_\srr1
-kvmppc_resume_\intno\()_\srr1:
+1975:
 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV)
 #endif
 .endm
-- 
cgit v1.2.3


From 2c9097e4c1340208ef93371abd4b3bd7e989381b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 11 Sep 2012 13:27:01 +0000
Subject: KVM: PPC: Book3S HV: Take the SRCU read lock before looking up
 memslots

The generic KVM code uses SRCU (sleeping RCU) to protect accesses
to the memslots data structures against updates due to userspace
adding, modifying or removing memory slots.  We need to do that too,
both to avoid accessing stale copies of the memslots and to avoid
lockdep warnings.  This therefore adds srcu_read_lock/unlock pairs
around code that accesses and uses memslots.

Since the real-mode handlers for H_ENTER, H_REMOVE and H_BULK_REMOVE
need to access the memslots, and we don't want to call the SRCU code
in real mode (since we have no assurance that it would only access
the linear mapping), we hold the SRCU read lock for the VM while
in the guest.  This does mean that adding or removing memory slots
while some vcpus are executing in the guest will block for up to
two jiffies.  This tradeoff is acceptable since adding/removing
memory slots only happens rarely, while H_ENTER/H_REMOVE/H_BULK_REMOVE
are performance-critical hot paths.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 17 +++++++++++++----
 arch/powerpc/kvm/book3s_hv.c        | 27 +++++++++++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d95d11322a15..0f031c07f7e5 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -24,6 +24,7 @@
 #include <linux/slab.h>
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
+#include <linux/srcu.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -1057,20 +1058,22 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	unsigned long hva, psize, offset;
 	unsigned long pa;
 	unsigned long *physp;
+	int srcu_idx;
 
+	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
-		return NULL;
+		goto err;
 	if (!kvm->arch.using_mmu_notifiers) {
 		physp = kvm->arch.slot_phys[memslot->id];
 		if (!physp)
-			return NULL;
+			goto err;
 		physp += gfn - memslot->base_gfn;
 		pa = *physp;
 		if (!pa) {
 			if (kvmppc_get_guest_page(kvm, gfn, memslot,
 						  PAGE_SIZE) < 0)
-				return NULL;
+				goto err;
 			pa = *physp;
 		}
 		page = pfn_to_page(pa >> PAGE_SHIFT);
@@ -1079,9 +1082,11 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 		hva = gfn_to_hva_memslot(memslot, gfn);
 		npages = get_user_pages_fast(hva, 1, 1, pages);
 		if (npages < 1)
-			return NULL;
+			goto err;
 		page = pages[0];
 	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+
 	psize = PAGE_SIZE;
 	if (PageHuge(page)) {
 		page = compound_head(page);
@@ -1091,6 +1096,10 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	if (nb_ret)
 		*nb_ret = psize - offset;
 	return page_address(page) + offset;
+
+ err:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+	return NULL;
 }
 
 void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 83e929e66f9d..48b0d4a73b9d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -30,6 +30,7 @@
 #include <linux/cpumask.h>
 #include <linux/spinlock.h>
 #include <linux/page-flags.h>
+#include <linux/srcu.h>
 
 #include <asm/reg.h>
 #include <asm/cputable.h>
@@ -366,13 +367,16 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
 	struct kvm_vcpu *tvcpu;
+	int idx;
 
 	switch (req) {
 	case H_ENTER:
+		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
 					      kvmppc_get_gpr(vcpu, 5),
 					      kvmppc_get_gpr(vcpu, 6),
 					      kvmppc_get_gpr(vcpu, 7));
+		srcu_read_unlock(&vcpu->kvm->srcu, idx);
 		break;
 	case H_CEDE:
 		break;
@@ -411,6 +415,7 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			      struct task_struct *tsk)
 {
 	int r = RESUME_HOST;
+	int srcu_idx;
 
 	vcpu->stat.sum_exits++;
 
@@ -470,12 +475,16 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * have been handled already.
 	 */
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
+		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
 		r = kvmppc_book3s_hv_page_fault(run, vcpu,
 				kvmppc_get_pc(vcpu), 0);
+		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		break;
 	/*
 	 * This occurs if the guest executes an illegal instruction.
@@ -820,6 +829,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	long ret;
 	u64 now;
 	int ptid, i, need_vpa_update;
+	int srcu_idx;
 
 	/* don't start if any threads have a signal pending */
 	need_vpa_update = 0;
@@ -898,6 +908,9 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
+
+	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
+
 	__kvmppc_vcore_entry(NULL, vcpu0);
 	for (i = 0; i < threads_per_core; ++i)
 		kvmppc_release_hwthread(vc->pcpu + i);
@@ -913,6 +926,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
 
+	srcu_read_unlock(&vcpu0->kvm->srcu, srcu_idx);
+
 	/* make sure updates to secondary vcpu structs are visible now */
 	smp_mb();
 	kvm_guest_exit();
@@ -1362,6 +1377,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	unsigned long rmls;
 	unsigned long *physp;
 	unsigned long i, npages;
+	int srcu_idx;
 
 	mutex_lock(&kvm->lock);
 	if (kvm->arch.rma_setup_done)
@@ -1377,12 +1393,13 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	}
 
 	/* Look up the memslot for guest physical address 0 */
+	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, 0);
 
 	/* We must have some memory at 0 by now */
 	err = -EINVAL;
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
-		goto out;
+		goto out_srcu;
 
 	/* Look up the VMA for the start of this memory slot */
 	hva = memslot->userspace_addr;
@@ -1406,14 +1423,14 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		err = -EPERM;
 		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
 			pr_err("KVM: CPU requires an RMO\n");
-			goto out;
+			goto out_srcu;
 		}
 
 		/* We can handle 4k, 64k or 16M pages in the VRMA */
 		err = -EINVAL;
 		if (!(psize == 0x1000 || psize == 0x10000 ||
 		      psize == 0x1000000))
-			goto out;
+			goto out_srcu;
 
 		/* Update VRMASD field in the LPCR */
 		senc = slb_pgsize_encoding(psize);
@@ -1436,7 +1453,7 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		err = -EINVAL;
 		if (rmls < 0) {
 			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-			goto out;
+			goto out_srcu;
 		}
 		atomic_inc(&ri->use_count);
 		kvm->arch.rma = ri;
@@ -1476,6 +1493,8 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 	smp_wmb();
 	kvm->arch.rma_setup_done = 1;
 	err = 0;
+ out_srcu:
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
  out:
 	mutex_unlock(&kvm->lock);
 	return err;
-- 
cgit v1.2.3


From a66b48c3a39fa1c4223d4f847fdc7a04ed1618de Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 11 Sep 2012 13:27:46 +0000
Subject: KVM: PPC: Move kvm->arch.slot_phys into memslot.arch

Now that we have an architecture-specific field in the kvm_memory_slot
structure, we can use it to store the array of page physical addresses
that we need for Book3S HV KVM on PPC970 processors.  This reduces the
size of struct kvm_arch for Book3S HV, and also reduces the size of
struct kvm_arch_memory_slot for other PPC KVM variants since the fields
in it are now only compiled in for Book3S HV.

This necessitates making the kvm_arch_create_memslot and
kvm_arch_free_memslot operations specific to each PPC KVM variant.
That in turn means that we now don't allocate the rmap arrays on
Book3S PR and Book E.

Since we now unpin pages and free the slot_phys array in
kvmppc_core_free_memslot, we no longer need to do it in
kvmppc_core_destroy_vm, since the generic code takes care to free
all the memslots when destroying a VM.

We now need the new memslot to be passed in to
kvmppc_core_prepare_memory_region, since we need to initialize its
arch.slot_phys member on Book3S HV.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |   9 ++--
 arch/powerpc/include/asm/kvm_ppc.h  |   5 ++
 arch/powerpc/kvm/book3s_64_mmu_hv.c |   6 +--
 arch/powerpc/kvm/book3s_hv.c        | 104 +++++++++++++++++++++---------------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |   2 +-
 arch/powerpc/kvm/book3s_pr.c        |  12 +++++
 arch/powerpc/kvm/booke.c            |  12 +++++
 arch/powerpc/kvm/powerpc.c          |  13 ++---
 8 files changed, 102 insertions(+), 61 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index f20a5ef1c7e8..68f5a308737a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -204,7 +204,7 @@ struct revmap_entry {
 };
 
 /*
- * We use the top bit of each memslot->rmap entry as a lock bit,
+ * We use the top bit of each memslot->arch.rmap entry as a lock bit,
  * and bit 32 as a present flag.  The bottom 32 bits are the
  * index in the guest HPT of a HPTE that points to the page.
  */
@@ -215,14 +215,17 @@ struct revmap_entry {
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
-/* Low-order bits in kvm->arch.slot_phys[][] */
+/* Low-order bits in memslot->arch.slot_phys[] */
 #define KVMPPC_PAGE_ORDER_MASK	0x1f
 #define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
 #define KVMPPC_PAGE_WRITETHRU	HPTE_R_W	/* 0x40 */
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch_memory_slot {
+#ifdef CONFIG_KVM_BOOK3S_64_HV
 	unsigned long *rmap;
+	unsigned long *slot_phys;
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
 };
 
 struct kvm_arch {
@@ -246,8 +249,6 @@ struct kvm_arch {
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
 	spinlock_t slot_phys_lock;
-	unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
-	int slot_npages[KVM_MEM_SLOTS_NUM];
 	unsigned short last_vcpu[NR_CPUS];
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	struct kvmppc_linear_info *hpt_li;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index c06a64b53362..41a00eae68c7 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -143,7 +143,12 @@ extern struct kvmppc_linear_info *kvm_alloc_hpt(void);
 extern void kvm_release_hpt(struct kvmppc_linear_info *li);
 extern int kvmppc_core_init_vm(struct kvm *kvm);
 extern void kvmppc_core_destroy_vm(struct kvm *kvm);
+extern void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+				     struct kvm_memory_slot *dont);
+extern int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+				      unsigned long npages);
 extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
 				struct kvm_userspace_memory_region *mem);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 0f031c07f7e5..a389cc62b16c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -261,7 +261,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
 
 /*
  * This is called to get a reference to a guest page if there isn't
- * one already in the kvm->arch.slot_phys[][] arrays.
+ * one already in the memslot->arch.slot_phys[] array.
  */
 static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 				  struct kvm_memory_slot *memslot,
@@ -276,7 +276,7 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	struct vm_area_struct *vma;
 	unsigned long pfn, i, npages;
 
-	physp = kvm->arch.slot_phys[memslot->id];
+	physp = memslot->arch.slot_phys;
 	if (!physp)
 		return -EINVAL;
 	if (physp[gfn - memslot->base_gfn])
@@ -1065,7 +1065,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
 	if (!kvm->arch.using_mmu_notifiers) {
-		physp = kvm->arch.slot_phys[memslot->id];
+		physp = memslot->arch.slot_phys;
 		if (!physp)
 			goto err;
 		physp += gfn - memslot->base_gfn;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 48b0d4a73b9d..817837de7362 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1314,48 +1314,67 @@ static unsigned long slb_pgsize_encoding(unsigned long psize)
 	return senc;
 }
 
-int kvmppc_core_prepare_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem)
+static void unpin_slot(struct kvm_memory_slot *memslot)
 {
-	unsigned long npages;
-	unsigned long *phys;
+	unsigned long *physp;
+	unsigned long j, npages, pfn;
+	struct page *page;
 
-	/* Allocate a slot_phys array */
-	phys = kvm->arch.slot_phys[mem->slot];
-	if (!kvm->arch.using_mmu_notifiers && !phys) {
-		npages = mem->memory_size >> PAGE_SHIFT;
-		phys = vzalloc(npages * sizeof(unsigned long));
-		if (!phys)
-			return -ENOMEM;
-		kvm->arch.slot_phys[mem->slot] = phys;
-		kvm->arch.slot_npages[mem->slot] = npages;
+	physp = memslot->arch.slot_phys;
+	npages = memslot->npages;
+	if (!physp)
+		return;
+	for (j = 0; j < npages; j++) {
+		if (!(physp[j] & KVMPPC_GOT_PAGE))
+			continue;
+		pfn = physp[j] >> PAGE_SHIFT;
+		page = pfn_to_page(pfn);
+		SetPageDirty(page);
+		put_page(page);
+	}
+}
+
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+			      struct kvm_memory_slot *dont)
+{
+	if (!dont || free->arch.rmap != dont->arch.rmap) {
+		vfree(free->arch.rmap);
+		free->arch.rmap = NULL;
 	}
+	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
+		unpin_slot(free);
+		vfree(free->arch.slot_phys);
+		free->arch.slot_phys = NULL;
+	}
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+			       unsigned long npages)
+{
+	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+	if (!slot->arch.rmap)
+		return -ENOMEM;
+	slot->arch.slot_phys = NULL;
 
 	return 0;
 }
 
-static void unpin_slot(struct kvm *kvm, int slot_id)
+int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot,
+				      struct kvm_userspace_memory_region *mem)
 {
-	unsigned long *physp;
-	unsigned long j, npages, pfn;
-	struct page *page;
+	unsigned long *phys;
 
-	physp = kvm->arch.slot_phys[slot_id];
-	npages = kvm->arch.slot_npages[slot_id];
-	if (physp) {
-		spin_lock(&kvm->arch.slot_phys_lock);
-		for (j = 0; j < npages; j++) {
-			if (!(physp[j] & KVMPPC_GOT_PAGE))
-				continue;
-			pfn = physp[j] >> PAGE_SHIFT;
-			page = pfn_to_page(pfn);
-			SetPageDirty(page);
-			put_page(page);
-		}
-		kvm->arch.slot_phys[slot_id] = NULL;
-		spin_unlock(&kvm->arch.slot_phys_lock);
-		vfree(physp);
+	/* Allocate a slot_phys array if needed */
+	phys = memslot->arch.slot_phys;
+	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
+		phys = vzalloc(memslot->npages * sizeof(unsigned long));
+		if (!phys)
+			return -ENOMEM;
+		memslot->arch.slot_phys = phys;
 	}
+
+	return 0;
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
@@ -1482,11 +1501,16 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
 		/* Initialize phys addrs of pages in RMO */
 		npages = ri->npages;
 		porder = __ilog2(npages);
-		physp = kvm->arch.slot_phys[memslot->id];
-		spin_lock(&kvm->arch.slot_phys_lock);
-		for (i = 0; i < npages; ++i)
-			physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
-		spin_unlock(&kvm->arch.slot_phys_lock);
+		physp = memslot->arch.slot_phys;
+		if (physp) {
+			if (npages > memslot->npages)
+				npages = memslot->npages;
+			spin_lock(&kvm->arch.slot_phys_lock);
+			for (i = 0; i < npages; ++i)
+				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
+					porder;
+			spin_unlock(&kvm->arch.slot_phys_lock);
+		}
 	}
 
 	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
@@ -1547,12 +1571,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
-	unsigned long i;
-
-	if (!kvm->arch.using_mmu_notifiers)
-		for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-			unpin_slot(kvm, i);
-
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);
 		kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fb0e821622d4..63eb94e63cc3 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -183,7 +183,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	rmap = &memslot->arch.rmap[slot_fn];
 
 	if (!kvm->arch.using_mmu_notifiers) {
-		physp = kvm->arch.slot_phys[memslot->id];
+		physp = memslot->arch.slot_phys;
 		if (!physp)
 			return H_PARAMETER;
 		physp += slot_fn;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b3c584f94cb3..fdadc9e57da2 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1220,7 +1220,19 @@ int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info)
 }
 #endif /* CONFIG_PPC64 */
 
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+			      struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+			       unsigned long npages)
+{
+	return 0;
+}
+
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot,
 				      struct kvm_userspace_memory_region *mem)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 5f0476a602d8..514405752988 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1438,7 +1438,19 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	return -ENOTSUPP;
 }
 
+void kvmppc_core_free_memslot(struct kvm_memory_slot *free,
+			      struct kvm_memory_slot *dont)
+{
+}
+
+int kvmppc_core_create_memslot(struct kvm_memory_slot *slot,
+			       unsigned long npages)
+{
+	return 0;
+}
+
 int kvmppc_core_prepare_memory_region(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot,
 				      struct kvm_userspace_memory_region *mem)
 {
 	return 0;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 0ffd7d17adc7..33122dd89da9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -389,19 +389,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
-	if (!dont || free->arch.rmap != dont->arch.rmap) {
-		vfree(free->arch.rmap);
-		free->arch.rmap = NULL;
-	}
+	kvmppc_core_free_memslot(free, dont);
 }
 
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
-	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
-	if (!slot->arch.rmap)
-		return -ENOMEM;
-
-	return 0;
+	return kvmppc_core_create_memslot(slot, npages);
 }
 
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
@@ -410,7 +403,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                    struct kvm_userspace_memory_region *mem,
                                    int user_alloc)
 {
-	return kvmppc_core_prepare_memory_region(kvm, mem);
+	return kvmppc_core_prepare_memory_region(kvm, memslot, mem);
 }
 
 void kvm_arch_commit_memory_region(struct kvm *kvm,
-- 
cgit v1.2.3


From dfe49dbd1fc7310a4e0e2f83ae737cd7d34fa0cd Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 11 Sep 2012 13:28:18 +0000
Subject: KVM: PPC: Book3S HV: Handle memory slot deletion and modification
 correctly

This adds an implementation of kvm_arch_flush_shadow_memslot for
Book3S HV, and arranges for kvmppc_core_commit_memory_region to
flush the dirty log when modifying an existing slot.  With this,
we can handle deletion and modification of memory slots.

kvm_arch_flush_shadow_memslot calls kvmppc_core_flush_memslot, which
on Book3S HV now traverses the reverse map chains to remove any HPT
(hashed page table) entries referring to pages in the memslot.  This
gets called by generic code whenever deleting a memslot or changing
the guest physical address for a memslot.

We flush the dirty log in kvmppc_core_commit_memory_region for
consistency with what x86 does.  We only need to flush when an
existing memslot is being modified, because for a new memslot the
rmap array (which stores the dirty bits) is all zero, meaning that
every page is considered clean already, and when deleting a memslot
we obviously don't care about the dirty bits any more.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 +-
 arch/powerpc/include/asm/kvm_ppc.h    |  5 ++++-
 arch/powerpc/kvm/book3s_64_mmu_hv.c   | 33 ++++++++++++++++++++++++++++-----
 arch/powerpc/kvm/book3s_hv.c          | 18 ++++++++++++++++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   |  2 +-
 arch/powerpc/kvm/book3s_pr.c          |  7 ++++++-
 arch/powerpc/kvm/booke.c              |  7 ++++++-
 arch/powerpc/kvm/powerpc.c            |  3 ++-
 8 files changed, 64 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index f0e0c6a66d97..ab738005d2ea 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -160,7 +160,7 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
-			struct kvm_memory_slot *memslot);
+			struct kvm_memory_slot *memslot, unsigned long *map);
 
 extern void kvmppc_entry_trampoline(void);
 extern void kvmppc_hv_entry_trampoline(void);
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 41a00eae68c7..3fb980d293e5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -151,9 +151,12 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 				struct kvm_memory_slot *memslot,
 				struct kvm_userspace_memory_region *mem);
 extern void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem);
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old);
 extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm,
 				      struct kvm_ppc_smmu_info *info);
+extern void kvmppc_core_flush_memslot(struct kvm *kvm,
+				      struct kvm_memory_slot *memslot);
 
 extern int kvmppc_bookehv_init(void);
 extern void kvmppc_bookehv_exit(void);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index a389cc62b16c..f598366e51c6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -851,7 +851,8 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 		psize = hpte_page_size(hptep[0], ptel);
 		if ((hptep[0] & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			hptep[0] |= HPTE_V_ABSENT;
+			if (kvm->arch.using_mmu_notifiers)
+				hptep[0] |= HPTE_V_ABSENT;
 			kvmppc_invalidate_hpte(kvm, hptep, i);
 			/* Harvest R and C */
 			rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C);
@@ -878,6 +879,28 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 	return 0;
 }
 
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
+{
+	unsigned long *rmapp;
+	unsigned long gfn;
+	unsigned long n;
+
+	rmapp = memslot->arch.rmap;
+	gfn = memslot->base_gfn;
+	for (n = memslot->npages; n; --n) {
+		/*
+		 * Testing the present bit without locking is OK because
+		 * the memslot has been marked invalid already, and hence
+		 * no new HPTEs referencing this page can be created,
+		 * thus the present bit can't go from 0 to 1.
+		 */
+		if (*rmapp & KVMPPC_RMAP_PRESENT)
+			kvm_unmap_rmapp(kvm, rmapp, gfn);
+		++rmapp;
+		++gfn;
+	}
+}
+
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 unsigned long gfn)
 {
@@ -1031,16 +1054,16 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp)
 	return ret;
 }
 
-long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
+long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot,
+			     unsigned long *map)
 {
 	unsigned long i;
-	unsigned long *rmapp, *map;
+	unsigned long *rmapp;
 
 	preempt_disable();
 	rmapp = memslot->arch.rmap;
-	map = memslot->dirty_bitmap;
 	for (i = 0; i < memslot->npages; ++i) {
-		if (kvm_test_clear_dirty(kvm, rmapp))
+		if (kvm_test_clear_dirty(kvm, rmapp) && map)
 			__set_bit_le(i, map);
 		++rmapp;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 817837de7362..38c7f1bc3495 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1288,7 +1288,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
 	n = kvm_dirty_bitmap_bytes(memslot);
 	memset(memslot->dirty_bitmap, 0, n);
 
-	r = kvmppc_hv_get_dirty_log(kvm, memslot);
+	r = kvmppc_hv_get_dirty_log(kvm, memslot, memslot->dirty_bitmap);
 	if (r)
 		goto out;
 
@@ -1378,8 +1378,22 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem)
+				      struct kvm_userspace_memory_region *mem,
+				      struct kvm_memory_slot old)
 {
+	unsigned long npages = mem->memory_size >> PAGE_SHIFT;
+	struct kvm_memory_slot *memslot;
+
+	if (npages && old.npages) {
+		/*
+		 * If modifying a memslot, reset all the rmap dirty bits.
+		 * If this is a new memslot, we don't need to do anything
+		 * since the rmap array starts out as all zeroes,
+		 * i.e. no pages are dirty.
+		 */
+		memslot = id_to_memslot(kvm->memslots, mem->slot);
+		kvmppc_hv_get_dirty_log(kvm, memslot, NULL);
+	}
 }
 
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 63eb94e63cc3..9955216477a4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -81,7 +81,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	ptel = rev->guest_rpte |= rcbits;
 	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
 	memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
-	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+	if (!memslot)
 		return;
 
 	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index fdadc9e57da2..4d0667a810a4 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1239,7 +1239,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem)
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 514405752988..3a6490fc6fcd 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1457,7 +1457,12 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm,
 }
 
 void kvmppc_core_commit_memory_region(struct kvm *kvm,
-				struct kvm_userspace_memory_region *mem)
+				struct kvm_userspace_memory_region *mem,
+				struct kvm_memory_slot old)
+{
+}
+
+void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 33122dd89da9..8443e23f3605 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -411,7 +411,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
                struct kvm_memory_slot old,
                int user_alloc)
 {
-	kvmppc_core_commit_memory_region(kvm, mem);
+	kvmppc_core_commit_memory_region(kvm, mem, old);
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
@@ -421,6 +421,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
 void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
 				   struct kvm_memory_slot *slot)
 {
+	kvmppc_core_flush_memslot(kvm, slot);
 }
 
 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
-- 
cgit v1.2.3


From a47d72f3613d5edfd8e752c9b804d7df35810649 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 20 Sep 2012 19:35:51 +0000
Subject: KVM: PPC: Book3S HV: Fix updates of vcpu->cpu

This removes the powerpc "generic" updates of vcpu->cpu in load and
put, and moves them to the various backends.

The reason is that "HV" KVM does its own sauce with that field
and the generic updates might corrupt it. The field contains the
CPU# of the -first- HW CPU of the core always for all the VCPU
threads of a core (the one that's online from a host Linux
perspective).

However, the preempt notifiers are going to be called on the
threads VCPUs when they are running (due to them sleeping on our
private waitqueue) causing unload to be called, potentially
clobbering the value.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 3 ++-
 arch/powerpc/kvm/booke.c     | 2 ++
 arch/powerpc/kvm/powerpc.c   | 2 --
 3 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 4d0667a810a4..bf3ec5d66d8c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -64,7 +64,7 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	svcpu->slb_max = to_book3s(vcpu)->slb_shadow_max;
 	svcpu_put(svcpu);
 #endif
-
+	vcpu->cpu = smp_processor_id();
 #ifdef CONFIG_PPC_BOOK3S_32
 	current->thread.kvm_shadow_vcpu = to_book3s(vcpu)->shadow_vcpu;
 #endif
@@ -84,6 +84,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	kvmppc_giveup_ext(vcpu, MSR_FP);
 	kvmppc_giveup_ext(vcpu, MSR_VEC);
 	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	vcpu->cpu = -1;
 }
 
 int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3a6490fc6fcd..69d047c22d20 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1509,12 +1509,14 @@ void kvmppc_decrementer_func(unsigned long data)
 
 void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
+	vcpu->cpu = smp_processor_id();
 	current->thread.kvm_vcpu = vcpu;
 }
 
 void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	current->thread.kvm_vcpu = NULL;
+	vcpu->cpu = -1;
 }
 
 int __init kvmppc_booke_init(void)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 8443e23f3605..6002ea938a48 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -504,7 +504,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
 #endif
 	kvmppc_core_vcpu_load(vcpu, cpu);
-	vcpu->cpu = smp_processor_id();
 }
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
@@ -513,7 +512,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_BOOKE
 	vcpu->arch.vrsave = mfspr(SPRN_VRSAVE);
 #endif
-	vcpu->cpu = -1;
 }
 
 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-- 
cgit v1.2.3


From 964ee98ccde0534548565a201827cf06d813180f Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 20 Sep 2012 19:36:32 +0000
Subject: KVM: PPC: Book3S HV: Remove bogus update of physical thread IDs

When making a vcpu non-runnable we incorrectly changed the
thread IDs of all other threads on the core, just remove that
code.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 38c7f1bc3495..c9ae3148c981 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -706,17 +706,11 @@ extern void xics_wake_cpu(int cpu);
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
 {
-	struct kvm_vcpu *v;
-
 	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
 		return;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 	--vc->n_runnable;
 	++vc->n_busy;
-	/* decrement the physical thread id of each following vcpu */
-	v = vcpu;
-	list_for_each_entry_continue(v, &vc->runnable_threads, arch.run_list)
-		--v->arch.ptid;
 	list_del(&vcpu->arch.run_list);
 }
 
-- 
cgit v1.2.3


From 70bddfefbdcdbfdebd81d8b59ff8a7fa5d450ccc Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 20 Sep 2012 19:39:21 +0000
Subject: KVM: PPC: Book3S HV: Fix calculation of guest phys address for MMIO
 emulation

In the case where the host kernel is using a 64kB base page size and
the guest uses a 4k HPTE (hashed page table entry) to map an emulated
MMIO device, we were calculating the guest physical address wrongly.
We were calculating a gfn as the guest physical address shifted right
16 bits (PAGE_SHIFT) but then only adding back in 12 bits from the
effective address, since the HPTE had a 4k page size.  Thus the gpa
reported to userspace was missing 4 bits.

Instead, we now compute the guest physical address from the HPTE
without reference to the host page size, and then compute the gfn
by shifting the gpa right PAGE_SHIFT bits.

Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index f598366e51c6..7a4aae99ac5b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -571,7 +571,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hptep, hpte[3], r;
 	unsigned long mmu_seq, psize, pte_size;
-	unsigned long gfn, hva, pfn;
+	unsigned long gpa, gfn, hva, pfn;
 	struct kvm_memory_slot *memslot;
 	unsigned long *rmap;
 	struct revmap_entry *rev;
@@ -609,15 +609,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Translate the logical address and get the page */
 	psize = hpte_page_size(hpte[0], r);
-	gfn = hpte_rpn(r, psize);
+	gpa = (r & HPTE_R_RPN & ~(psize - 1)) | (ea & (psize - 1));
+	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 
 	/* No memslot means it's an emulated MMIO region */
-	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) {
-		unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1));
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
-	}
 
 	if (!kvm->arch.using_mmu_notifiers)
 		return -EFAULT;		/* should never get here */
-- 
cgit v1.2.3


From e400e72f250d2567e89c9bafb47ab91e8d9a15a2 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Wed, 22 Aug 2012 15:04:23 +0000
Subject: KVM: PPC: e500: fix allocation size error on g2h_tlb1_map

We were only allocating half the bytes we need, which was made more
obvious by a recent fix to the memset in  clear_tlb1_bitmap().

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Cc: stable@vger.kernel.org
---
 arch/powerpc/kvm/e500_tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 43489a8fa985..a27d134eef36 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1385,7 +1385,7 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500)
 	if (!vcpu_e500->gtlb_priv[1])
 		goto err;
 
-	vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(unsigned int) *
+	vcpu_e500->g2h_tlb1_map = kzalloc(sizeof(u64) *
 					  vcpu_e500->gtlb_params[1].entries,
 					  GFP_KERNEL);
 	if (!vcpu_e500->g2h_tlb1_map)
-- 
cgit v1.2.3


From adbb48a854bf8dee556dc42b96dd61503351a82d Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Wed, 22 Aug 2012 15:04:24 +0000
Subject: KVM: PPC: e500: MMU API: fix leak of shared_tlb_pages

This was found by kmemleak.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500_tlb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index a27d134eef36..641f97847b95 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1134,6 +1134,8 @@ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500)
 		}
 
 		vcpu_e500->num_shared_tlb_pages = 0;
+
+		kfree(vcpu_e500->shared_tlb_pages);
 		vcpu_e500->shared_tlb_pages = NULL;
 	} else {
 		kfree(vcpu_e500->gtlb_arch);
-- 
cgit v1.2.3


From 5bd1cf118533aba41b3fbd4834e6362a9237db71 Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Wed, 22 Aug 2012 15:03:50 +0000
Subject: KVM: PPC: set IN_GUEST_MODE before checking requests

Avoid a race as described in the code comment.

Also remove a related smp_wmb() from booke's kvmppc_prepare_to_enter().
I can't see any reason for it, and the book3s_pr version doesn't have it.

Signed-off-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c   |  1 -
 arch/powerpc/kvm/powerpc.c | 14 +++++++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 69d047c22d20..3d1f35dc7862 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -674,7 +674,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 
 out:
 	vcpu->mode = OUTSIDE_GUEST_MODE;
-	smp_wmb();
 	return ret;
 }
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6002ea938a48..deb0d596d815 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -78,7 +78,16 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 			break;
 		}
 
+		vcpu->mode = IN_GUEST_MODE;
+
+		/*
+		 * Reading vcpu->requests must happen after setting vcpu->mode,
+		 * so we don't miss a request because the requester sees
+		 * OUTSIDE_GUEST_MODE and assumes we'll be checking requests
+		 * before next entering the guest (and thus doesn't IPI).
+		 */
 		smp_mb();
+
 		if (vcpu->requests) {
 			/* Make sure we process requests preemptable */
 			local_irq_enable();
@@ -111,11 +120,6 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 #endif
 
 		kvm_guest_enter();
-
-		/* Going into guest context! Yay! */
-		vcpu->mode = IN_GUEST_MODE;
-		smp_wmb();
-
 		break;
 	}
 
-- 
cgit v1.2.3


From a136a8bdc02fc14625ac45ee846cc646fc46597e Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 25 Sep 2012 20:31:56 +0000
Subject: KVM: PPC: Book3S: Get/set guest SPRs using the GET/SET_ONE_REG
 interface

This enables userspace to get and set various SPRs (special-purpose
registers) using the KVM_[GS]ET_ONE_REG ioctls.  With this, userspace
can get and set all the SPRs that are part of the guest state, either
through the KVM_[GS]ET_REGS ioctls, the KVM_[GS]ET_SREGS ioctls, or
the KVM_[GS]ET_ONE_REG ioctls.

The SPRs that are added here are:

- DABR:  Data address breakpoint register
- DSCR:  Data stream control register
- PURR:  Processor utilization of resources register
- SPURR: Scaled PURR
- DAR:   Data address register
- DSISR: Data storage interrupt status register
- AMR:   Authority mask register
- UAMOR: User authority mask override register
- MMCR0, MMCR1, MMCRA: Performance monitor unit control registers
- PMC1..PMC8: Performance monitor unit counter registers

In order to reduce code duplication between PR and HV KVM code, this
moves the kvm_vcpu_ioctl_[gs]et_one_reg functions into book3s.c and
centralizes the copying between user and kernel space there.  The
registers that are handled differently between PR and HV, and those
that exist only in one flavor, are handled in kvmppc_[gs]et_one_reg()
functions that are specific to each flavor.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: minimal style fixes]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm.h     | 21 +++++++++++
 arch/powerpc/include/asm/kvm_ppc.h | 32 ++++++++++++++++
 arch/powerpc/kvm/book3s.c          | 68 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c       | 76 ++++++++++++++++++++++++++++++++------
 arch/powerpc/kvm/book3s_pr.c       | 23 ++++++------
 5 files changed, 196 insertions(+), 24 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 3c14202a3c84..9557576a5325 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -338,5 +338,26 @@ struct kvm_book3e_206_tlb_params {
 #define KVM_REG_PPC_IAC4	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
 #define KVM_REG_PPC_DAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
 #define KVM_REG_PPC_DAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR		(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+
+#define KVM_REG_PPC_PMC1	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
 
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 3fb980d293e5..709f0ddae1f1 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/kvm_types.h>
 #include <linux/kvm_host.h>
+#include <linux/bug.h>
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/kvm_book3s.h>
 #else
@@ -196,6 +197,35 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
 	return r;
 }
 
+union kvmppc_one_reg {
+	u32	wval;
+	u64	dval;
+};
+
+#define one_reg_size(id)	\
+	(1ul << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+#define get_reg_val(id, reg)	({		\
+	union kvmppc_one_reg __u;		\
+	switch (one_reg_size(id)) {		\
+	case 4: __u.wval = (reg); break;	\
+	case 8: __u.dval = (reg); break;	\
+	default: BUG();				\
+	}					\
+	__u;					\
+})
+
+
+#define set_reg_val(id, val)	({		\
+	u64 __v;				\
+	switch (one_reg_size(id)) {		\
+	case 4: __v = (val).wval; break;	\
+	case 8: __v = (val).dval; break;	\
+	default: BUG();				\
+	}					\
+	__v;					\
+})
+
 void kvmppc_core_get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 int kvmppc_core_set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
@@ -204,6 +234,8 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs);
 
 int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
 int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg);
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *);
 
 void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid);
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index e94666566fa9..a5af28fc3a8f 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -485,6 +485,74 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 	return -ENOTSUPP;
 }
 
+int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r;
+	union kvmppc_one_reg val;
+	int size;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	r = kvmppc_get_one_reg(vcpu, reg->id, &val);
+
+	if (r == -EINVAL) {
+		r = 0;
+		switch (reg->id) {
+		case KVM_REG_PPC_DAR:
+			val = get_reg_val(reg->id, vcpu->arch.shared->dar);
+			break;
+		case KVM_REG_PPC_DSISR:
+			val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
+			break;
+		default:
+			r = -EINVAL;
+			break;
+		}
+	}
+	if (r)
+		return r;
+
+	if (copy_to_user((char __user *)(unsigned long)reg->addr, &val, size))
+		r = -EFAULT;
+
+	return r;
+}
+
+int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+{
+	int r;
+	union kvmppc_one_reg val;
+	int size;
+
+	size = one_reg_size(reg->id);
+	if (size > sizeof(val))
+		return -EINVAL;
+
+	if (copy_from_user(&val, (char __user *)(unsigned long)reg->addr, size))
+		return -EFAULT;
+
+	r = kvmppc_set_one_reg(vcpu, reg->id, &val);
+
+	if (r == -EINVAL) {
+		r = 0;
+		switch (reg->id) {
+		case KVM_REG_PPC_DAR:
+			vcpu->arch.shared->dar = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_DSISR:
+			vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
+			break;
+		default:
+			r = -EINVAL;
+			break;
+		}
+	}
+
+	return r;
+}
+
 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                   struct kvm_translation *tr)
 {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c9ae3148c981..1cc6b77fa63d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -544,36 +544,88 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
-	int r = -EINVAL;
+	int r = 0;
+	long int i;
 
-	switch (reg->id) {
+	switch (id) {
 	case KVM_REG_PPC_HIOR:
-		r = put_user(0, (u64 __user *)reg->addr);
+		*val = get_reg_val(id, 0);
+		break;
+	case KVM_REG_PPC_DABR:
+		*val = get_reg_val(id, vcpu->arch.dabr);
+		break;
+	case KVM_REG_PPC_DSCR:
+		*val = get_reg_val(id, vcpu->arch.dscr);
+		break;
+	case KVM_REG_PPC_PURR:
+		*val = get_reg_val(id, vcpu->arch.purr);
+		break;
+	case KVM_REG_PPC_SPURR:
+		*val = get_reg_val(id, vcpu->arch.spurr);
+		break;
+	case KVM_REG_PPC_AMR:
+		*val = get_reg_val(id, vcpu->arch.amr);
+		break;
+	case KVM_REG_PPC_UAMOR:
+		*val = get_reg_val(id, vcpu->arch.uamor);
+		break;
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+		i = id - KVM_REG_PPC_MMCR0;
+		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
+		break;
+	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+		i = id - KVM_REG_PPC_PMC1;
+		*val = get_reg_val(id, vcpu->arch.pmc[i]);
 		break;
 	default:
+		r = -EINVAL;
 		break;
 	}
 
 	return r;
 }
 
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
-	int r = -EINVAL;
+	int r = 0;
+	long int i;
 
-	switch (reg->id) {
+	switch (id) {
 	case KVM_REG_PPC_HIOR:
-	{
-		u64 hior;
 		/* Only allow this to be set to zero */
-		r = get_user(hior, (u64 __user *)reg->addr);
-		if (!r && (hior != 0))
+		if (set_reg_val(id, *val))
 			r = -EINVAL;
 		break;
-	}
+	case KVM_REG_PPC_DABR:
+		vcpu->arch.dabr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_DSCR:
+		vcpu->arch.dscr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PURR:
+		vcpu->arch.purr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_SPURR:
+		vcpu->arch.spurr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_AMR:
+		vcpu->arch.amr = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_UAMOR:
+		vcpu->arch.uamor = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRA:
+		i = id - KVM_REG_PPC_MMCR0;
+		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
+		i = id - KVM_REG_PPC_PMC1;
+		vcpu->arch.pmc[i] = set_reg_val(id, *val);
+		break;
 	default:
+		r = -EINVAL;
 		break;
 	}
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index bf3ec5d66d8c..c81109f3a376 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -945,34 +945,33 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
-	int r = -EINVAL;
+	int r = 0;
 
-	switch (reg->id) {
+	switch (id) {
 	case KVM_REG_PPC_HIOR:
-		r = copy_to_user((u64 __user *)(long)reg->addr,
-				&to_book3s(vcpu)->hior, sizeof(u64));
+		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
 	default:
+		r = -EINVAL;
 		break;
 	}
 
 	return r;
 }
 
-int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
+int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
-	int r = -EINVAL;
+	int r = 0;
 
-	switch (reg->id) {
+	switch (id) {
 	case KVM_REG_PPC_HIOR:
-		r = copy_from_user(&to_book3s(vcpu)->hior,
-				   (u64 __user *)(long)reg->addr, sizeof(u64));
-		if (!r)
-			to_book3s(vcpu)->hior_explicit = true;
+		to_book3s(vcpu)->hior = set_reg_val(id, *val);
+		to_book3s(vcpu)->hior_explicit = true;
 		break;
 	default:
+		r = -EINVAL;
 		break;
 	}
 
-- 
cgit v1.2.3


From a8bd19ef4dd49f0eef86a4a8eb43d60f967236b8 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 25 Sep 2012 20:32:30 +0000
Subject: KVM: PPC: Book3S: Get/set guest FP regs using the GET/SET_ONE_REG
 interface

This enables userspace to get and set all the guest floating-point
state using the KVM_[GS]ET_ONE_REG ioctls.  The floating-point state
includes all of the traditional floating-point registers and the
FPSCR (floating point status/control register), all the VMX/Altivec
vector registers and the VSCR (vector status/control register), and
on POWER7, the vector-scalar registers (note that each FP register
is the high-order half of the corresponding VSR).

Most of these are implemented in common Book 3S code, except for VSX
on POWER7.  Because HV and PR differ in how they store the FP and VSX
registers on POWER7, the code for these cases is not common.  On POWER7,
the FP registers are the upper halves of the VSX registers vsr0 - vsr31.
PR KVM stores vsr0 - vsr31 in two halves, with the upper halves in the
arch.fpr[] array and the lower halves in the arch.vsr[] array, whereas
HV KVM on POWER7 stores the whole VSX register in arch.vsr[].

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix whitespace, vsx compilation]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm.h     | 20 ++++++++++++++++
 arch/powerpc/include/asm/kvm_ppc.h |  2 ++
 arch/powerpc/kvm/book3s.c          | 48 ++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c       | 42 +++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_pr.c       | 26 +++++++++++++++++++++
 5 files changed, 138 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 9557576a5325..1466975129c7 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -360,4 +360,24 @@ struct kvm_book3e_206_tlb_params {
 #define KVM_REG_PPC_PMC7	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
 #define KVM_REG_PPC_PMC8	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
 
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n)	(KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0		(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n)	(KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n)	(KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+#define KVM_REG_PPC_VSCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 709f0ddae1f1..51604a16c8a5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -200,6 +200,8 @@ static inline u32 kvmppc_set_field(u64 inst, int msb, int lsb, int value)
 union kvmppc_one_reg {
 	u32	wval;
 	u64	dval;
+	vector128 vval;
+	u64	vsxval[2];
 };
 
 #define one_reg_size(id)	\
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index a5af28fc3a8f..a4b645285240 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -490,6 +490,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	int r;
 	union kvmppc_one_reg val;
 	int size;
+	long int i;
 
 	size = one_reg_size(reg->id);
 	if (size > sizeof(val))
@@ -506,6 +507,29 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		case KVM_REG_PPC_DSISR:
 			val = get_reg_val(reg->id, vcpu->arch.shared->dsisr);
 			break;
+		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+			i = reg->id - KVM_REG_PPC_FPR0;
+			val = get_reg_val(reg->id, vcpu->arch.fpr[i]);
+			break;
+		case KVM_REG_PPC_FPSCR:
+			val = get_reg_val(reg->id, vcpu->arch.fpscr);
+			break;
+#ifdef CONFIG_ALTIVEC
+		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			val.vval = vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0];
+			break;
+		case KVM_REG_PPC_VSCR:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			val = get_reg_val(reg->id, vcpu->arch.vscr.u[3]);
+			break;
+#endif /* CONFIG_ALTIVEC */
 		default:
 			r = -EINVAL;
 			break;
@@ -525,6 +549,7 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 	int r;
 	union kvmppc_one_reg val;
 	int size;
+	long int i;
 
 	size = one_reg_size(reg->id);
 	if (size > sizeof(val))
@@ -544,6 +569,29 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 		case KVM_REG_PPC_DSISR:
 			vcpu->arch.shared->dsisr = set_reg_val(reg->id, val);
 			break;
+		case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+			i = reg->id - KVM_REG_PPC_FPR0;
+			vcpu->arch.fpr[i] = set_reg_val(reg->id, val);
+			break;
+		case KVM_REG_PPC_FPSCR:
+			vcpu->arch.fpscr = set_reg_val(reg->id, val);
+			break;
+#ifdef CONFIG_ALTIVEC
+		case KVM_REG_PPC_VR0 ... KVM_REG_PPC_VR31:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vr[reg->id - KVM_REG_PPC_VR0] = val.vval;
+			break;
+		case KVM_REG_PPC_VSCR:
+			if (!cpu_has_feature(CPU_FTR_ALTIVEC)) {
+				r = -ENXIO;
+				break;
+			}
+			vcpu->arch.vscr.u[3] = set_reg_val(reg->id, val);
+			break;
+#endif /* CONFIG_ALTIVEC */
 		default:
 			r = -EINVAL;
 			break;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1cc6b77fa63d..94ec0e30969d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -579,6 +579,27 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		i = id - KVM_REG_PPC_PMC1;
 		*val = get_reg_val(id, vcpu->arch.pmc[i]);
 		break;
+#ifdef CONFIG_VSX
+	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+		if (cpu_has_feature(CPU_FTR_VSX)) {
+			/* VSX => FP reg i is stored in arch.vsr[2*i] */
+			long int i = id - KVM_REG_PPC_FPR0;
+			*val = get_reg_val(id, vcpu->arch.vsr[2 * i]);
+		} else {
+			/* let generic code handle it */
+			r = -EINVAL;
+		}
+		break;
+	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+		if (cpu_has_feature(CPU_FTR_VSX)) {
+			long int i = id - KVM_REG_PPC_VSR0;
+			val->vsxval[0] = vcpu->arch.vsr[2 * i];
+			val->vsxval[1] = vcpu->arch.vsr[2 * i + 1];
+		} else {
+			r = -ENXIO;
+		}
+		break;
+#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
@@ -624,6 +645,27 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		i = id - KVM_REG_PPC_PMC1;
 		vcpu->arch.pmc[i] = set_reg_val(id, *val);
 		break;
+#ifdef CONFIG_VSX
+	case KVM_REG_PPC_FPR0 ... KVM_REG_PPC_FPR31:
+		if (cpu_has_feature(CPU_FTR_VSX)) {
+			/* VSX => FP reg i is stored in arch.vsr[2*i] */
+			long int i = id - KVM_REG_PPC_FPR0;
+			vcpu->arch.vsr[2 * i] = set_reg_val(id, *val);
+		} else {
+			/* let generic code handle it */
+			r = -EINVAL;
+		}
+		break;
+	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31:
+		if (cpu_has_feature(CPU_FTR_VSX)) {
+			long int i = id - KVM_REG_PPC_VSR0;
+			vcpu->arch.vsr[2 * i] = val->vsxval[0];
+			vcpu->arch.vsr[2 * i + 1] = val->vsxval[1];
+		} else {
+			r = -ENXIO;
+		}
+		break;
+#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c81109f3a376..b853696b6d8e 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -953,6 +953,19 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 	case KVM_REG_PPC_HIOR:
 		*val = get_reg_val(id, to_book3s(vcpu)->hior);
 		break;
+#ifdef CONFIG_VSX
+	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+		long int i = id - KVM_REG_PPC_VSR0;
+
+		if (!cpu_has_feature(CPU_FTR_VSX)) {
+			r = -ENXIO;
+			break;
+		}
+		val->vsxval[0] = vcpu->arch.fpr[i];
+		val->vsxval[1] = vcpu->arch.vsr[i];
+		break;
+	}
+#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
@@ -970,6 +983,19 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		to_book3s(vcpu)->hior = set_reg_val(id, *val);
 		to_book3s(vcpu)->hior_explicit = true;
 		break;
+#ifdef CONFIG_VSX
+	case KVM_REG_PPC_VSR0 ... KVM_REG_PPC_VSR31: {
+		long int i = id - KVM_REG_PPC_VSR0;
+
+		if (!cpu_has_feature(CPU_FTR_VSX)) {
+			r = -ENXIO;
+			break;
+		}
+		vcpu->arch.fpr[i] = val->vsxval[0];
+		vcpu->arch.vsr[i] = val->vsxval[1];
+		break;
+	}
+#endif /* CONFIG_VSX */
 	default:
 		r = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 55b665b0263ae88a776071306ef1eee4b769016b Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 25 Sep 2012 20:33:06 +0000
Subject: KVM: PPC: Book3S HV: Provide a way for userspace to get/set per-vCPU
 areas

The PAPR paravirtualization interface lets guests register three
different types of per-vCPU buffer areas in its memory for communication
with the hypervisor.  These are called virtual processor areas (VPAs).
Currently the hypercalls to register and unregister VPAs are handled
by KVM in the kernel, and userspace has no way to know about or save
and restore these registrations across a migration.

This adds "register" codes for these three areas that userspace can
use with the KVM_GET/SET_ONE_REG ioctls to see what addresses have
been registered, and to register or unregister them.  This will be
needed for guest hibernation and migration, and is also needed so
that userspace can unregister them on reset (otherwise we corrupt
guest memory after reboot by writing to the VPAs registered by the
previous kernel).

The "register" for the VPA is a 64-bit value containing the address,
since the length of the VPA is fixed.  The "registers" for the SLB
shadow buffer and dispatch trace log (DTL) are 128 bits long,
consisting of the guest physical address in the high (first) 64 bits
and the length in the low 64 bits.

This also fixes a bug where we were calling init_vpa unconditionally,
leading to an oops when unregistering the VPA.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm.h     |  6 ++++
 arch/powerpc/include/asm/kvm_ppc.h |  4 +++
 arch/powerpc/kvm/book3s_hv.c       | 64 +++++++++++++++++++++++++++++++++++++-
 3 files changed, 73 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm.h b/arch/powerpc/include/asm/kvm.h
index 1466975129c7..b89ae4db45ce 100644
--- a/arch/powerpc/include/asm/kvm.h
+++ b/arch/powerpc/include/asm/kvm.h
@@ -380,4 +380,10 @@ struct kvm_book3e_206_tlb_params {
 #define KVM_REG_PPC_FPSCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
 #define KVM_REG_PPC_VSCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
 
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 51604a16c8a5..609cca3e9426 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -202,6 +202,10 @@ union kvmppc_one_reg {
 	u64	dval;
 	vector128 vval;
 	u64	vsxval[2];
+	struct {
+		u64	addr;
+		u64	length;
+	}	vpaval;
 };
 
 #define one_reg_size(id)	\
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 94ec0e30969d..9a15da76e56b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -143,6 +143,22 @@ static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa)
 	vpa->yield_count = 1;
 }
 
+static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v,
+		   unsigned long addr, unsigned long len)
+{
+	/* check address is cacheline aligned */
+	if (addr & (L1_CACHE_BYTES - 1))
+		return -EINVAL;
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	if (v->next_gpa != addr || v->len != len) {
+		v->next_gpa = addr;
+		v->len = addr ? len : 0;
+		v->update_pending = 1;
+	}
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return 0;
+}
+
 /* Length for a per-processor buffer is passed in at offset 4 in the buffer */
 struct reg_vpa {
 	u32 dummy;
@@ -321,7 +337,8 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	if (vcpu->arch.vpa.update_pending) {
 		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
-		init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
+		if (vcpu->arch.vpa.pinned_addr)
+			init_vpa(vcpu, vcpu->arch.vpa.pinned_addr);
 	}
 	if (vcpu->arch.dtl.update_pending) {
 		kvmppc_update_vpa(vcpu, &vcpu->arch.dtl);
@@ -600,6 +617,23 @@ int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		}
 		break;
 #endif /* CONFIG_VSX */
+	case KVM_REG_PPC_VPA_ADDR:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
+	case KVM_REG_PPC_VPA_SLB:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		val->vpaval.addr = vcpu->arch.slb_shadow.next_gpa;
+		val->vpaval.length = vcpu->arch.slb_shadow.len;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
+	case KVM_REG_PPC_VPA_DTL:
+		spin_lock(&vcpu->arch.vpa_update_lock);
+		val->vpaval.addr = vcpu->arch.dtl.next_gpa;
+		val->vpaval.length = vcpu->arch.dtl.len;
+		spin_unlock(&vcpu->arch.vpa_update_lock);
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -612,6 +646,7 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 {
 	int r = 0;
 	long int i;
+	unsigned long addr, len;
 
 	switch (id) {
 	case KVM_REG_PPC_HIOR:
@@ -666,6 +701,33 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		}
 		break;
 #endif /* CONFIG_VSX */
+	case KVM_REG_PPC_VPA_ADDR:
+		addr = set_reg_val(id, *val);
+		r = -EINVAL;
+		if (!addr && (vcpu->arch.slb_shadow.next_gpa ||
+			      vcpu->arch.dtl.next_gpa))
+			break;
+		r = set_vpa(vcpu, &vcpu->arch.vpa, addr, sizeof(struct lppaca));
+		break;
+	case KVM_REG_PPC_VPA_SLB:
+		addr = val->vpaval.addr;
+		len = val->vpaval.length;
+		r = -EINVAL;
+		if (addr && !vcpu->arch.vpa.next_gpa)
+			break;
+		r = set_vpa(vcpu, &vcpu->arch.slb_shadow, addr, len);
+		break;
+	case KVM_REG_PPC_VPA_DTL:
+		addr = val->vpaval.addr;
+		len = val->vpaval.length;
+		r = -EINVAL;
+		if (len < sizeof(struct dtl_entry))
+			break;
+		if (addr && !vcpu->arch.vpa.next_gpa)
+			break;
+		len -= len % sizeof(struct dtl_entry);
+		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
+		break;
 	default:
 		r = -EINVAL;
 		break;
-- 
cgit v1.2.3


From 12ecd9570d8941c15602a11725ec9b0ede48d6c2 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Sat, 4 Aug 2012 23:52:33 +0000
Subject: arch/powerpc/kvm/e500_tlb.c: fix error return code

Convert a 0 error return code to a negative one, as returned elsewhere in the
function.

A new label is also added to avoid freeing things that are known to not yet
be allocated.

A simplified version of the semantic match that finds the first problem is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
identifier ret;
expression e,e1,e2,e3,e4,x;
@@

(
if (\(ret != 0\|ret < 0\) || ...) { ... return ...; }
|
ret = 0
)
... when != ret = e1
*x = \(kmalloc\|kzalloc\|kcalloc\|devm_kzalloc\|ioremap\|ioremap_nocache\|devm_ioremap\|devm_ioremap_nocache\)(...);
... when != x = e2
    when != ret = e3
*if (x == NULL || ...)
{
  ... when != ret = e4
*  return ret;
}
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500_tlb.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 641f97847b95..c73389477d17 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -1233,21 +1233,27 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 	}
 
 	virt = vmap(pages, num_pages, VM_MAP, PAGE_KERNEL);
-	if (!virt)
+	if (!virt) {
+		ret = -ENOMEM;
 		goto err_put_page;
+	}
 
 	privs[0] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[0],
 			   GFP_KERNEL);
 	privs[1] = kzalloc(sizeof(struct tlbe_priv) * params.tlb_sizes[1],
 			   GFP_KERNEL);
 
-	if (!privs[0] || !privs[1])
-		goto err_put_page;
+	if (!privs[0] || !privs[1]) {
+		ret = -ENOMEM;
+		goto err_privs;
+	}
 
 	g2h_bitmap = kzalloc(sizeof(u64) * params.tlb_sizes[1],
 	                     GFP_KERNEL);
-	if (!g2h_bitmap)
-		goto err_put_page;
+	if (!g2h_bitmap) {
+		ret = -ENOMEM;
+		goto err_privs;
+	}
 
 	free_gtlb(vcpu_e500);
 
@@ -1287,10 +1293,11 @@ int kvm_vcpu_ioctl_config_tlb(struct kvm_vcpu *vcpu,
 	kvmppc_recalc_tlb1map_range(vcpu_e500);
 	return 0;
 
-err_put_page:
+err_privs:
 	kfree(privs[0]);
 	kfree(privs[1]);
 
+err_put_page:
 	for (i = 0; i < num_pages; i++)
 		put_page(pages[i]);
 
-- 
cgit v1.2.3


From 8ca40a70a70988c0bdea106c894843f763ca2989 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Sun, 14 Oct 2012 23:10:18 -0400
Subject: KVM: Take kvm instead of vcpu to mmu_notifier_retry

The mmu_notifier_retry is not specific to any vcpu (and never will be)
so only take struct kvm as a parameter.

The motivation is the ARM mmu code that needs to call this from
somewhere where we long let go of the vcpu pointer.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7a4aae99ac5b..2a89a36e7263 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -710,7 +710,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 	/* Check if we might have been invalidated; let the guest retry if so */
 	ret = RESUME_GUEST;
-	if (mmu_notifier_retry(vcpu, mmu_seq)) {
+	if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
 		unlock_rmap(rmap);
 		goto out_unlock;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 9955216477a4..5e06e3153888 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
 		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(vcpu, mmu_seq)) {
+		    mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
-- 
cgit v1.2.3


From 81c52c56e2b43589091ee29038bcf793d3f184ab Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 16 Oct 2012 20:10:59 +0800
Subject: KVM: do not treat noslot pfn as a error pfn

This patch filters noslot pfn out from error pfns based on Marcelo comment:
noslot pfn is not a error pfn

After this patch,
- is_noslot_pfn indicates that the gfn is not in slot
- is_error_pfn indicates that the gfn is in slot but the error is occurred
  when translate the gfn to pfn
- is_error_noslot_pfn indicates that the pfn either it is error pfns or it
  is noslot pfn
And is_invalid_pfn can be removed, it makes the code more clean

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_32_mmu_host.c | 2 +-
 arch/powerpc/kvm/book3s_64_mmu_host.c | 2 +-
 arch/powerpc/kvm/e500_tlb.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c
index d1107a9b5d13..00e619bf608e 100644
--- a/arch/powerpc/kvm/book3s_32_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_32_mmu_host.c
@@ -155,7 +155,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
-	if (is_error_pfn(hpaddr)) {
+	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n",
 				 orig_pte->eaddr);
 		r = -EINVAL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index d0205a545a81..ead58e317294 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,7 +93,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
 
 	/* Get host physical address for gpa */
 	hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT);
-	if (is_error_pfn(hpaddr)) {
+	if (is_error_noslot_pfn(hpaddr)) {
 		printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", orig_pte->eaddr);
 		r = -EINVAL;
 		goto out;
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c73389477d17..6305ee692ef7 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -524,7 +524,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	if (likely(!pfnmap)) {
 		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
 		pfn = gfn_to_pfn_memslot(slot, gfn);
-		if (is_error_pfn(pfn)) {
+		if (is_error_noslot_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
 			return;
-- 
cgit v1.2.3


From e43a028752fed049e4bd94ef895542f96d79fa74 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sat, 6 Oct 2012 03:56:35 +0200
Subject: KVM: PPC: 44x: fix DCR read/write

When remembering the direction of a DCR transaction, we should write
to the same variable that we interpret on later when doing vcpu_run
again.

Signed-off-by: Alexander Graf <agraf@suse.de>
Cc: stable@vger.kernel.org
---
 arch/powerpc/kvm/44x_emulate.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c
index 1a793c4c4a67..35ec0a8547da 100644
--- a/arch/powerpc/kvm/44x_emulate.c
+++ b/arch/powerpc/kvm/44x_emulate.c
@@ -46,6 +46,7 @@ static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn)
 		vcpu->run->dcr.dcrn = dcrn;
 		vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs);
 		vcpu->run->dcr.is_write = 1;
+		vcpu->arch.dcr_is_write = 1;
 		vcpu->arch.dcr_needed = 1;
 		kvmppc_account_exit(vcpu, DCR_EXITS);
 		return EMULATE_DO_DCR;
@@ -80,6 +81,7 @@ static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn)
 		vcpu->run->dcr.dcrn = dcrn;
 		vcpu->run->dcr.data =  0;
 		vcpu->run->dcr.is_write = 0;
+		vcpu->arch.dcr_is_write = 0;
 		vcpu->arch.io_gpr = rt;
 		vcpu->arch.dcr_needed = 1;
 		kvmppc_account_exit(vcpu, DCR_EXITS);
-- 
cgit v1.2.3


From 388cf9ee3c751c3a4cf8776987143354d6d8c797 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sat, 6 Oct 2012 23:19:01 +0200
Subject: KVM: PPC: Move mtspr/mfspr emulation into own functions

The mtspr/mfspr emulation code became quite big over time. Move it
into its own function so things stay more readable.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/emulate.c | 221 +++++++++++++++++++++++++--------------------
 1 file changed, 121 insertions(+), 100 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index ee04abaefe23..b0855e5d8905 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -131,6 +131,125 @@ u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb)
 	return vcpu->arch.dec - jd;
 }
 
+static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs)
+{
+	enum emulation_result emulated = EMULATE_DONE;
+	ulong spr_val = kvmppc_get_gpr(vcpu, rs);
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		vcpu->arch.shared->srr0 = spr_val;
+		break;
+	case SPRN_SRR1:
+		vcpu->arch.shared->srr1 = spr_val;
+		break;
+
+	/* XXX We need to context-switch the timebase for
+	 * watchdog and FIT. */
+	case SPRN_TBWL: break;
+	case SPRN_TBWU: break;
+
+	case SPRN_MSSSR0: break;
+
+	case SPRN_DEC:
+		vcpu->arch.dec = spr_val;
+		kvmppc_emulate_dec(vcpu);
+		break;
+
+	case SPRN_SPRG0:
+		vcpu->arch.shared->sprg0 = spr_val;
+		break;
+	case SPRN_SPRG1:
+		vcpu->arch.shared->sprg1 = spr_val;
+		break;
+	case SPRN_SPRG2:
+		vcpu->arch.shared->sprg2 = spr_val;
+		break;
+	case SPRN_SPRG3:
+		vcpu->arch.shared->sprg3 = spr_val;
+		break;
+
+	default:
+		emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
+						     spr_val);
+		if (emulated == EMULATE_FAIL)
+			printk(KERN_INFO "mtspr: unknown spr "
+				"0x%x\n", sprn);
+		break;
+	}
+
+	kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+
+	return emulated;
+}
+
+static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt)
+{
+	enum emulation_result emulated = EMULATE_DONE;
+	ulong spr_val = 0;
+
+	switch (sprn) {
+	case SPRN_SRR0:
+		spr_val = vcpu->arch.shared->srr0;
+		break;
+	case SPRN_SRR1:
+		spr_val = vcpu->arch.shared->srr1;
+		break;
+	case SPRN_PVR:
+		spr_val = vcpu->arch.pvr;
+		break;
+	case SPRN_PIR:
+		spr_val = vcpu->vcpu_id;
+		break;
+	case SPRN_MSSSR0:
+		spr_val = 0;
+		break;
+
+	/* Note: mftb and TBRL/TBWL are user-accessible, so
+	 * the guest can always access the real TB anyways.
+	 * In fact, we probably will never see these traps. */
+	case SPRN_TBWL:
+		spr_val = get_tb() >> 32;
+		break;
+	case SPRN_TBWU:
+		spr_val = get_tb();
+		break;
+
+	case SPRN_SPRG0:
+		spr_val = vcpu->arch.shared->sprg0;
+		break;
+	case SPRN_SPRG1:
+		spr_val = vcpu->arch.shared->sprg1;
+		break;
+	case SPRN_SPRG2:
+		spr_val = vcpu->arch.shared->sprg2;
+		break;
+	case SPRN_SPRG3:
+		spr_val = vcpu->arch.shared->sprg3;
+		break;
+	/* Note: SPRG4-7 are user-readable, so we don't get
+	 * a trap. */
+
+	case SPRN_DEC:
+		spr_val = kvmppc_get_dec(vcpu, get_tb());
+		break;
+	default:
+		emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
+						     &spr_val);
+		if (unlikely(emulated == EMULATE_FAIL)) {
+			printk(KERN_INFO "mfspr: unknown spr "
+				"0x%x\n", sprn);
+		}
+		break;
+	}
+
+	if (emulated == EMULATE_DONE)
+		kvmppc_set_gpr(vcpu, rt, spr_val);
+	kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+
+	return emulated;
+}
+
 /* XXX to do:
  * lhax
  * lhaux
@@ -156,7 +275,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	int sprn = get_sprn(inst);
 	enum emulation_result emulated = EMULATE_DONE;
 	int advance = 1;
-	ulong spr_val = 0;
 
 	/* this default type might be overwritten by subcategories */
 	kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS);
@@ -236,62 +354,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 
 		case OP_31_XOP_MFSPR:
-			switch (sprn) {
-			case SPRN_SRR0:
-				spr_val = vcpu->arch.shared->srr0;
-				break;
-			case SPRN_SRR1:
-				spr_val = vcpu->arch.shared->srr1;
-				break;
-			case SPRN_PVR:
-				spr_val = vcpu->arch.pvr;
-				break;
-			case SPRN_PIR:
-				spr_val = vcpu->vcpu_id;
-				break;
-			case SPRN_MSSSR0:
-				spr_val = 0;
-				break;
-
-			/* Note: mftb and TBRL/TBWL are user-accessible, so
-			 * the guest can always access the real TB anyways.
-			 * In fact, we probably will never see these traps. */
-			case SPRN_TBWL:
-				spr_val = get_tb() >> 32;
-				break;
-			case SPRN_TBWU:
-				spr_val = get_tb();
-				break;
-
-			case SPRN_SPRG0:
-				spr_val = vcpu->arch.shared->sprg0;
-				break;
-			case SPRN_SPRG1:
-				spr_val = vcpu->arch.shared->sprg1;
-				break;
-			case SPRN_SPRG2:
-				spr_val = vcpu->arch.shared->sprg2;
-				break;
-			case SPRN_SPRG3:
-				spr_val = vcpu->arch.shared->sprg3;
-				break;
-			/* Note: SPRG4-7 are user-readable, so we don't get
-			 * a trap. */
-
-			case SPRN_DEC:
-				spr_val = kvmppc_get_dec(vcpu, get_tb());
-				break;
-			default:
-				emulated = kvmppc_core_emulate_mfspr(vcpu, sprn,
-								     &spr_val);
-				if (unlikely(emulated == EMULATE_FAIL)) {
-					printk(KERN_INFO "mfspr: unknown spr "
-						"0x%x\n", sprn);
-				}
-				break;
-			}
-			kvmppc_set_gpr(vcpu, rt, spr_val);
-			kvmppc_set_exit_type(vcpu, EMULATED_MFSPR_EXITS);
+			emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
 			break;
 
 		case OP_31_XOP_STHX:
@@ -308,49 +371,7 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			break;
 
 		case OP_31_XOP_MTSPR:
-			spr_val = kvmppc_get_gpr(vcpu, rs);
-			switch (sprn) {
-			case SPRN_SRR0:
-				vcpu->arch.shared->srr0 = spr_val;
-				break;
-			case SPRN_SRR1:
-				vcpu->arch.shared->srr1 = spr_val;
-				break;
-
-			/* XXX We need to context-switch the timebase for
-			 * watchdog and FIT. */
-			case SPRN_TBWL: break;
-			case SPRN_TBWU: break;
-
-			case SPRN_MSSSR0: break;
-
-			case SPRN_DEC:
-				vcpu->arch.dec = spr_val;
-				kvmppc_emulate_dec(vcpu);
-				break;
-
-			case SPRN_SPRG0:
-				vcpu->arch.shared->sprg0 = spr_val;
-				break;
-			case SPRN_SPRG1:
-				vcpu->arch.shared->sprg1 = spr_val;
-				break;
-			case SPRN_SPRG2:
-				vcpu->arch.shared->sprg2 = spr_val;
-				break;
-			case SPRN_SPRG3:
-				vcpu->arch.shared->sprg3 = spr_val;
-				break;
-
-			default:
-				emulated = kvmppc_core_emulate_mtspr(vcpu, sprn,
-								     spr_val);
-				if (emulated == EMULATE_FAIL)
-					printk(KERN_INFO "mtspr: unknown spr "
-						"0x%x\n", sprn);
-				break;
-			}
-			kvmppc_set_exit_type(vcpu, EMULATED_MTSPR_EXITS);
+			emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
 			break;
 
 		case OP_31_XOP_DCBI:
-- 
cgit v1.2.3


From c99ec973a63e2249020d6d93a46d7572432da6a2 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sat, 27 Oct 2012 19:26:14 +0200
Subject: PPC: ePAPR: Convert header to uapi

The new uapi framework splits kernel internal and user space exported
bits of header files more cleanly. Adjust the ePAPR header accordingly.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/Kbuild              |  1 -
 arch/powerpc/include/asm/epapr_hcalls.h      | 55 +---------------
 arch/powerpc/include/uapi/asm/Kbuild         |  1 +
 arch/powerpc/include/uapi/asm/epapr_hcalls.h | 98 ++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 55 deletions(-)
 create mode 100644 arch/powerpc/include/uapi/asm/epapr_hcalls.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 13d6b7bf3b69..7e313f1ed183 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -34,6 +34,5 @@ header-y += termios.h
 header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
-header-y += epapr_hcalls.h
 
 generic-y += rwsem.h
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index b8d94459a929..58997afcd085 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -50,60 +50,7 @@
 #ifndef _EPAPR_HCALLS_H
 #define _EPAPR_HCALLS_H
 
-#define EV_BYTE_CHANNEL_SEND		1
-#define EV_BYTE_CHANNEL_RECEIVE		2
-#define EV_BYTE_CHANNEL_POLL		3
-#define EV_INT_SET_CONFIG		4
-#define EV_INT_GET_CONFIG		5
-#define EV_INT_SET_MASK			6
-#define EV_INT_GET_MASK			7
-#define EV_INT_IACK			9
-#define EV_INT_EOI			10
-#define EV_INT_SEND_IPI			11
-#define EV_INT_SET_TASK_PRIORITY	12
-#define EV_INT_GET_TASK_PRIORITY	13
-#define EV_DOORBELL_SEND		14
-#define EV_MSGSND			15
-#define EV_IDLE				16
-
-/* vendor ID: epapr */
-#define EV_LOCAL_VENDOR_ID		0	/* for private use */
-#define EV_EPAPR_VENDOR_ID		1
-#define EV_FSL_VENDOR_ID		2	/* Freescale Semiconductor */
-#define EV_IBM_VENDOR_ID		3	/* IBM */
-#define EV_GHS_VENDOR_ID		4	/* Green Hills Software */
-#define EV_ENEA_VENDOR_ID		5	/* Enea */
-#define EV_WR_VENDOR_ID			6	/* Wind River Systems */
-#define EV_AMCC_VENDOR_ID		7	/* Applied Micro Circuits */
-#define EV_KVM_VENDOR_ID		42	/* KVM */
-
-/* The max number of bytes that a byte channel can send or receive per call */
-#define EV_BYTE_CHANNEL_MAX_BYTES	16
-
-
-#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
-#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
-
-/* epapr return codes */
-#define EV_SUCCESS		0
-#define EV_EPERM		1	/* Operation not permitted */
-#define EV_ENOENT		2	/*  Entry Not Found */
-#define EV_EIO			3	/* I/O error occured */
-#define EV_EAGAIN		4	/* The operation had insufficient
-					 * resources to complete and should be
-					 * retried
-					 */
-#define EV_ENOMEM		5	/* There was insufficient memory to
-					 * complete the operation */
-#define EV_EFAULT		6	/* Bad guest address */
-#define EV_ENODEV		7	/* No such device */
-#define EV_EINVAL		8	/* An argument supplied to the hcall
-					   was out of range or invalid */
-#define EV_INTERNAL		9	/* An internal error occured */
-#define EV_CONFIG		10	/* A configuration error was detected */
-#define EV_INVALID_STATE	11	/* The object is in an invalid state */
-#define EV_UNIMPLEMENTED	12	/* Unimplemented hypercall */
-#define EV_BUFFER_OVERFLOW	13	/* Caller-supplied buffer too small */
+#include <uapi/asm/epapr_hcalls.h>
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
index baebb3da1d44..e6b5be86e4fa 100644
--- a/arch/powerpc/include/uapi/asm/Kbuild
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -1,3 +1,4 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
+header-y += epapr_hcalls.h
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
new file mode 100644
index 000000000000..046c79364f83
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -0,0 +1,98 @@
+/*
+ * ePAPR hcall interface
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _UAPI__EPAPR_HCALLS_H
+#define _UAPI__EPAPR_HCALLS_H
+
+#define EV_BYTE_CHANNEL_SEND		1
+#define EV_BYTE_CHANNEL_RECEIVE		2
+#define EV_BYTE_CHANNEL_POLL		3
+#define EV_INT_SET_CONFIG		4
+#define EV_INT_GET_CONFIG		5
+#define EV_INT_SET_MASK			6
+#define EV_INT_GET_MASK			7
+#define EV_INT_IACK			9
+#define EV_INT_EOI			10
+#define EV_INT_SEND_IPI			11
+#define EV_INT_SET_TASK_PRIORITY	12
+#define EV_INT_GET_TASK_PRIORITY	13
+#define EV_DOORBELL_SEND		14
+#define EV_MSGSND			15
+#define EV_IDLE				16
+
+/* vendor ID: epapr */
+#define EV_LOCAL_VENDOR_ID		0	/* for private use */
+#define EV_EPAPR_VENDOR_ID		1
+#define EV_FSL_VENDOR_ID		2	/* Freescale Semiconductor */
+#define EV_IBM_VENDOR_ID		3	/* IBM */
+#define EV_GHS_VENDOR_ID		4	/* Green Hills Software */
+#define EV_ENEA_VENDOR_ID		5	/* Enea */
+#define EV_WR_VENDOR_ID			6	/* Wind River Systems */
+#define EV_AMCC_VENDOR_ID		7	/* Applied Micro Circuits */
+#define EV_KVM_VENDOR_ID		42	/* KVM */
+
+/* The max number of bytes that a byte channel can send or receive per call */
+#define EV_BYTE_CHANNEL_MAX_BYTES	16
+
+
+#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
+#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
+
+/* epapr return codes */
+#define EV_SUCCESS		0
+#define EV_EPERM		1	/* Operation not permitted */
+#define EV_ENOENT		2	/*  Entry Not Found */
+#define EV_EIO			3	/* I/O error occured */
+#define EV_EAGAIN		4	/* The operation had insufficient
+					 * resources to complete and should be
+					 * retried
+					 */
+#define EV_ENOMEM		5	/* There was insufficient memory to
+					 * complete the operation */
+#define EV_EFAULT		6	/* Bad guest address */
+#define EV_ENODEV		7	/* No such device */
+#define EV_EINVAL		8	/* An argument supplied to the hcall
+					   was out of range or invalid */
+#define EV_INTERNAL		9	/* An internal error occured */
+#define EV_CONFIG		10	/* A configuration error was detected */
+#define EV_INVALID_STATE	11	/* The object is in an invalid state */
+#define EV_UNIMPLEMENTED	12	/* Unimplemented hypercall */
+#define EV_BUFFER_OVERFLOW	13	/* Caller-supplied buffer too small */
+
+#endif /* _UAPI__EPAPR_HCALLS_H */
-- 
cgit v1.2.3


From 512691d4907d7cf4b8d05c6f8572d1fa60ccec20 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:15:41 +0000
Subject: KVM: PPC: Book3S HV: Allow KVM guests to stop secondary threads
 coming online

When a Book3S HV KVM guest is running, we need the host to be in
single-thread mode, that is, all of the cores (or at least all of
the cores where the KVM guest could run) to be running only one
active hardware thread.  This is because of the hardware restriction
in POWER processors that all of the hardware threads in the core
must be in the same logical partition.  Complying with this restriction
is much easier if, from the host kernel's point of view, only one
hardware thread is active.

This adds two hooks in the SMP hotplug code to allow the KVM code to
make sure that secondary threads (i.e. hardware threads other than
thread 0) cannot come online while any KVM guest exists.  The KVM
code still has to check that any core where it runs a guest has the
secondary threads offline, but having done that check it can now be
sure that they will not come online while the guest is running.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/smp.h |  8 ++++++++
 arch/powerpc/kernel/smp.c      | 46 ++++++++++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c   | 12 +++++++++--
 3 files changed, 64 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/smp.h b/arch/powerpc/include/asm/smp.h
index ebc24dc5b1a1..b625a1a9ad16 100644
--- a/arch/powerpc/include/asm/smp.h
+++ b/arch/powerpc/include/asm/smp.h
@@ -66,6 +66,14 @@ void generic_cpu_die(unsigned int cpu);
 void generic_mach_cpu_die(void);
 void generic_set_cpu_dead(unsigned int cpu);
 int generic_check_cpu_restart(unsigned int cpu);
+
+extern void inhibit_secondary_onlining(void);
+extern void uninhibit_secondary_onlining(void);
+
+#else /* HOTPLUG_CPU */
+static inline void inhibit_secondary_onlining(void) {}
+static inline void uninhibit_secondary_onlining(void) {}
+
 #endif
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8d4214afc21d..c4f420c5fc1b 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -417,6 +417,45 @@ int generic_check_cpu_restart(unsigned int cpu)
 {
 	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
 }
+
+static atomic_t secondary_inhibit_count;
+
+/*
+ * Don't allow secondary CPU threads to come online
+ */
+void inhibit_secondary_onlining(void)
+{
+	/*
+	 * This makes secondary_inhibit_count stable during cpu
+	 * online/offline operations.
+	 */
+	get_online_cpus();
+
+	atomic_inc(&secondary_inhibit_count);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(inhibit_secondary_onlining);
+
+/*
+ * Allow secondary CPU threads to come online again
+ */
+void uninhibit_secondary_onlining(void)
+{
+	get_online_cpus();
+	atomic_dec(&secondary_inhibit_count);
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(uninhibit_secondary_onlining);
+
+static int secondaries_inhibited(void)
+{
+	return atomic_read(&secondary_inhibit_count);
+}
+
+#else /* HOTPLUG_CPU */
+
+#define secondaries_inhibited()		0
+
 #endif
 
 static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
@@ -435,6 +474,13 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc, c;
 
+	/*
+	 * Don't allow secondary threads to come online if inhibited
+	 */
+	if (threads_per_core > 1 && secondaries_inhibited() &&
+	    cpu % threads_per_core != 0)
+		return -EBUSY;
+
 	if (smp_ops == NULL ||
 	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9a15da76e56b..c5ddf048e19e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -47,6 +47,7 @@
 #include <asm/page.h>
 #include <asm/hvcall.h>
 #include <asm/switch_to.h>
+#include <asm/smp.h>
 #include <linux/gfp.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
@@ -1016,8 +1017,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	/*
 	 * Make sure we are running on thread 0, and that
 	 * secondary threads are offline.
-	 * XXX we should also block attempts to bring any
-	 * secondary threads online.
 	 */
 	if (threads_per_core > 1 && !on_primary_thread()) {
 		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
@@ -1730,11 +1729,20 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 
 	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
 	spin_lock_init(&kvm->arch.slot_phys_lock);
+
+	/*
+	 * Don't allow secondary CPU threads to come online
+	 * while any KVM VMs exist.
+	 */
+	inhibit_secondary_onlining();
+
 	return 0;
 }
 
 void kvmppc_core_destroy_vm(struct kvm *kvm)
 {
+	uninhibit_secondary_onlining();
+
 	if (kvm->arch.rma) {
 		kvm_release_rma(kvm->arch.rma);
 		kvm->arch.rma = NULL;
-- 
cgit v1.2.3


From 7b444c6710c6c4994e31eb19216ce055836e65c4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:16:14 +0000
Subject: KVM: PPC: Book3S HV: Fix some races in starting secondary threads

Subsequent patches implementing in-kernel XICS emulation will make it
possible for IPIs to arrive at secondary threads at arbitrary times.
This fixes some races in how we start the secondary threads, which
if not fixed could lead to occasional crashes of the host kernel.

This makes sure that (a) we have grabbed all the secondary threads,
and verified that they are no longer in the kernel, before we start
any thread, (b) that the secondary thread loads its vcpu pointer
after clearing the IPI that woke it up (so we don't miss a wakeup),
and (c) that the secondary thread clears its vcpu pointer before
incrementing the nap count.  It also removes unnecessary setting
of the vcpu and vcore pointers in the paca in kvmppc_core_vcpu_load.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv.c            | 41 +++++++++++++++++++--------------
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 11 ++++++---
 2 files changed, 32 insertions(+), 20 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c5ddf048e19e..77dec0f8a030 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -64,8 +64,6 @@ void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	local_paca->kvm_hstate.kvm_vcpu = vcpu;
-	local_paca->kvm_hstate.kvm_vcore = vc;
 	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
 		vc->stolen_tb += mftb() - vc->preempt_tb;
 }
@@ -880,6 +878,7 @@ static int kvmppc_grab_hwthread(int cpu)
 
 	/* Ensure the thread won't go into the kernel if it wakes */
 	tpaca->kvm_hstate.hwthread_req = 1;
+	tpaca->kvm_hstate.kvm_vcpu = NULL;
 
 	/*
 	 * If the thread is already executing in the kernel (e.g. handling
@@ -929,7 +928,6 @@ static void kvmppc_start_thread(struct kvm_vcpu *vcpu)
 	smp_wmb();
 #if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
 	if (vcpu->arch.ptid) {
-		kvmppc_grab_hwthread(cpu);
 		xics_wake_cpu(cpu);
 		++vc->n_woken;
 	}
@@ -955,7 +953,8 @@ static void kvmppc_wait_for_nap(struct kvmppc_vcore *vc)
 
 /*
  * Check that we are on thread 0 and that any other threads in
- * this core are off-line.
+ * this core are off-line.  Then grab the threads so they can't
+ * enter the kernel.
  */
 static int on_primary_thread(void)
 {
@@ -967,6 +966,17 @@ static int on_primary_thread(void)
 	while (++thr < threads_per_core)
 		if (cpu_online(cpu + thr))
 			return 0;
+
+	/* Grab all hw threads so they can't go into the kernel */
+	for (thr = 1; thr < threads_per_core; ++thr) {
+		if (kvmppc_grab_hwthread(cpu + thr)) {
+			/* Couldn't grab one; let the others go */
+			do {
+				kvmppc_release_hwthread(cpu + thr);
+			} while (--thr > 0);
+			return 0;
+		}
+	}
 	return 1;
 }
 
@@ -1014,16 +1024,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		spin_lock(&vc->lock);
 	}
 
-	/*
-	 * Make sure we are running on thread 0, and that
-	 * secondary threads are offline.
-	 */
-	if (threads_per_core > 1 && !on_primary_thread()) {
-		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-			vcpu->arch.ret = -EBUSY;
-		goto out;
-	}
-
 	/*
 	 * Assign physical thread IDs, first to non-ceded vcpus
 	 * and then to ceded ones.
@@ -1043,15 +1043,22 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
 
+	/*
+	 * Make sure we are running on thread 0, and that
+	 * secondary threads are offline.
+	 */
+	if (threads_per_core > 1 && !on_primary_thread()) {
+		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
+			vcpu->arch.ret = -EBUSY;
+		goto out;
+	}
+
 	vc->stolen_tb += mftb() - vc->preempt_tb;
 	vc->pcpu = smp_processor_id();
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
-	/* Grab any remaining hw threads so they can't go into the kernel */
-	for (i = ptid; i < threads_per_core; ++i)
-		kvmppc_grab_hwthread(vc->pcpu + i);
 
 	preempt_disable();
 	spin_unlock(&vc->lock);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 44b72feaff7d..1e90ef6191a3 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -134,8 +134,11 @@ kvm_start_guest:
 
 27:	/* XXX should handle hypervisor maintenance interrupts etc. here */
 
+	/* reload vcpu pointer after clearing the IPI */
+	ld	r4,HSTATE_KVM_VCPU(r13)
+	cmpdi	r4,0
 	/* if we have no vcpu to run, go back to sleep */
-	beq	cr1,kvm_no_guest
+	beq	kvm_no_guest
 
 	/* were we napping due to cede? */
 	lbz	r0,HSTATE_NAPPING(r13)
@@ -1587,6 +1590,10 @@ secondary_too_late:
 	.endr
 
 secondary_nap:
+	/* Clear our vcpu pointer so we don't come back in early */
+	li	r0, 0
+	std	r0, HSTATE_KVM_VCPU(r13)
+	lwsync
 	/* Clear any pending IPI - assume we're a secondary thread */
 	ld	r5, HSTATE_XICS_PHYS(r13)
 	li	r7, XICS_XIRR
@@ -1612,8 +1619,6 @@ secondary_nap:
 kvm_no_guest:
 	li	r0, KVM_HWTHREAD_IN_NAP
 	stb	r0, HSTATE_HWTHREAD_STATE(r13)
-	li	r0, 0
-	std	r0, HSTATE_KVM_VCPU(r13)
 
 	li	r3, LPCR_PECE0
 	mfspr	r4, SPRN_LPCR
-- 
cgit v1.2.3


From 913d3ff9a3c3a13c3115eb4b3265aa35a9e0a7ad Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:16:48 +0000
Subject: KVM: PPC: Book3s HV: Don't access runnable threads list without vcore
 lock

There were a few places where we were traversing the list of runnable
threads in a virtual core, i.e. vc->runnable_threads, without holding
the vcore spinlock.  This extends the places where we hold the vcore
spinlock to cover everywhere that we traverse that list.

Since we possibly need to sleep inside kvmppc_book3s_hv_page_fault,
this moves the call of it from kvmppc_handle_exit out to
kvmppc_vcpu_run, where we don't hold the vcore lock.

In kvmppc_vcore_blocked, we don't actually need to check whether
all vcpus are ceded and don't have any pending exceptions, since the
caller has already done that.  The caller (kvmppc_run_vcpu) wasn't
actually checking for pending exceptions, so we add that.

The change of if to while in kvmppc_run_vcpu is to make sure that we
never call kvmppc_remove_runnable() when the vcore state is RUNNING or
EXITING.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_asm.h |  1 +
 arch/powerpc/kvm/book3s_hv.c       | 67 +++++++++++++++++++-------------------
 2 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 76fdcfef0889..aabcdba8f6b0 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -118,6 +118,7 @@
 
 #define RESUME_FLAG_NV          (1<<0)  /* Reload guest nonvolatile state? */
 #define RESUME_FLAG_HOST        (1<<1)  /* Resume host? */
+#define RESUME_FLAG_ARCH1	(1<<2)
 
 #define RESUME_GUEST            0
 #define RESUME_GUEST_NV         RESUME_FLAG_NV
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 77dec0f8a030..3a737a4bb8bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -57,6 +57,9 @@
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
 
+/* Used to indicate that a guest page fault needs to be handled */
+#define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
@@ -431,7 +434,6 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			      struct task_struct *tsk)
 {
 	int r = RESUME_HOST;
-	int srcu_idx;
 
 	vcpu->stat.sum_exits++;
 
@@ -491,16 +493,12 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	 * have been handled already.
 	 */
 	case BOOK3S_INTERRUPT_H_DATA_STORAGE:
-		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = kvmppc_book3s_hv_page_fault(run, vcpu,
-				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
-		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		r = RESUME_PAGE_FAULT;
 		break;
 	case BOOK3S_INTERRUPT_H_INST_STORAGE:
-		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-		r = kvmppc_book3s_hv_page_fault(run, vcpu,
-				kvmppc_get_pc(vcpu), 0);
-		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+		vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
+		vcpu->arch.fault_dsisr = 0;
+		r = RESUME_PAGE_FAULT;
 		break;
 	/*
 	 * This occurs if the guest executes an illegal instruction.
@@ -984,22 +982,24 @@ static int on_primary_thread(void)
  * Run a set of guest threads on a physical core.
  * Called with vc->lock held.
  */
-static int kvmppc_run_core(struct kvmppc_vcore *vc)
+static void kvmppc_run_core(struct kvmppc_vcore *vc)
 {
 	struct kvm_vcpu *vcpu, *vcpu0, *vnext;
 	long ret;
 	u64 now;
 	int ptid, i, need_vpa_update;
 	int srcu_idx;
+	struct kvm_vcpu *vcpus_to_update[threads_per_core];
 
 	/* don't start if any threads have a signal pending */
 	need_vpa_update = 0;
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		if (signal_pending(vcpu->arch.run_task))
-			return 0;
-		need_vpa_update |= vcpu->arch.vpa.update_pending |
-			vcpu->arch.slb_shadow.update_pending |
-			vcpu->arch.dtl.update_pending;
+			return;
+		if (vcpu->arch.vpa.update_pending ||
+		    vcpu->arch.slb_shadow.update_pending ||
+		    vcpu->arch.dtl.update_pending)
+			vcpus_to_update[need_vpa_update++] = vcpu;
 	}
 
 	/*
@@ -1019,8 +1019,8 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	 */
 	if (need_vpa_update) {
 		spin_unlock(&vc->lock);
-		list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
-			kvmppc_update_vpas(vcpu);
+		for (i = 0; i < need_vpa_update; ++i)
+			kvmppc_update_vpas(vcpus_to_update[i]);
 		spin_lock(&vc->lock);
 	}
 
@@ -1037,8 +1037,10 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 			vcpu->arch.ptid = ptid++;
 		}
 	}
-	if (!vcpu0)
-		return 0;		/* nothing to run */
+	if (!vcpu0) {
+		vc->vcore_state = VCORE_INACTIVE;
+		return;		/* nothing to run; should never happen */
+	}
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
@@ -1091,6 +1093,7 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 	preempt_enable();
 	kvm_resched(vcpu);
 
+	spin_lock(&vc->lock);
 	now = get_tb();
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		/* cancel pending dec exception if dec is positive */
@@ -1114,7 +1117,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 		}
 	}
 
-	spin_lock(&vc->lock);
  out:
 	vc->vcore_state = VCORE_INACTIVE;
 	vc->preempt_tb = mftb();
@@ -1125,8 +1127,6 @@ static int kvmppc_run_core(struct kvmppc_vcore *vc)
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
-
-	return 1;
 }
 
 /*
@@ -1150,20 +1150,11 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
 	DEFINE_WAIT(wait);
-	struct kvm_vcpu *v;
-	int all_idle = 1;
 
 	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
 	vc->vcore_state = VCORE_SLEEPING;
 	spin_unlock(&vc->lock);
-	list_for_each_entry(v, &vc->runnable_threads, arch.run_list) {
-		if (!v->arch.ceded || v->arch.pending_exceptions) {
-			all_idle = 0;
-			break;
-		}
-	}
-	if (all_idle)
-		schedule();
+	schedule();
 	finish_wait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
@@ -1219,7 +1210,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		vc->runner = vcpu;
 		n_ceded = 0;
 		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
-			n_ceded += v->arch.ceded;
+			if (!v->arch.pending_exceptions)
+				n_ceded += v->arch.ceded;
 		if (n_ceded == vc->n_runnable)
 			kvmppc_vcore_blocked(vc);
 		else
@@ -1240,8 +1232,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	}
 
 	if (signal_pending(current)) {
-		if (vc->vcore_state == VCORE_RUNNING ||
-		    vc->vcore_state == VCORE_EXITING) {
+		while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+		       (vc->vcore_state == VCORE_RUNNING ||
+			vc->vcore_state == VCORE_EXITING)) {
 			spin_unlock(&vc->lock);
 			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
 			spin_lock(&vc->lock);
@@ -1261,6 +1254,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 {
 	int r;
+	int srcu_idx;
 
 	if (!vcpu->arch.sane) {
 		run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
@@ -1299,6 +1293,11 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
 			r = kvmppc_pseries_do_hcall(vcpu);
 			kvmppc_core_prepare_to_enter(vcpu);
+		} else if (r == RESUME_PAGE_FAULT) {
+			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+			r = kvmppc_book3s_hv_page_fault(run, vcpu,
+				vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+			srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
 		}
 	} while (r == RESUME_GUEST);
 
-- 
cgit v1.2.3


From 2f12f03436847e063cda8cc4c339ad84961cbf39 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:17:17 +0000
Subject: KVM: PPC: Book3S HV: Fixes for late-joining threads

If a thread in a virtual core becomes runnable while other threads
in the same virtual core are already running in the guest, it is
possible for the latecomer to join the others on the core without
first pulling them all out of the guest.  Currently this only happens
rarely, when a vcpu is first started.  This fixes some bugs and
omissions in the code in this case.

First, we need to check for VPA updates for the latecomer and make
a DTL entry for it.  Secondly, if it comes along while the master
vcpu is doing a VPA update, we don't need to do anything since the
master will pick it up in kvmppc_run_core.  To handle this correctly
we introduce a new vcore state, VCORE_STARTING.  Thirdly, there is
a race because we currently clear the hardware thread's hwthread_req
before waiting to see it get to nap.  A latecomer thread could have
its hwthread_req cleared before it gets to test it, and therefore
never increment the nap_count, leading to messages about wait_for_nap
timeouts.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |  7 ++++---
 arch/powerpc/kvm/book3s_hv.c        | 14 +++++++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 68f5a308737a..218534d46ae9 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -289,9 +289,10 @@ struct kvmppc_vcore {
 
 /* Values for vcore_state */
 #define VCORE_INACTIVE	0
-#define VCORE_RUNNING	1
-#define VCORE_EXITING	2
-#define VCORE_SLEEPING	3
+#define VCORE_SLEEPING	1
+#define VCORE_STARTING	2
+#define VCORE_RUNNING	3
+#define VCORE_EXITING	4
 
 /*
  * Struct used to manage memory for a virtual processor area
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3a737a4bb8bf..89995fa6e945 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -336,6 +336,11 @@ static void kvmppc_update_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *vpap)
 
 static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 {
+	if (!(vcpu->arch.vpa.update_pending ||
+	      vcpu->arch.slb_shadow.update_pending ||
+	      vcpu->arch.dtl.update_pending))
+		return;
+
 	spin_lock(&vcpu->arch.vpa_update_lock);
 	if (vcpu->arch.vpa.update_pending) {
 		kvmppc_update_vpa(vcpu, &vcpu->arch.vpa);
@@ -1009,7 +1014,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
-	vc->vcore_state = VCORE_RUNNING;
+	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
 
@@ -1062,6 +1067,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		kvmppc_create_dtl_entry(vcpu, vc);
 	}
 
+	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
 	spin_unlock(&vc->lock);
 
@@ -1070,8 +1076,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	srcu_idx = srcu_read_lock(&vcpu0->kvm->srcu);
 
 	__kvmppc_vcore_entry(NULL, vcpu0);
-	for (i = 0; i < threads_per_core; ++i)
-		kvmppc_release_hwthread(vc->pcpu + i);
 
 	spin_lock(&vc->lock);
 	/* disable sending of IPIs on virtual external irqs */
@@ -1080,6 +1084,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	/* wait for secondary threads to finish writing their state to memory */
 	if (vc->nap_count < vc->n_woken)
 		kvmppc_wait_for_nap(vc);
+	for (i = 0; i < threads_per_core; ++i)
+		kvmppc_release_hwthread(vc->pcpu + i);
 	/* prevent other vcpu threads from doing kvmppc_start_thread() now */
 	vc->vcore_state = VCORE_EXITING;
 	spin_unlock(&vc->lock);
@@ -1170,6 +1176,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
+	kvmppc_update_vpas(vcpu);
 
 	/*
 	 * Synchronize with other threads in this virtual core
@@ -1193,6 +1200,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		if (vc->vcore_state == VCORE_RUNNING &&
 		    VCORE_EXIT_COUNT(vc) == 0) {
 			vcpu->arch.ptid = vc->n_runnable - 1;
+			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
 		}
 
-- 
cgit v1.2.3


From 8455d79e2163997e479931b8d5b7e60a92cd2b86 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:17:42 +0000
Subject: KVM: PPC: Book3S HV: Run virtual core whenever any vcpus in it can
 run

Currently the Book3S HV code implements a policy on multi-threaded
processors (i.e. POWER7) that requires all of the active vcpus in a
virtual core to be ready to run before we run the virtual core.
However, that causes problems on reset, because reset stops all vcpus
except vcpu 0, and can also reduce throughput since all four threads
in a virtual core have to wait whenever any one of them hits a
hypervisor page fault.

This relaxes the policy, allowing the virtual core to run as soon as
any vcpu in it is runnable.  With this, the KVMPPC_VCPU_STOPPED state
and the KVMPPC_VCPU_BUSY_IN_HOST state have been combined into a single
KVMPPC_VCPU_NOTREADY state, since we no longer need to distinguish
between them.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |  5 +--
 arch/powerpc/kvm/book3s_hv.c        | 74 +++++++++++++++++++------------------
 2 files changed, 40 insertions(+), 39 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 218534d46ae9..1e8cbd1299fc 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -563,9 +563,8 @@ struct kvm_vcpu_arch {
 };
 
 /* Values for vcpu->arch.state */
-#define KVMPPC_VCPU_STOPPED		0
-#define KVMPPC_VCPU_BUSY_IN_HOST	1
-#define KVMPPC_VCPU_RUNNABLE		2
+#define KVMPPC_VCPU_NOTREADY		0
+#define KVMPPC_VCPU_RUNNABLE		1
 
 /* Values for vcpu->arch.io_gpr */
 #define KVM_MMIO_REG_MASK	0x001f
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 89995fa6e945..61d293465e81 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -776,10 +776,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
-	/*
-	 * We consider the vcpu stopped until we see the first run ioctl for it.
-	 */
-	vcpu->arch.state = KVMPPC_VCPU_STOPPED;
+	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 
 	init_waitqueue_head(&vcpu->arch.cpu_run);
 
@@ -866,9 +863,8 @@ static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 {
 	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
 		return;
-	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	--vc->n_runnable;
-	++vc->n_busy;
 	list_del(&vcpu->arch.run_list);
 }
 
@@ -1169,7 +1165,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
 	int n_ceded;
-	int prev_state;
 	struct kvmppc_vcore *vc;
 	struct kvm_vcpu *v, *vn;
 
@@ -1186,7 +1181,6 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
 	vcpu->arch.kvm_run = kvm_run;
-	prev_state = vcpu->arch.state;
 	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
 	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
 	++vc->n_runnable;
@@ -1196,35 +1190,26 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	 * If the vcore is already running, we may be able to start
 	 * this thread straight away and have it join in.
 	 */
-	if (prev_state == KVMPPC_VCPU_STOPPED) {
+	if (!signal_pending(current)) {
 		if (vc->vcore_state == VCORE_RUNNING &&
 		    VCORE_EXIT_COUNT(vc) == 0) {
 			vcpu->arch.ptid = vc->n_runnable - 1;
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
+		} else if (vc->vcore_state == VCORE_SLEEPING) {
+			wake_up(&vc->wq);
 		}
 
-	} else if (prev_state == KVMPPC_VCPU_BUSY_IN_HOST)
-		--vc->n_busy;
+	}
 
 	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
 	       !signal_pending(current)) {
-		if (vc->n_busy || vc->vcore_state != VCORE_INACTIVE) {
+		if (vc->vcore_state != VCORE_INACTIVE) {
 			spin_unlock(&vc->lock);
 			kvmppc_wait_for_exec(vcpu, TASK_INTERRUPTIBLE);
 			spin_lock(&vc->lock);
 			continue;
 		}
-		vc->runner = vcpu;
-		n_ceded = 0;
-		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
-			if (!v->arch.pending_exceptions)
-				n_ceded += v->arch.ceded;
-		if (n_ceded == vc->n_runnable)
-			kvmppc_vcore_blocked(vc);
-		else
-			kvmppc_run_core(vc);
-
 		list_for_each_entry_safe(v, vn, &vc->runnable_threads,
 					 arch.run_list) {
 			kvmppc_core_prepare_to_enter(v);
@@ -1236,23 +1221,40 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 				wake_up(&v->arch.cpu_run);
 			}
 		}
+		if (!vc->n_runnable || vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
+			break;
+		vc->runner = vcpu;
+		n_ceded = 0;
+		list_for_each_entry(v, &vc->runnable_threads, arch.run_list)
+			if (!v->arch.pending_exceptions)
+				n_ceded += v->arch.ceded;
+		if (n_ceded == vc->n_runnable)
+			kvmppc_vcore_blocked(vc);
+		else
+			kvmppc_run_core(vc);
 		vc->runner = NULL;
 	}
 
-	if (signal_pending(current)) {
-		while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
-		       (vc->vcore_state == VCORE_RUNNING ||
-			vc->vcore_state == VCORE_EXITING)) {
-			spin_unlock(&vc->lock);
-			kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
-			spin_lock(&vc->lock);
-		}
-		if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
-			kvmppc_remove_runnable(vc, vcpu);
-			vcpu->stat.signal_exits++;
-			kvm_run->exit_reason = KVM_EXIT_INTR;
-			vcpu->arch.ret = -EINTR;
-		}
+	while (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	       (vc->vcore_state == VCORE_RUNNING ||
+		vc->vcore_state == VCORE_EXITING)) {
+		spin_unlock(&vc->lock);
+		kvmppc_wait_for_exec(vcpu, TASK_UNINTERRUPTIBLE);
+		spin_lock(&vc->lock);
+	}
+
+	if (vcpu->arch.state == KVMPPC_VCPU_RUNNABLE) {
+		kvmppc_remove_runnable(vc, vcpu);
+		vcpu->stat.signal_exits++;
+		kvm_run->exit_reason = KVM_EXIT_INTR;
+		vcpu->arch.ret = -EINTR;
+	}
+
+	if (vc->n_runnable && vc->vcore_state == VCORE_INACTIVE) {
+		/* Wake up some vcpu to run the core */
+		v = list_first_entry(&vc->runnable_threads,
+				     struct kvm_vcpu, arch.run_list);
+		wake_up(&v->arch.cpu_run);
 	}
 
 	spin_unlock(&vc->lock);
-- 
cgit v1.2.3


From c7b676709c163e12ec161c0593c2c76809c25ff4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:18:07 +0000
Subject: KVM: PPC: Book3S HV: Fix accounting of stolen time

Currently the code that accounts stolen time tends to overestimate the
stolen time, and will sometimes report more stolen time in a DTL
(dispatch trace log) entry than has elapsed since the last DTL entry.
This can cause guests to underflow the user or system time measured
for some tasks, leading to ridiculous CPU percentages and total runtimes
being reported by top and other utilities.

In addition, the current code was designed for the previous policy where
a vcore would only run when all the vcpus in it were runnable, and so
only counted stolen time on a per-vcore basis.  Now that a vcore can
run while some of the vcpus in it are doing other things in the kernel
(e.g. handling a page fault), we need to count the time when a vcpu task
is preempted while it is not running as part of a vcore as stolen also.

To do this, we bring back the BUSY_IN_HOST vcpu state and extend the
vcpu_load/put functions to count preemption time while the vcpu is
in that state.  Handling the transitions between the RUNNING and
BUSY_IN_HOST states requires checking and updating two variables
(accumulated time stolen and time last preempted), so we add a new
spinlock, vcpu->arch.tbacct_lock.  This protects both the per-vcpu
stolen/preempt-time variables, and the per-vcore variables while this
vcpu is running the vcore.

Finally, we now don't count time spent in userspace as stolen time.
The task could be executing in userspace on behalf of the vcpu, or
it could be preempted, or the vcpu could be genuinely stopped.  Since
we have no way of dividing up the time between these cases, we don't
count any of it as stolen.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h |   5 ++
 arch/powerpc/kvm/book3s_hv.c        | 127 +++++++++++++++++++++++++++++++-----
 2 files changed, 117 insertions(+), 15 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 1e8cbd1299fc..3093896015f0 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -559,12 +559,17 @@ struct kvm_vcpu_arch {
 	unsigned long dtl_index;
 	u64 stolen_logged;
 	struct kvmppc_vpa slb_shadow;
+
+	spinlock_t tbacct_lock;
+	u64 busy_stolen;
+	u64 busy_preempt;
 #endif
 };
 
 /* Values for vcpu->arch.state */
 #define KVMPPC_VCPU_NOTREADY		0
 #define KVMPPC_VCPU_RUNNABLE		1
+#define KVMPPC_VCPU_BUSY_IN_HOST	2
 
 /* Values for vcpu->arch.io_gpr */
 #define KVM_MMIO_REG_MASK	0x001f
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 61d293465e81..8b3c470e6cb9 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -60,23 +60,74 @@
 /* Used to indicate that a guest page fault needs to be handled */
 #define RESUME_PAGE_FAULT	(RESUME_GUEST | RESUME_FLAG_ARCH1)
 
+/* Used as a "null" value for timebase values */
+#define TB_NIL	(~(u64)0)
+
 static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
 static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
 
+/*
+ * We use the vcpu_load/put functions to measure stolen time.
+ * Stolen time is counted as time when either the vcpu is able to
+ * run as part of a virtual core, but the task running the vcore
+ * is preempted or sleeping, or when the vcpu needs something done
+ * in the kernel by the task running the vcpu, but that task is
+ * preempted or sleeping.  Those two things have to be counted
+ * separately, since one of the vcpu tasks will take on the job
+ * of running the core, and the other vcpu tasks in the vcore will
+ * sleep waiting for it to do that, but that sleep shouldn't count
+ * as stolen time.
+ *
+ * Hence we accumulate stolen time when the vcpu can run as part of
+ * a vcore using vc->stolen_tb, and the stolen time when the vcpu
+ * needs its task to do other things in the kernel (for example,
+ * service a page fault) in busy_stolen.  We don't accumulate
+ * stolen time for a vcore when it is inactive, or for a vcpu
+ * when it is in state RUNNING or NOTREADY.  NOTREADY is a bit of
+ * a misnomer; it means that the vcpu task is not executing in
+ * the KVM_VCPU_RUN ioctl, i.e. it is in userspace or elsewhere in
+ * the kernel.  We don't have any way of dividing up that time
+ * between time that the vcpu is genuinely stopped, time that
+ * the task is actively working on behalf of the vcpu, and time
+ * that the task is preempted, so we don't count any of it as
+ * stolen.
+ *
+ * Updates to busy_stolen are protected by arch.tbacct_lock;
+ * updates to vc->stolen_tb are protected by the arch.tbacct_lock
+ * of the vcpu that has taken responsibility for running the vcore
+ * (i.e. vc->runner).  The stolen times are measured in units of
+ * timebase ticks.  (Note that the != TB_NIL checks below are
+ * purely defensive; they should never fail.)
+ */
+
 void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	spin_lock(&vcpu->arch.tbacct_lock);
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
+	    vc->preempt_tb != TB_NIL) {
 		vc->stolen_tb += mftb() - vc->preempt_tb;
+		vc->preempt_tb = TB_NIL;
+	}
+	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
+	    vcpu->arch.busy_preempt != TB_NIL) {
+		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
+		vcpu->arch.busy_preempt = TB_NIL;
+	}
+	spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
 void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
+	spin_lock(&vcpu->arch.tbacct_lock);
 	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
 		vc->preempt_tb = mftb();
+	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
+		vcpu->arch.busy_preempt = mftb();
+	spin_unlock(&vcpu->arch.tbacct_lock);
 }
 
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
@@ -357,24 +408,61 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
 	spin_unlock(&vcpu->arch.vpa_update_lock);
 }
 
+/*
+ * Return the accumulated stolen time for the vcore up until `now'.
+ * The caller should hold the vcore lock.
+ */
+static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
+{
+	u64 p;
+
+	/*
+	 * If we are the task running the vcore, then since we hold
+	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
+	 * can't be updated, so we don't need the tbacct_lock.
+	 * If the vcore is inactive, it can't become active (since we
+	 * hold the vcore lock), so the vcpu load/put functions won't
+	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
+	 */
+	if (vc->vcore_state != VCORE_INACTIVE &&
+	    vc->runner->arch.run_task != current) {
+		spin_lock(&vc->runner->arch.tbacct_lock);
+		p = vc->stolen_tb;
+		if (vc->preempt_tb != TB_NIL)
+			p += now - vc->preempt_tb;
+		spin_unlock(&vc->runner->arch.tbacct_lock);
+	} else {
+		p = vc->stolen_tb;
+	}
+	return p;
+}
+
 static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu,
 				    struct kvmppc_vcore *vc)
 {
 	struct dtl_entry *dt;
 	struct lppaca *vpa;
-	unsigned long old_stolen;
+	unsigned long stolen;
+	unsigned long core_stolen;
+	u64 now;
 
 	dt = vcpu->arch.dtl_ptr;
 	vpa = vcpu->arch.vpa.pinned_addr;
-	old_stolen = vcpu->arch.stolen_logged;
-	vcpu->arch.stolen_logged = vc->stolen_tb;
+	now = mftb();
+	core_stolen = vcore_stolen_time(vc, now);
+	stolen = core_stolen - vcpu->arch.stolen_logged;
+	vcpu->arch.stolen_logged = core_stolen;
+	spin_lock(&vcpu->arch.tbacct_lock);
+	stolen += vcpu->arch.busy_stolen;
+	vcpu->arch.busy_stolen = 0;
+	spin_unlock(&vcpu->arch.tbacct_lock);
 	if (!dt || !vpa)
 		return;
 	memset(dt, 0, sizeof(struct dtl_entry));
 	dt->dispatch_reason = 7;
 	dt->processor_id = vc->pcpu + vcpu->arch.ptid;
-	dt->timebase = mftb();
-	dt->enqueue_to_dispatch_time = vc->stolen_tb - old_stolen;
+	dt->timebase = now;
+	dt->enqueue_to_dispatch_time = stolen;
 	dt->srr0 = kvmppc_get_pc(vcpu);
 	dt->srr1 = vcpu->arch.shregs.msr;
 	++dt;
@@ -773,6 +861,8 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	vcpu->arch.pvr = mfspr(SPRN_PVR);
 	kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
 	spin_lock_init(&vcpu->arch.vpa_update_lock);
+	spin_lock_init(&vcpu->arch.tbacct_lock);
+	vcpu->arch.busy_preempt = TB_NIL;
 
 	kvmppc_mmu_book3s_hv_init(vcpu);
 
@@ -788,7 +878,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 			INIT_LIST_HEAD(&vcore->runnable_threads);
 			spin_lock_init(&vcore->lock);
 			init_waitqueue_head(&vcore->wq);
-			vcore->preempt_tb = mftb();
+			vcore->preempt_tb = TB_NIL;
 		}
 		kvm->arch.vcores[core] = vcore;
 	}
@@ -801,7 +891,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 	++vcore->num_threads;
 	spin_unlock(&vcore->lock);
 	vcpu->arch.vcore = vcore;
-	vcpu->arch.stolen_logged = vcore->stolen_tb;
 
 	vcpu->arch.cpu_type = KVM_CPU_3S_64;
 	kvmppc_sanity_check(vcpu);
@@ -861,9 +950,17 @@ extern void xics_wake_cpu(int cpu);
 static void kvmppc_remove_runnable(struct kvmppc_vcore *vc,
 				   struct kvm_vcpu *vcpu)
 {
+	u64 now;
+
 	if (vcpu->arch.state != KVMPPC_VCPU_RUNNABLE)
 		return;
-	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
+	spin_lock(&vcpu->arch.tbacct_lock);
+	now = mftb();
+	vcpu->arch.busy_stolen += vcore_stolen_time(vc, now) -
+		vcpu->arch.stolen_logged;
+	vcpu->arch.busy_preempt = now;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
+	spin_unlock(&vcpu->arch.tbacct_lock);
 	--vc->n_runnable;
 	list_del(&vcpu->arch.run_list);
 }
@@ -1038,10 +1135,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 			vcpu->arch.ptid = ptid++;
 		}
 	}
-	if (!vcpu0) {
-		vc->vcore_state = VCORE_INACTIVE;
-		return;		/* nothing to run; should never happen */
-	}
+	if (!vcpu0)
+		goto out;	/* nothing to run; should never happen */
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list)
 		if (vcpu->arch.ceded)
 			vcpu->arch.ptid = ptid++;
@@ -1056,7 +1151,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 		goto out;
 	}
 
-	vc->stolen_tb += mftb() - vc->preempt_tb;
 	vc->pcpu = smp_processor_id();
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
@@ -1121,7 +1215,6 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 
  out:
 	vc->vcore_state = VCORE_INACTIVE;
-	vc->preempt_tb = mftb();
 	list_for_each_entry_safe(vcpu, vnext, &vc->runnable_threads,
 				 arch.run_list) {
 		if (vcpu->arch.ret != RESUME_GUEST) {
@@ -1181,7 +1274,9 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	vcpu->arch.ceded = 0;
 	vcpu->arch.run_task = current;
 	vcpu->arch.kvm_run = kvm_run;
+	vcpu->arch.stolen_logged = vcore_stolen_time(vc, mftb());
 	vcpu->arch.state = KVMPPC_VCPU_RUNNABLE;
+	vcpu->arch.busy_preempt = TB_NIL;
 	list_add_tail(&vcpu->arch.run_list, &vc->runnable_threads);
 	++vc->n_runnable;
 
@@ -1295,6 +1390,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	flush_vsx_to_thread(current);
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
+	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
 
 	do {
 		r = kvmppc_run_vcpu(run, vcpu);
@@ -1312,6 +1408,7 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	} while (r == RESUME_GUEST);
 
  out:
+	vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
 	atomic_dec(&vcpu->kvm->arch.vcpus_running);
 	return r;
 }
-- 
cgit v1.2.3


From 9f8c8c7812976fbfaf4bb30aaf8f6b001864f20a Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:18:37 +0000
Subject: KVM: PPC: Book3S HV: Allow DTL to be set to address 0, length 0

Commit 55b665b026 ("KVM: PPC: Book3S HV: Provide a way for userspace
to get/set per-vCPU areas") includes a check on the length of the
dispatch trace log (DTL) to make sure the buffer is at least one entry
long.  This is appropriate when registering a buffer, but the
interface also allows for any existing buffer to be unregistered by
specifying a zero address.  In this case the length check is not
appropriate.  This makes the check conditional on the address being
non-zero.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 8b3c470e6cb9..812764c96229 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -811,9 +811,8 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val)
 		addr = val->vpaval.addr;
 		len = val->vpaval.length;
 		r = -EINVAL;
-		if (len < sizeof(struct dtl_entry))
-			break;
-		if (addr && !vcpu->arch.vpa.next_gpa)
+		if (addr && (len < sizeof(struct dtl_entry) ||
+			     !vcpu->arch.vpa.next_gpa))
 			break;
 		len -= len % sizeof(struct dtl_entry);
 		r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
-- 
cgit v1.2.3


From 8b5869ad85f703ffeb25e656eab826f6b85b984c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 15 Oct 2012 01:20:50 +0000
Subject: KVM: PPC: Book3S HV: Fix thinko in try_lock_hpte()

This fixes an error in the inline asm in try_lock_hpte() where we
were erroneously using a register number as an immediate operand.
The bug only affects an error path, and in fact the code will still
work as long as the compiler chooses some register other than r0
for the "bits" variable.  Nevertheless it should still be fixed.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0dd1d86d3e31..1472a5b4e4e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -60,7 +60,7 @@ static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
 		     "	ori	%0,%0,%4\n"
 		     "  stdcx.	%0,0,%2\n"
 		     "	beq+	2f\n"
-		     "	li	%1,%3\n"
+		     "	mr	%1,%3\n"
 		     "2:	isync"
 		     : "=&r" (tmp), "=&r" (old)
 		     : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK)
-- 
cgit v1.2.3


From 63a1909190a3baa0abb9463ef28c8d8c969be951 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Wed, 31 Oct 2012 13:37:59 +0100
Subject: PPC: ePAPR: Convert hcall header to uapi (round 2)

The new uapi framework splits kernel internal and user space exported
bits of header files more cleanly. Adjust the ePAPR header accordingly.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/epapr_hcalls.h      | 458 +++++++++++++++++++++++++
 arch/powerpc/include/uapi/asm/epapr_hcalls.h | 478 ++++-----------------------
 2 files changed, 517 insertions(+), 419 deletions(-)
 create mode 100644 arch/powerpc/include/asm/epapr_hcalls.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
new file mode 100644
index 000000000000..d3d634274d2c
--- /dev/null
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -0,0 +1,458 @@
+/*
+ * ePAPR hcall interface
+ *
+ * Copyright 2008-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is provided under a dual BSD/GPL license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* A "hypercall" is an "sc 1" instruction.  This header file file provides C
+ * wrapper functions for the ePAPR hypervisor interface.  It is inteded
+ * for use by Linux device drivers and other operating systems.
+ *
+ * The hypercalls are implemented as inline assembly, rather than assembly
+ * language functions in a .S file, for optimization.  It allows
+ * the caller to issue the hypercall instruction directly, improving both
+ * performance and memory footprint.
+ */
+
+#ifndef _EPAPR_HCALLS_H
+#define _EPAPR_HCALLS_H
+
+#include <uapi/asm/epapr_hcalls.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/byteorder.h>
+
+/*
+ * Hypercall register clobber list
+ *
+ * These macros are used to define the list of clobbered registers during a
+ * hypercall.  Technically, registers r0 and r3-r12 are always clobbered,
+ * but the gcc inline assembly syntax does not allow us to specify registers
+ * on the clobber list that are also on the input/output list.  Therefore,
+ * the lists of clobbered registers depends on the number of register
+ * parmeters ("+r" and "=r") passed to the hypercall.
+ *
+ * Each assembly block should use one of the HCALL_CLOBBERSx macros.  As a
+ * general rule, 'x' is the number of parameters passed to the assembly
+ * block *except* for r11.
+ *
+ * If you're not sure, just use the smallest value of 'x' that does not
+ * generate a compilation error.  Because these are static inline functions,
+ * the compiler will only check the clobber list for a function if you
+ * compile code that calls that function.
+ *
+ * r3 and r11 are not included in any clobbers list because they are always
+ * listed as output registers.
+ *
+ * XER, CTR, and LR are currently listed as clobbers because it's uncertain
+ * whether they will be clobbered.
+ *
+ * Note that r11 can be used as an output parameter.
+ *
+ * The "memory" clobber is only necessary for hcalls where the Hypervisor
+ * will read or write guest memory. However, we add it to all hcalls because
+ * the impact is minimal, and we want to ensure that it's present for the
+ * hcalls that need it.
+*/
+
+/* List of common clobbered registers.  Do not use this macro. */
+#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory"
+
+#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS
+#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10"
+#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9"
+#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8"
+#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7"
+#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6"
+#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
+#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
+
+extern bool epapr_paravirt_enabled;
+extern u32 epapr_hypercall_start[];
+
+/*
+ * We use "uintptr_t" to define a register because it's guaranteed to be a
+ * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit
+ * platform.
+ *
+ * All registers are either input/output or output only.  Registers that are
+ * initialized before making the hypercall are input/output.  All
+ * input/output registers are represented with "+r".  Output-only registers
+ * are represented with "=r".  Do not specify any unused registers.  The
+ * clobber list will tell the compiler that the hypercall modifies those
+ * registers, which is good enough.
+ */
+
+/**
+ * ev_int_set_config - configure the specified interrupt
+ * @interrupt: the interrupt number
+ * @config: configuration for this interrupt
+ * @priority: interrupt priority
+ * @destination: destination CPU number
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_set_config(unsigned int interrupt,
+	uint32_t config, unsigned int priority, uint32_t destination)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG);
+	r3  = interrupt;
+	r4  = config;
+	r5  = priority;
+	r6  = destination;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
+		: : EV_HCALL_CLOBBERS4
+	);
+
+	return r3;
+}
+
+/**
+ * ev_int_get_config - return the config of the specified interrupt
+ * @interrupt: the interrupt number
+ * @config: returned configuration for this interrupt
+ * @priority: returned interrupt priority
+ * @destination: returned destination CPU number
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_get_config(unsigned int interrupt,
+	uint32_t *config, unsigned int *priority, uint32_t *destination)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
+		: : EV_HCALL_CLOBBERS4
+	);
+
+	*config = r4;
+	*priority = r5;
+	*destination = r6;
+
+	return r3;
+}
+
+/**
+ * ev_int_set_mask - sets the mask for the specified interrupt source
+ * @interrupt: the interrupt number
+ * @mask: 0=enable interrupts, 1=disable interrupts
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_set_mask(unsigned int interrupt,
+	unsigned int mask)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK);
+	r3 = interrupt;
+	r4 = mask;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	return r3;
+}
+
+/**
+ * ev_int_get_mask - returns the mask for the specified interrupt source
+ * @interrupt: the interrupt number
+ * @mask: returned mask for this interrupt (0=enabled, 1=disabled)
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_get_mask(unsigned int interrupt,
+	unsigned int *mask)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	*mask = r4;
+
+	return r3;
+}
+
+/**
+ * ev_int_eoi - signal the end of interrupt processing
+ * @interrupt: the interrupt number
+ *
+ * This function signals the end of processing for the the specified
+ * interrupt, which must be the interrupt currently in service. By
+ * definition, this is also the highest-priority interrupt.
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_eoi(unsigned int interrupt)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_EOI);
+	r3 = interrupt;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_send - send characters to a byte stream
+ * @handle: byte stream handle
+ * @count: (input) num of chars to send, (output) num chars sent
+ * @buffer: pointer to a 16-byte buffer
+ *
+ * @buffer must be at least 16 bytes long, because all 16 bytes will be
+ * read from memory into registers, even if count < 16.
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_byte_channel_send(unsigned int handle,
+	unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r8 __asm__("r8");
+	const uint32_t *p = (const uint32_t *) buffer;
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND);
+	r3 = handle;
+	r4 = *count;
+	r5 = be32_to_cpu(p[0]);
+	r6 = be32_to_cpu(p[1]);
+	r7 = be32_to_cpu(p[2]);
+	r8 = be32_to_cpu(p[3]);
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3),
+		  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
+		: : EV_HCALL_CLOBBERS6
+	);
+
+	*count = r4;
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_receive - fetch characters from a byte channel
+ * @handle: byte channel handle
+ * @count: (input) max num of chars to receive, (output) num chars received
+ * @buffer: pointer to a 16-byte buffer
+ *
+ * The size of @buffer must be at least 16 bytes, even if you request fewer
+ * than 16 characters, because we always write 16 bytes to @buffer.  This is
+ * for performance reasons.
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_byte_channel_receive(unsigned int handle,
+	unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+	register uintptr_t r6 __asm__("r6");
+	register uintptr_t r7 __asm__("r7");
+	register uintptr_t r8 __asm__("r8");
+	uint32_t *p = (uint32_t *) buffer;
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE);
+	r3 = handle;
+	r4 = *count;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "+r" (r4),
+		  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
+		: : EV_HCALL_CLOBBERS6
+	);
+
+	*count = r4;
+	p[0] = cpu_to_be32(r5);
+	p[1] = cpu_to_be32(r6);
+	p[2] = cpu_to_be32(r7);
+	p[3] = cpu_to_be32(r8);
+
+	return r3;
+}
+
+/**
+ * ev_byte_channel_poll - returns the status of the byte channel buffers
+ * @handle: byte channel handle
+ * @rx_count: returned count of bytes in receive queue
+ * @tx_count: returned count of free space in transmit queue
+ *
+ * This function reports the amount of data in the receive queue (i.e. the
+ * number of bytes you can read), and the amount of free space in the transmit
+ * queue (i.e. the number of bytes you can write).
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_byte_channel_poll(unsigned int handle,
+	unsigned int *rx_count,	unsigned int *tx_count)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+	register uintptr_t r5 __asm__("r5");
+
+	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
+		: : EV_HCALL_CLOBBERS3
+	);
+
+	*rx_count = r4;
+	*tx_count = r5;
+
+	return r3;
+}
+
+/**
+ * ev_int_iack - acknowledge an interrupt
+ * @handle: handle to the target interrupt controller
+ * @vector: returned interrupt vector
+ *
+ * If handle is zero, the function returns the next interrupt source
+ * number to be handled irrespective of the hierarchy or cascading
+ * of interrupt controllers. If non-zero, specifies a handle to the
+ * interrupt controller that is the target of the acknowledge.
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_int_iack(unsigned int handle,
+	unsigned int *vector)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+	register uintptr_t r4 __asm__("r4");
+
+	r11 = EV_HCALL_TOKEN(EV_INT_IACK);
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3), "=r" (r4)
+		: : EV_HCALL_CLOBBERS2
+	);
+
+	*vector = r4;
+
+	return r3;
+}
+
+/**
+ * ev_doorbell_send - send a doorbell to another partition
+ * @handle: doorbell send handle
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_doorbell_send(unsigned int handle)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
+	r3 = handle;
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "+r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+
+/**
+ * ev_idle -- wait for next interrupt on this core
+ *
+ * Returns 0 for success, or an error code.
+ */
+static inline unsigned int ev_idle(void)
+{
+	register uintptr_t r11 __asm__("r11");
+	register uintptr_t r3 __asm__("r3");
+
+	r11 = EV_HCALL_TOKEN(EV_IDLE);
+
+	asm volatile("bl	epapr_hypercall_start"
+		: "+r" (r11), "=r" (r3)
+		: : EV_HCALL_CLOBBERS1
+	);
+
+	return r3;
+}
+#endif /* !__ASSEMBLY__ */
+#endif /* _EPAPR_HCALLS_H */
diff --git a/arch/powerpc/include/uapi/asm/epapr_hcalls.h b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
index 58997afcd085..7f9c74b46704 100644
--- a/arch/powerpc/include/uapi/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/uapi/asm/epapr_hcalls.h
@@ -37,422 +37,62 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* A "hypercall" is an "sc 1" instruction.  This header file file provides C
- * wrapper functions for the ePAPR hypervisor interface.  It is inteded
- * for use by Linux device drivers and other operating systems.
- *
- * The hypercalls are implemented as inline assembly, rather than assembly
- * language functions in a .S file, for optimization.  It allows
- * the caller to issue the hypercall instruction directly, improving both
- * performance and memory footprint.
- */
-
-#ifndef _EPAPR_HCALLS_H
-#define _EPAPR_HCALLS_H
-
-#include <uapi/asm/epapr_hcalls.h>
-
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <asm/byteorder.h>
-
-/*
- * Hypercall register clobber list
- *
- * These macros are used to define the list of clobbered registers during a
- * hypercall.  Technically, registers r0 and r3-r12 are always clobbered,
- * but the gcc inline assembly syntax does not allow us to specify registers
- * on the clobber list that are also on the input/output list.  Therefore,
- * the lists of clobbered registers depends on the number of register
- * parmeters ("+r" and "=r") passed to the hypercall.
- *
- * Each assembly block should use one of the HCALL_CLOBBERSx macros.  As a
- * general rule, 'x' is the number of parameters passed to the assembly
- * block *except* for r11.
- *
- * If you're not sure, just use the smallest value of 'x' that does not
- * generate a compilation error.  Because these are static inline functions,
- * the compiler will only check the clobber list for a function if you
- * compile code that calls that function.
- *
- * r3 and r11 are not included in any clobbers list because they are always
- * listed as output registers.
- *
- * XER, CTR, and LR are currently listed as clobbers because it's uncertain
- * whether they will be clobbered.
- *
- * Note that r11 can be used as an output parameter.
- *
- * The "memory" clobber is only necessary for hcalls where the Hypervisor
- * will read or write guest memory. However, we add it to all hcalls because
- * the impact is minimal, and we want to ensure that it's present for the
- * hcalls that need it.
-*/
-
-/* List of common clobbered registers.  Do not use this macro. */
-#define EV_HCALL_CLOBBERS "r0", "r12", "xer", "ctr", "lr", "cc", "memory"
-
-#define EV_HCALL_CLOBBERS8 EV_HCALL_CLOBBERS
-#define EV_HCALL_CLOBBERS7 EV_HCALL_CLOBBERS8, "r10"
-#define EV_HCALL_CLOBBERS6 EV_HCALL_CLOBBERS7, "r9"
-#define EV_HCALL_CLOBBERS5 EV_HCALL_CLOBBERS6, "r8"
-#define EV_HCALL_CLOBBERS4 EV_HCALL_CLOBBERS5, "r7"
-#define EV_HCALL_CLOBBERS3 EV_HCALL_CLOBBERS4, "r6"
-#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
-#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
-
-extern bool epapr_paravirt_enabled;
-extern u32 epapr_hypercall_start[];
-
-/*
- * We use "uintptr_t" to define a register because it's guaranteed to be a
- * 32-bit integer on a 32-bit platform, and a 64-bit integer on a 64-bit
- * platform.
- *
- * All registers are either input/output or output only.  Registers that are
- * initialized before making the hypercall are input/output.  All
- * input/output registers are represented with "+r".  Output-only registers
- * are represented with "=r".  Do not specify any unused registers.  The
- * clobber list will tell the compiler that the hypercall modifies those
- * registers, which is good enough.
- */
-
-/**
- * ev_int_set_config - configure the specified interrupt
- * @interrupt: the interrupt number
- * @config: configuration for this interrupt
- * @priority: interrupt priority
- * @destination: destination CPU number
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_set_config(unsigned int interrupt,
-	uint32_t config, unsigned int priority, uint32_t destination)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-	register uintptr_t r5 __asm__("r5");
-	register uintptr_t r6 __asm__("r6");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_SET_CONFIG);
-	r3  = interrupt;
-	r4  = config;
-	r5  = priority;
-	r6  = destination;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "+r" (r4), "+r" (r5), "+r" (r6)
-		: : EV_HCALL_CLOBBERS4
-	);
-
-	return r3;
-}
-
-/**
- * ev_int_get_config - return the config of the specified interrupt
- * @interrupt: the interrupt number
- * @config: returned configuration for this interrupt
- * @priority: returned interrupt priority
- * @destination: returned destination CPU number
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_get_config(unsigned int interrupt,
-	uint32_t *config, unsigned int *priority, uint32_t *destination)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-	register uintptr_t r5 __asm__("r5");
-	register uintptr_t r6 __asm__("r6");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_GET_CONFIG);
-	r3 = interrupt;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5), "=r" (r6)
-		: : EV_HCALL_CLOBBERS4
-	);
-
-	*config = r4;
-	*priority = r5;
-	*destination = r6;
-
-	return r3;
-}
-
-/**
- * ev_int_set_mask - sets the mask for the specified interrupt source
- * @interrupt: the interrupt number
- * @mask: 0=enable interrupts, 1=disable interrupts
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_set_mask(unsigned int interrupt,
-	unsigned int mask)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_SET_MASK);
-	r3 = interrupt;
-	r4 = mask;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "+r" (r4)
-		: : EV_HCALL_CLOBBERS2
-	);
-
-	return r3;
-}
-
-/**
- * ev_int_get_mask - returns the mask for the specified interrupt source
- * @interrupt: the interrupt number
- * @mask: returned mask for this interrupt (0=enabled, 1=disabled)
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_get_mask(unsigned int interrupt,
-	unsigned int *mask)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_GET_MASK);
-	r3 = interrupt;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "=r" (r4)
-		: : EV_HCALL_CLOBBERS2
-	);
-
-	*mask = r4;
-
-	return r3;
-}
-
-/**
- * ev_int_eoi - signal the end of interrupt processing
- * @interrupt: the interrupt number
- *
- * This function signals the end of processing for the the specified
- * interrupt, which must be the interrupt currently in service. By
- * definition, this is also the highest-priority interrupt.
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_eoi(unsigned int interrupt)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_EOI);
-	r3 = interrupt;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3)
-		: : EV_HCALL_CLOBBERS1
-	);
-
-	return r3;
-}
-
-/**
- * ev_byte_channel_send - send characters to a byte stream
- * @handle: byte stream handle
- * @count: (input) num of chars to send, (output) num chars sent
- * @buffer: pointer to a 16-byte buffer
- *
- * @buffer must be at least 16 bytes long, because all 16 bytes will be
- * read from memory into registers, even if count < 16.
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_byte_channel_send(unsigned int handle,
-	unsigned int *count, const char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-	register uintptr_t r5 __asm__("r5");
-	register uintptr_t r6 __asm__("r6");
-	register uintptr_t r7 __asm__("r7");
-	register uintptr_t r8 __asm__("r8");
-	const uint32_t *p = (const uint32_t *) buffer;
-
-	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_SEND);
-	r3 = handle;
-	r4 = *count;
-	r5 = be32_to_cpu(p[0]);
-	r6 = be32_to_cpu(p[1]);
-	r7 = be32_to_cpu(p[2]);
-	r8 = be32_to_cpu(p[3]);
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3),
-		  "+r" (r4), "+r" (r5), "+r" (r6), "+r" (r7), "+r" (r8)
-		: : EV_HCALL_CLOBBERS6
-	);
-
-	*count = r4;
-
-	return r3;
-}
-
-/**
- * ev_byte_channel_receive - fetch characters from a byte channel
- * @handle: byte channel handle
- * @count: (input) max num of chars to receive, (output) num chars received
- * @buffer: pointer to a 16-byte buffer
- *
- * The size of @buffer must be at least 16 bytes, even if you request fewer
- * than 16 characters, because we always write 16 bytes to @buffer.  This is
- * for performance reasons.
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_byte_channel_receive(unsigned int handle,
-	unsigned int *count, char buffer[EV_BYTE_CHANNEL_MAX_BYTES])
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-	register uintptr_t r5 __asm__("r5");
-	register uintptr_t r6 __asm__("r6");
-	register uintptr_t r7 __asm__("r7");
-	register uintptr_t r8 __asm__("r8");
-	uint32_t *p = (uint32_t *) buffer;
-
-	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_RECEIVE);
-	r3 = handle;
-	r4 = *count;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "+r" (r4),
-		  "=r" (r5), "=r" (r6), "=r" (r7), "=r" (r8)
-		: : EV_HCALL_CLOBBERS6
-	);
-
-	*count = r4;
-	p[0] = cpu_to_be32(r5);
-	p[1] = cpu_to_be32(r6);
-	p[2] = cpu_to_be32(r7);
-	p[3] = cpu_to_be32(r8);
-
-	return r3;
-}
-
-/**
- * ev_byte_channel_poll - returns the status of the byte channel buffers
- * @handle: byte channel handle
- * @rx_count: returned count of bytes in receive queue
- * @tx_count: returned count of free space in transmit queue
- *
- * This function reports the amount of data in the receive queue (i.e. the
- * number of bytes you can read), and the amount of free space in the transmit
- * queue (i.e. the number of bytes you can write).
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_byte_channel_poll(unsigned int handle,
-	unsigned int *rx_count,	unsigned int *tx_count)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-	register uintptr_t r5 __asm__("r5");
-
-	r11 = EV_HCALL_TOKEN(EV_BYTE_CHANNEL_POLL);
-	r3 = handle;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "=r" (r4), "=r" (r5)
-		: : EV_HCALL_CLOBBERS3
-	);
-
-	*rx_count = r4;
-	*tx_count = r5;
-
-	return r3;
-}
-
-/**
- * ev_int_iack - acknowledge an interrupt
- * @handle: handle to the target interrupt controller
- * @vector: returned interrupt vector
- *
- * If handle is zero, the function returns the next interrupt source
- * number to be handled irrespective of the hierarchy or cascading
- * of interrupt controllers. If non-zero, specifies a handle to the
- * interrupt controller that is the target of the acknowledge.
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_int_iack(unsigned int handle,
-	unsigned int *vector)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-	register uintptr_t r4 __asm__("r4");
-
-	r11 = EV_HCALL_TOKEN(EV_INT_IACK);
-	r3 = handle;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3), "=r" (r4)
-		: : EV_HCALL_CLOBBERS2
-	);
-
-	*vector = r4;
-
-	return r3;
-}
-
-/**
- * ev_doorbell_send - send a doorbell to another partition
- * @handle: doorbell send handle
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_doorbell_send(unsigned int handle)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-
-	r11 = EV_HCALL_TOKEN(EV_DOORBELL_SEND);
-	r3 = handle;
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "+r" (r3)
-		: : EV_HCALL_CLOBBERS1
-	);
-
-	return r3;
-}
-
-/**
- * ev_idle -- wait for next interrupt on this core
- *
- * Returns 0 for success, or an error code.
- */
-static inline unsigned int ev_idle(void)
-{
-	register uintptr_t r11 __asm__("r11");
-	register uintptr_t r3 __asm__("r3");
-
-	r11 = EV_HCALL_TOKEN(EV_IDLE);
-
-	asm volatile("bl	epapr_hypercall_start"
-		: "+r" (r11), "=r" (r3)
-		: : EV_HCALL_CLOBBERS1
-	);
-
-	return r3;
-}
-#endif /* !__ASSEMBLY__ */
-#endif
+#ifndef _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+#define _UAPI_ASM_POWERPC_EPAPR_HCALLS_H
+
+#define EV_BYTE_CHANNEL_SEND		1
+#define EV_BYTE_CHANNEL_RECEIVE		2
+#define EV_BYTE_CHANNEL_POLL		3
+#define EV_INT_SET_CONFIG		4
+#define EV_INT_GET_CONFIG		5
+#define EV_INT_SET_MASK			6
+#define EV_INT_GET_MASK			7
+#define EV_INT_IACK			9
+#define EV_INT_EOI			10
+#define EV_INT_SEND_IPI			11
+#define EV_INT_SET_TASK_PRIORITY	12
+#define EV_INT_GET_TASK_PRIORITY	13
+#define EV_DOORBELL_SEND		14
+#define EV_MSGSND			15
+#define EV_IDLE				16
+
+/* vendor ID: epapr */
+#define EV_LOCAL_VENDOR_ID		0	/* for private use */
+#define EV_EPAPR_VENDOR_ID		1
+#define EV_FSL_VENDOR_ID		2	/* Freescale Semiconductor */
+#define EV_IBM_VENDOR_ID		3	/* IBM */
+#define EV_GHS_VENDOR_ID		4	/* Green Hills Software */
+#define EV_ENEA_VENDOR_ID		5	/* Enea */
+#define EV_WR_VENDOR_ID			6	/* Wind River Systems */
+#define EV_AMCC_VENDOR_ID		7	/* Applied Micro Circuits */
+#define EV_KVM_VENDOR_ID		42	/* KVM */
+
+/* The max number of bytes that a byte channel can send or receive per call */
+#define EV_BYTE_CHANNEL_MAX_BYTES	16
+
+
+#define _EV_HCALL_TOKEN(id, num) (((id) << 16) | (num))
+#define EV_HCALL_TOKEN(hcall_num) _EV_HCALL_TOKEN(EV_EPAPR_VENDOR_ID, hcall_num)
+
+/* epapr return codes */
+#define EV_SUCCESS		0
+#define EV_EPERM		1	/* Operation not permitted */
+#define EV_ENOENT		2	/*  Entry Not Found */
+#define EV_EIO			3	/* I/O error occured */
+#define EV_EAGAIN		4	/* The operation had insufficient
+					 * resources to complete and should be
+					 * retried
+					 */
+#define EV_ENOMEM		5	/* There was insufficient memory to
+					 * complete the operation */
+#define EV_EFAULT		6	/* Bad guest address */
+#define EV_ENODEV		7	/* No such device */
+#define EV_EINVAL		8	/* An argument supplied to the hcall
+					   was out of range or invalid */
+#define EV_INTERNAL		9	/* An internal error occured */
+#define EV_CONFIG		10	/* A configuration error was detected */
+#define EV_INVALID_STATE	11	/* The object is in an invalid state */
+#define EV_UNIMPLEMENTED	12	/* Unimplemented hypercall */
+#define EV_BUFFER_OVERFLOW	13	/* Caller-supplied buffer too small */
+
+#endif /* _UAPI_ASM_POWERPC_EPAPR_HCALLS_H */
-- 
cgit v1.2.3


From 42897d866b120547777ae1fd316680ec53356d9c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 27 Nov 2012 23:29:02 -0200
Subject: KVM: x86: add kvm_arch_vcpu_postcreate callback, move TSC
 initialization

TSC initialization will soon make use of online_vcpus.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/powerpc.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index deb0d596d815..f9ab12aea829 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -439,6 +439,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	return vcpu;
 }
 
+int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
 {
 	/* Make sure we're not using the vcpu anymore */
-- 
cgit v1.2.3


From 0e673fb679027600cad45bd61a4cc9ebd2ed2bb1 Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Tue, 9 Oct 2012 00:06:20 +0200
Subject: KVM: PPC: Support eventfd

In order to support the generic eventfd infrastructure on PPC, we need
to call into the generic KVM in-kernel device mmio code.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/Kconfig   |  1 +
 arch/powerpc/kvm/Makefile  |  4 +++-
 arch/powerpc/kvm/powerpc.c | 17 ++++++++++++++++-
 3 files changed, 20 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 71f0cd9edf33..4730c953f435 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
 	bool
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_EVENTFD
 
 config KVM_BOOK3S_HANDLER
 	bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a08636e6d4..cd8965828676 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
 
 ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
 
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
+						eventfd.o)
 
 CFLAGS_44x_tlb.o  := -I.
 CFLAGS_e500_tlb.o := -I.
@@ -76,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 
 kvm-book3s_64-module-objs := \
 	../../../virt/kvm/kvm_main.o \
+	../../../virt/kvm/eventfd.o \
 	powerpc.o \
 	emulate.o \
 	book3s.o \
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f9ab12aea829..d583ea15e151 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -314,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PPC_IRQ_LEVEL:
 	case KVM_CAP_ENABLE_CAP:
 	case KVM_CAP_ONE_REG:
+	case KVM_CAP_IOEVENTFD:
 		r = 1;
 		break;
 #ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -618,6 +619,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	vcpu->mmio_is_write = 0;
 	vcpu->arch.mmio_sign_extend = 0;
 
+	if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			     bytes, &run->mmio.data)) {
+		kvmppc_complete_mmio_load(vcpu, run);
+		vcpu->mmio_needed = 0;
+		return EMULATE_DONE;
+	}
+
 	return EMULATE_DO_MMIO;
 }
 
@@ -627,8 +635,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
 {
 	int r;
 
-	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
 	vcpu->arch.mmio_sign_extend = 1;
+	r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
 
 	return r;
 }
@@ -666,6 +674,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		}
 	}
 
+	if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+			      bytes, &run->mmio.data)) {
+		kvmppc_complete_mmio_load(vcpu, run);
+		vcpu->mmio_needed = 0;
+		return EMULATE_DONE;
+	}
+
 	return EMULATE_DO_MMIO;
 }
 
-- 
cgit v1.2.3


From 7ed661bf852cefa1ab57ad709a675bfb029d47ab Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Tue, 13 Nov 2012 18:31:32 +0000
Subject: KVM: PPC: Book3S HV: Restructure HPT entry creation code

This restructures the code that creates HPT (hashed page table)
entries so that it can be called in situations where we don't have a
struct vcpu pointer, only a struct kvm pointer.  It also fixes a bug
where kvmppc_map_vrma() would corrupt the guest R4 value.

Most of the work of kvmppc_virtmode_h_enter is now done by a new
function, kvmppc_virtmode_do_h_enter, which itself calls another new
function, kvmppc_do_h_enter, which contains most of the old
kvmppc_h_enter.  The new kvmppc_do_h_enter takes explicit arguments
for the place to return the HPTE index, the Linux page tables to use,
and whether it is being called in real mode, thus removing the need
for it to have the vcpu as an argument.

Currently kvmppc_map_vrma creates the VRMA (virtual real mode area)
HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily
to handle H_ENTER hcalls from the guest that need to pin a page of
memory.  Since H_ENTER returns the index of the created HPTE in R4,
kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4
in the case when it gets called from kvmppc_map_vrma on the first
VCPU_RUN ioctl.  With this, kvmppc_map_vrma instead calls
kvmppc_virtmode_do_h_enter with the address of a dummy word as the
place to store the HPTE index, thus avoiding corrupting the guest R4.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  5 +++--
 arch/powerpc/kvm/book3s_64_mmu_hv.c   | 36 ++++++++++++++++++++++++-----------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   | 27 ++++++++++++++++----------
 3 files changed, 45 insertions(+), 23 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 36fcf4190461..fea768f21cd7 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -157,8 +157,9 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
 extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel);
-extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+			long pte_index, unsigned long pteh, unsigned long ptel,
+			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
 
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2a89a36e7263..6ee6516a0bee 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -41,6 +41,10 @@
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER	18
 
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret);
+
 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
 	unsigned long hpt;
@@ -185,6 +189,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 	unsigned long addr, hash;
 	unsigned long psize;
 	unsigned long hp0, hp1;
+	unsigned long idx_ret;
 	long ret;
 	struct kvm *kvm = vcpu->kvm;
 
@@ -216,7 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
 		hash = (hash << 3) + 7;
 		hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
 		hp_r = hp1 | addr;
-		ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+		ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+						 &idx_ret);
 		if (ret != H_SUCCESS) {
 			pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
 			       addr, ret);
@@ -354,15 +360,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	return err;
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+				long pte_index, unsigned long pteh,
+				unsigned long ptel, unsigned long *pte_idx_ret)
 {
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long psize, gpa, gfn;
 	struct kvm_memory_slot *memslot;
 	long ret;
@@ -390,8 +391,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
  do_insert:
 	/* Protect linux PTE lookup from page table destruction */
 	rcu_read_lock_sched();	/* this disables preemption too */
-	vcpu->arch.pgdir = current->mm->pgd;
-	ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+				current->mm->pgd, false, pte_idx_ret);
 	rcu_read_unlock_sched();
 	if (ret == H_TOO_HARD) {
 		/* this can't happen */
@@ -402,6 +403,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 }
 
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+			     long pte_index, unsigned long pteh,
+			     unsigned long ptel)
+{
+	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+					  pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
 							 gva_t eaddr)
 {
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5e06e3153888..362dffe4db10 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -103,14 +103,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	unlock_rmap(rmap);
 }
 
-static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
 			      int writing, unsigned long *pte_sizep)
 {
 	pte_t *ptep;
 	unsigned long ps = *pte_sizep;
 	unsigned int shift;
 
-	ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+	ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
 	if (!ptep)
 		return __pte(0);
 	if (shift)
@@ -130,10 +130,10 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
 	hpte[0] = hpte_v;
 }
 
-long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-		    long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+		       long pte_index, unsigned long pteh, unsigned long ptel,
+		       pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
 {
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long i, pa, gpa, gfn, psize;
 	unsigned long slot_fn, hva;
 	unsigned long *hpte;
@@ -147,7 +147,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned int writing;
 	unsigned long mmu_seq;
 	unsigned long rcbits;
-	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
 	if (!psize)
@@ -201,7 +200,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 
 		/* Look up the Linux PTE for the backing page */
 		pte_size = psize;
-		pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+		pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
 		if (pte_present(pte)) {
 			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
@@ -210,6 +209,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 			pa = pte_pfn(pte) << PAGE_SHIFT;
 		}
 	}
+
 	if (pte_size < psize)
 		return H_PARAMETER;
 	if (pa && pte_size > psize)
@@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
 		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+		    mmu_notifier_retry(kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
@@ -318,10 +318,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	hpte[0] = pteh;
 	asm volatile("ptesync" : : : "memory");
 
-	vcpu->arch.gpr[4] = pte_index;
+	*pte_idx_ret = pte_index;
 	return H_SUCCESS;
 }
-EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+		    long pte_index, unsigned long pteh, unsigned long ptel)
+{
+	return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+				 vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
 
 #define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
 
-- 
cgit v1.2.3


From 4879f241720cda3e6c18a1713bf9b2ed2de14ee4 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 19 Nov 2012 23:01:34 +0000
Subject: KVM: PPC: Book3S HV: Fix bug causing loss of page dirty state

This fixes a bug where adding a new guest HPT entry via the H_ENTER
hcall would lose the "changed" bit in the reverse map information
for the guest physical page being mapped.  The result was that the
KVM_GET_DIRTY_LOG could return a zero bit for the page even though
the page had been modified by the guest.

This fixes it by only modifying the index and present bits in the
reverse map entry, thus preserving the reference and change bits.
We were also unnecessarily setting the reference bit, and this
fixes that too.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 362dffe4db10..ff2da5ce475c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -59,10 +59,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 		head->back = pte_index;
 	} else {
 		rev->forw = rev->back = pte_index;
-		i = pte_index;
+		*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+			pte_index | KVMPPC_RMAP_PRESENT;
 	}
-	smp_wmb();
-	*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+	unlock_rmap(rmap);
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
-- 
cgit v1.2.3


From 44e5f6be62741bd44968f40f3afa1cff1df983f2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 19 Nov 2012 22:52:49 +0000
Subject: KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs

This uses a bit in our record of the guest view of the HPTE to record
when the HPTE gets modified.  We use a reserved bit for this, and ensure
that this bit is always cleared in HPTE values returned to the guest.

The recording of modified HPTEs is only done if other code indicates
its interest by setting kvm->arch.hpte_mod_interest to a non-zero value.
The reason for this is that when later commits add facilities for
userspace to read the HPT, the first pass of reading the HPT will be
quicker if there are no (or very few) HPTEs marked as modified,
rather than having most HPTEs marked as modified.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  9 +++++++++
 arch/powerpc/include/asm/kvm_host.h      |  1 +
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      | 28 ++++++++++++++++++++++++----
 3 files changed, 34 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1472a5b4e4e3..b322e5bd6964 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,15 @@ extern int kvm_hpt_order;		/* order of preallocated HPTs */
 #define HPTE_V_HVLOCK	0x40UL
 #define HPTE_V_ABSENT	0x20UL
 
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED	(1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED	HPTE_GR_MODIFIED
+
 static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
 {
 	unsigned long tmp, old;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3093896015f0..58c72646c445 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -248,6 +248,7 @@ struct kvm_arch {
 	atomic_t vcpus_running;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
+	atomic_t hpte_mod_interest;
 	spinlock_t slot_phys_lock;
 	unsigned short last_vcpu[NR_CPUS];
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ff2da5ce475c..ed563a5f25c8 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -66,6 +66,17 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 }
 EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
 
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+					  struct revmap_entry *rev)
+{
+	if (atomic_read(&kvm->arch.hpte_mod_interest))
+		rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
 /* Remove this HPTE from the chain for a real page */
 static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 				struct revmap_entry *rev,
@@ -138,7 +149,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	unsigned long slot_fn, hva;
 	unsigned long *hpte;
 	struct revmap_entry *rev;
-	unsigned long g_ptel = ptel;
+	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
 	unsigned long *physp, pte_size;
 	unsigned long is_io;
@@ -153,6 +164,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		return H_PARAMETER;
 	writing = hpte_is_writable(ptel);
 	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+	ptel &= ~HPTE_GR_RESERVED;
+	g_ptel = ptel;
 
 	/* used later to detect if we might have been invalidated */
 	mmu_seq = kvm->mmu_notifier_seq;
@@ -287,8 +300,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 	rev = &kvm->arch.revmap[pte_index];
 	if (realmode)
 		rev = real_vmalloc_addr(rev);
-	if (rev)
+	if (rev) {
 		rev->guest_rpte = g_ptel;
+		note_hpte_modification(kvm, rev);
+	}
 
 	/* Link HPTE into reverse-map chain */
 	if (pteh & HPTE_V_VALID) {
@@ -392,7 +407,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 		/* Read PTE low word after tlbie to get final R/C values */
 		remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
 	}
-	r = rev->guest_rpte;
+	r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
 	vcpu->arch.gpr[4] = v;
@@ -466,6 +482,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 
 			args[j] = ((0x80 | flags) << 56) + pte_index;
 			rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+			note_hpte_modification(kvm, rev);
 
 			if (!(hp[0] & HPTE_V_VALID)) {
 				/* insert R and C bits from PTE */
@@ -555,6 +572,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (rev) {
 		r = (rev->guest_rpte & ~mask) | bits;
 		rev->guest_rpte = r;
+		note_hpte_modification(kvm, rev);
 	}
 	r = (hpte[1] & ~mask) | bits;
 
@@ -606,8 +624,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
 			v &= ~HPTE_V_ABSENT;
 			v |= HPTE_V_VALID;
 		}
-		if (v & HPTE_V_VALID)
+		if (v & HPTE_V_VALID) {
 			r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+			r &= ~HPTE_GR_RESERVED;
+		}
 		vcpu->arch.gpr[4 + i * 2] = v;
 		vcpu->arch.gpr[5 + i * 2] = r;
 	}
-- 
cgit v1.2.3


From 6b445ad4f839b06e68dd8e178e1168482ca20310 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 19 Nov 2012 22:55:44 +0000
Subject: KVM: PPC: Book3S HV: Make a HPTE removal function available

This makes a HPTE removal function, kvmppc_do_h_remove(), available
outside book3s_hv_rm_mmu.c.  This will be used by the HPT writing
code.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  3 +++
 arch/powerpc/kvm/book3s_hv_rm_mmu.c   | 19 +++++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fea768f21cd7..46763d10ad52 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -160,6 +160,9 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel,
 			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
+extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+			unsigned long pte_index, unsigned long avpn,
+			unsigned long *hpret);
 extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
 			struct kvm_memory_slot *memslot, unsigned long *map);
 
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ed563a5f25c8..fc3da3208fda 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -365,11 +365,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
 	return old == 0;
 }
 
-long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
-		     unsigned long pte_index, unsigned long avpn,
-		     unsigned long va)
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+			unsigned long pte_index, unsigned long avpn,
+			unsigned long *hpret)
 {
-	struct kvm *kvm = vcpu->kvm;
 	unsigned long *hpte;
 	unsigned long v, r, rb;
 	struct revmap_entry *rev;
@@ -411,10 +410,18 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	note_hpte_modification(kvm, rev);
 	unlock_hpte(hpte, 0);
 
-	vcpu->arch.gpr[4] = v;
-	vcpu->arch.gpr[5] = r;
+	hpret[0] = v;
+	hpret[1] = r;
 	return H_SUCCESS;
 }
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+		     unsigned long pte_index, unsigned long avpn)
+{
+	return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+				  &vcpu->arch.gpr[4]);
+}
 
 long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 {
-- 
cgit v1.2.3


From a2932923ccf63c419c77aaa18ac09be98f2c94d8 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Mon, 19 Nov 2012 22:57:20 +0000
Subject: KVM: PPC: Book3S HV: Provide a method for userspace to read and write
 the HPT

A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor.  Reads on
this fd return the contents of the HPT (hashed page table), writes
create and/or remove entries in the HPT.  There is a new capability,
KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl.  The ioctl
takes an argument structure with the index of the first HPT entry to
read out and a set of flags.  The flags indicate whether the user is
intending to read or write the HPT, and whether to return all entries
or only the "bolted" entries (those with the bolted bit, 0x10, set in
the first doubleword).

This is intended for use in implementing qemu's savevm/loadvm and for
live migration.  Therefore, on reads, the first pass returns information
about all HPTEs (or all bolted HPTEs).  When the first pass reaches the
end of the HPT, it returns from the read.  Subsequent reads only return
information about HPTEs that have changed since they were last read.
A read that finds no changed HPTEs in the HPT following where the last
read finished will return 0 bytes.

The format of the data provides a simple run-length compression of the
invalid entries.  Each block of data starts with a header that indicates
the index (position in the HPT, which is just an array), the number of
valid entries starting at that index (may be zero), and the number of
invalid entries following those valid entries.  The valid entries, 16
bytes each, follow the header.  The invalid entries are not explicitly
represented.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix documentation]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |  22 ++
 arch/powerpc/include/asm/kvm_ppc.h       |   2 +
 arch/powerpc/include/uapi/asm/kvm.h      |  25 +++
 arch/powerpc/kvm/book3s_64_mmu_hv.c      | 344 +++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv.c             |  12 --
 arch/powerpc/kvm/powerpc.c               |  17 ++
 6 files changed, 410 insertions(+), 12 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index b322e5bd6964..38bec1dc9928 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
 	return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
 }
 
+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+	unsigned long senc = 0;
+
+	if (psize > 0x1000) {
+		senc = SLB_VSID_L;
+		if (psize == 0x10000)
+			senc |= SLB_VSID_LP_01;
+	}
+	return senc;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+	return (hpte_v & ~0xffffffUL) ==
+		(HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 609cca3e9426..1ca31e92ee75 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void);
 
 extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
 
+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b89ae4db45ce..514883dd311e 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params {
 	__u32 reserved[8];
 };
 
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+	__u64	flags;
+	__u64	start_index;
+	__u64	reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY	((__u64)0x1)
+#define KVM_GET_HTAB_WRITE		((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid.  Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream.  The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+	__u32	index;
+	__u16	n_valid;
+	__u16	n_invalid;
+};
+
 #define KVM_REG_PPC_HIOR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
 #define KVM_REG_PPC_IAC1	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
 #define KVM_REG_PPC_IAC2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 6ee6516a0bee..0aa40734c8f6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -25,6 +25,8 @@
 #include <linux/hugetlb.h>
 #include <linux/vmalloc.h>
 #include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
 
 #include <asm/tlbflush.h>
 #include <asm/kvm_ppc.h>
@@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
 	put_page(page);
 }
 
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done.  When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+struct kvm_htab_ctx {
+	unsigned long	index;
+	unsigned long	flags;
+	struct kvm	*kvm;
+	int		first_pass;
+};
+
+#define HPTE_SIZE	(2 * sizeof(unsigned long))
+
+static long record_hpte(unsigned long flags, unsigned long *hptp,
+			unsigned long *hpte, struct revmap_entry *revp,
+			int want_valid, int first_pass)
+{
+	unsigned long v, r;
+	int ok = 1;
+	int valid, dirty;
+
+	/* Unmodified entries are uninteresting except on the first pass */
+	dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+	if (!first_pass && !dirty)
+		return 0;
+
+	valid = 0;
+	if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+		valid = 1;
+		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+		    !(hptp[0] & HPTE_V_BOLTED))
+			valid = 0;
+	}
+	if (valid != want_valid)
+		return 0;
+
+	v = r = 0;
+	if (valid || dirty) {
+		/* lock the HPTE so it's stable and read it */
+		preempt_disable();
+		while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+			cpu_relax();
+		v = hptp[0];
+		if (v & HPTE_V_ABSENT) {
+			v &= ~HPTE_V_ABSENT;
+			v |= HPTE_V_VALID;
+		}
+		/* re-evaluate valid and dirty from synchronized HPTE value */
+		valid = !!(v & HPTE_V_VALID);
+		if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+			valid = 0;
+		r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
+		dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+		/* only clear modified if this is the right sort of entry */
+		if (valid == want_valid && dirty) {
+			r &= ~HPTE_GR_MODIFIED;
+			revp->guest_rpte = r;
+		}
+		asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+		hptp[0] &= ~HPTE_V_HVLOCK;
+		preempt_enable();
+		if (!(valid == want_valid && (first_pass || dirty)))
+			ok = 0;
+	}
+	hpte[0] = v;
+	hpte[1] = r;
+	return ok;
+}
+
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+			     size_t count, loff_t *ppos)
+{
+	struct kvm_htab_ctx *ctx = file->private_data;
+	struct kvm *kvm = ctx->kvm;
+	struct kvm_get_htab_header hdr;
+	unsigned long *hptp;
+	struct revmap_entry *revp;
+	unsigned long i, nb, nw;
+	unsigned long __user *lbuf;
+	struct kvm_get_htab_header __user *hptr;
+	unsigned long flags;
+	int first_pass;
+	unsigned long hpte[2];
+
+	if (!access_ok(VERIFY_WRITE, buf, count))
+		return -EFAULT;
+
+	first_pass = ctx->first_pass;
+	flags = ctx->flags;
+
+	i = ctx->index;
+	hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+	revp = kvm->arch.revmap + i;
+	lbuf = (unsigned long __user *)buf;
+
+	nb = 0;
+	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+		/* Initialize header */
+		hptr = (struct kvm_get_htab_header __user *)buf;
+		hdr.index = i;
+		hdr.n_valid = 0;
+		hdr.n_invalid = 0;
+		nw = nb;
+		nb += sizeof(hdr);
+		lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+		/* Skip uninteresting entries, i.e. clean on not-first pass */
+		if (!first_pass) {
+			while (i < kvm->arch.hpt_npte &&
+			       !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+				++i;
+				hptp += 2;
+				++revp;
+			}
+		}
+
+		/* Grab a series of valid entries */
+		while (i < kvm->arch.hpt_npte &&
+		       hdr.n_valid < 0xffff &&
+		       nb + HPTE_SIZE < count &&
+		       record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+			/* valid entry, write it out */
+			++hdr.n_valid;
+			if (__put_user(hpte[0], lbuf) ||
+			    __put_user(hpte[1], lbuf + 1))
+				return -EFAULT;
+			nb += HPTE_SIZE;
+			lbuf += 2;
+			++i;
+			hptp += 2;
+			++revp;
+		}
+		/* Now skip invalid entries while we can */
+		while (i < kvm->arch.hpt_npte &&
+		       hdr.n_invalid < 0xffff &&
+		       record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+			/* found an invalid entry */
+			++hdr.n_invalid;
+			++i;
+			hptp += 2;
+			++revp;
+		}
+
+		if (hdr.n_valid || hdr.n_invalid) {
+			/* write back the header */
+			if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+				return -EFAULT;
+			nw = nb;
+			buf = (char __user *)lbuf;
+		} else {
+			nb = nw;
+		}
+
+		/* Check if we've wrapped around the hash table */
+		if (i >= kvm->arch.hpt_npte) {
+			i = 0;
+			ctx->first_pass = 0;
+			break;
+		}
+	}
+
+	ctx->index = i;
+
+	return nb;
+}
+
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+			      size_t count, loff_t *ppos)
+{
+	struct kvm_htab_ctx *ctx = file->private_data;
+	struct kvm *kvm = ctx->kvm;
+	struct kvm_get_htab_header hdr;
+	unsigned long i, j;
+	unsigned long v, r;
+	unsigned long __user *lbuf;
+	unsigned long *hptp;
+	unsigned long tmp[2];
+	ssize_t nb;
+	long int err, ret;
+	int rma_setup;
+
+	if (!access_ok(VERIFY_READ, buf, count))
+		return -EFAULT;
+
+	/* lock out vcpus from running while we're doing this */
+	mutex_lock(&kvm->lock);
+	rma_setup = kvm->arch.rma_setup_done;
+	if (rma_setup) {
+		kvm->arch.rma_setup_done = 0;	/* temporarily */
+		/* order rma_setup_done vs. vcpus_running */
+		smp_mb();
+		if (atomic_read(&kvm->arch.vcpus_running)) {
+			kvm->arch.rma_setup_done = 1;
+			mutex_unlock(&kvm->lock);
+			return -EBUSY;
+		}
+	}
+
+	err = 0;
+	for (nb = 0; nb + sizeof(hdr) <= count; ) {
+		err = -EFAULT;
+		if (__copy_from_user(&hdr, buf, sizeof(hdr)))
+			break;
+
+		err = 0;
+		if (nb + hdr.n_valid * HPTE_SIZE > count)
+			break;
+
+		nb += sizeof(hdr);
+		buf += sizeof(hdr);
+
+		err = -EINVAL;
+		i = hdr.index;
+		if (i >= kvm->arch.hpt_npte ||
+		    i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+			break;
+
+		hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+		lbuf = (unsigned long __user *)buf;
+		for (j = 0; j < hdr.n_valid; ++j) {
+			err = -EFAULT;
+			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+				goto out;
+			err = -EINVAL;
+			if (!(v & HPTE_V_VALID))
+				goto out;
+			lbuf += 2;
+			nb += HPTE_SIZE;
+
+			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+			err = -EIO;
+			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
+							 tmp);
+			if (ret != H_SUCCESS) {
+				pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
+				       "r=%lx\n", ret, i, v, r);
+				goto out;
+			}
+			if (!rma_setup && is_vrma_hpte(v)) {
+				unsigned long psize = hpte_page_size(v, r);
+				unsigned long senc = slb_pgsize_encoding(psize);
+				unsigned long lpcr;
+
+				kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+					(VRMA_VSID << SLB_VSID_SHIFT_1T);
+				lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+				lpcr |= senc << (LPCR_VRMASD_SH - 4);
+				kvm->arch.lpcr = lpcr;
+				rma_setup = 1;
+			}
+			++i;
+			hptp += 2;
+		}
+
+		for (j = 0; j < hdr.n_invalid; ++j) {
+			if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+				kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+			++i;
+			hptp += 2;
+		}
+		err = 0;
+	}
+
+ out:
+	/* Order HPTE updates vs. rma_setup_done */
+	smp_wmb();
+	kvm->arch.rma_setup_done = rma_setup;
+	mutex_unlock(&kvm->lock);
+
+	if (err)
+		return err;
+	return nb;
+}
+
+static int kvm_htab_release(struct inode *inode, struct file *filp)
+{
+	struct kvm_htab_ctx *ctx = filp->private_data;
+
+	filp->private_data = NULL;
+	if (!(ctx->flags & KVM_GET_HTAB_WRITE))
+		atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
+	kvm_put_kvm(ctx->kvm);
+	kfree(ctx);
+	return 0;
+}
+
+static struct file_operations kvm_htab_fops = {
+	.read		= kvm_htab_read,
+	.write		= kvm_htab_write,
+	.llseek		= default_llseek,
+	.release	= kvm_htab_release,
+};
+
+int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
+{
+	int ret;
+	struct kvm_htab_ctx *ctx;
+	int rwflag;
+
+	/* reject flags we don't recognize */
+	if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
+		return -EINVAL;
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	kvm_get_kvm(kvm);
+	ctx->kvm = kvm;
+	ctx->index = ghf->start_index;
+	ctx->flags = ghf->flags;
+	ctx->first_pass = 1;
+
+	rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
+	ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
+	if (ret < 0) {
+		kvm_put_kvm(kvm);
+		return ret;
+	}
+
+	if (rwflag == O_RDONLY) {
+		mutex_lock(&kvm->slots_lock);
+		atomic_inc(&kvm->arch.hpte_mod_interest);
+		/* make sure kvmppc_do_h_enter etc. see the increment */
+		synchronize_srcu_expedited(&kvm->srcu);
+		mutex_unlock(&kvm->slots_lock);
+	}
+
+	return ret;
+}
+
 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 843eb754a1d5..a4f59dbcd800 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1563,18 +1563,6 @@ out:
 	return r;
 }
 
-static unsigned long slb_pgsize_encoding(unsigned long psize)
-{
-	unsigned long senc = 0;
-
-	if (psize > 0x1000) {
-		senc = SLB_VSID_L;
-		if (psize == 0x10000)
-			senc |= SLB_VSID_LP_01;
-	}
-	return senc;
-}
-
 static void unpin_slot(struct kvm_memory_slot *memslot)
 {
 	unsigned long *physp;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d583ea15e151..70739a089560 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = 1;
 #else
 		r = 0;
+		break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+	case KVM_CAP_PPC_HTAB_FD:
+		r = 1;
+		break;
 #endif
 		break;
 	case KVM_CAP_NR_VCPUS:
@@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+
+	case KVM_PPC_GET_HTAB_FD: {
+		struct kvm *kvm = filp->private_data;
+		struct kvm_get_htab_fd ghf;
+
+		r = -EFAULT;
+		if (copy_from_user(&ghf, argp, sizeof(ghf)))
+			break;
+		r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+		break;
+	}
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
 
 #ifdef CONFIG_PPC_BOOK3S_64
-- 
cgit v1.2.3


From a64fd707481631b9682f9baeefac489bc55bbf73 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 21 Nov 2012 23:27:19 +0000
Subject: KVM: PPC: Book3S HV: Reset reverse-map chains when resetting the HPT

With HV-style KVM, we maintain reverse-mapping lists that enable us to
find all the HPT (hashed page table) entries that reference each guest
physical page, with the heads of the lists in the memslot->arch.rmap
arrays.  When we reset the HPT (i.e. when we reboot the VM), we clear
out all the HPT entries but we were not clearing out the reverse
mapping lists.  The result is that as we create new HPT entries, the
lists get corrupted, which can easily lead to loops, resulting in the
host kernel hanging when it tries to traverse those lists.

This fixes the problem by zeroing out all the reverse mapping lists
when we zero out the HPT.  This incidentally means that we are also
zeroing our record of the referenced and changed bits (not the bits
in the Linux PTEs, used by the Linux MM subsystem, but the bits used
by the KVM_GET_DIRTY_LOG ioctl, and those used by kvm_age_hva() and
kvm_test_age_hva()).

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 0aa40734c8f6..1029e2201bf6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -46,6 +46,7 @@
 static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret);
+static void kvmppc_rmap_reset(struct kvm *kvm);
 
 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
 {
@@ -143,6 +144,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 		order = kvm->arch.hpt_order;
 		/* Set the entire HPT to 0, i.e. invalid HPTEs */
 		memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
+		/*
+		 * Reset all the reverse-mapping chains for all memslots
+		 */
+		kvmppc_rmap_reset(kvm);
 		/*
 		 * Set the whole last_vcpu array to an invalid vcpu number.
 		 * This ensures that each vcpu will flush its TLB on next entry.
@@ -772,6 +777,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	goto out_put;
 }
 
+static void kvmppc_rmap_reset(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+	slots = kvm->memslots;
+	kvm_for_each_memslot(memslot, slots) {
+		/*
+		 * This assumes it is acceptable to lose reference and
+		 * change bits across a reset.
+		 */
+		memset(memslot->arch.rmap, 0,
+		       memslot->npages * sizeof(*memslot->arch.rmap));
+	}
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
 static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long start,
 				unsigned long end,
-- 
cgit v1.2.3


From 05dd85f7933ffbe6d71415e631c95ca615ae1e81 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 21 Nov 2012 23:29:12 +0000
Subject: KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT

This fixes a bug in the code which allows userspace to read out the
contents of the guest's hashed page table (HPT).  On the second and
subsequent passes through the HPT, when we are reporting only those
entries that have changed, we were incorrectly initializing the index
field of the header with the index of the first entry we skipped
rather than the first changed entry.  This fixes it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1029e2201bf6..ac6b5acb99b9 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1282,7 +1282,6 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 	while (nb + sizeof(hdr) + HPTE_SIZE < count) {
 		/* Initialize header */
 		hptr = (struct kvm_get_htab_header __user *)buf;
-		hdr.index = i;
 		hdr.n_valid = 0;
 		hdr.n_invalid = 0;
 		nw = nb;
@@ -1298,6 +1297,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
 				++revp;
 			}
 		}
+		hdr.index = i;
 
 		/* Grab a series of valid entries */
 		while (i < kvm->arch.hpt_npte &&
-- 
cgit v1.2.3


From 1cc8ed0b13ae6e076a1dd1f18da508b48c7aa05a Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 21 Nov 2012 23:28:41 +0000
Subject: KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages

Currently, if the guest does an H_PROTECT hcall requesting that the
permissions on a HPT entry be changed to allow writing, we make the
requested change even if the page is marked read-only in the host
Linux page tables.  This is a problem since it would for instance
allow a guest to modify a page that KSM has decided can be shared
between multiple guests.

To fix this, if the new permissions for the page allow writing, we need
to look up the memslot for the page, work out the host virtual address,
and look up the Linux page tables to get the PTE for the page.  If that
PTE is read-only, we reduce the HPTE permissions to read-only.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_hv_rm_mmu.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fc3da3208fda..7a57ea49172d 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -600,6 +600,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 			asm volatile("tlbiel %0" : : "r" (rb));
 			asm volatile("ptesync" : : : "memory");
 		}
+		/*
+		 * If the host has this page as readonly but the guest
+		 * wants to make it read/write, reduce the permissions.
+		 * Checking the host permissions involves finding the
+		 * memslot and then the Linux PTE for the page.
+		 */
+		if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
+			unsigned long psize, gfn, hva;
+			struct kvm_memory_slot *memslot;
+			pgd_t *pgdir = vcpu->arch.pgdir;
+			pte_t pte;
+
+			psize = hpte_page_size(v, r);
+			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+			memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+			if (memslot) {
+				hva = __gfn_to_hva_memslot(memslot, gfn);
+				pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+				if (pte_present(pte) && !pte_write(pte))
+					r = hpte_make_readonly(r);
+			}
+		}
 	}
 	hpte[1] = r;
 	eieio();
-- 
cgit v1.2.3


From b0a94d4e23201c7559bb8f8657cfb629561288f2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 4 Nov 2012 18:15:43 +0000
Subject: KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers

This adds basic emulation of the PURR and SPURR registers.  We assume
we are emulating a single-threaded core, so these advance at the same
rate as the timebase.  A Linux kernel running on a POWER7 expects to
be able to access these registers and is not prepared to handle a
program interrupt on accessing them.

This also adds a very minimal emulation of the DSCR (data stream
control register).  Writes are ignored and reads return zero.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s.h |  2 ++
 arch/powerpc/kvm/book3s_emulate.c     | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 46763d10ad52..5a56e1c5f851 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
 	u64 sdr1;
 	u64 hior;
 	u64 msr_mask;
+	u64 purr_offset;
+	u64 spurr_offset;
 #ifdef CONFIG_PPC_BOOK3S_32
 	u32 vsid_pool[VSID_POOL_SIZE];
 	u32 vsid_next;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9a989dc76cc..d31a716f7f2b 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -22,6 +22,7 @@
 #include <asm/kvm_book3s.h>
 #include <asm/reg.h>
 #include <asm/switch_to.h>
+#include <asm/time.h>
 
 #define OP_19_XOP_RFID		18
 #define OP_19_XOP_RFI		50
@@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 		    (mfmsr() & MSR_HV))
 			vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
 		break;
+	case SPRN_PURR:
+		to_book3s(vcpu)->purr_offset = spr_val - get_tb();
+		break;
+	case SPRN_SPURR:
+		to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
+		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
 	case SPRN_GQR2:
@@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_CTRLF:
 	case SPRN_CTRLT:
 	case SPRN_L2CR:
+	case SPRN_DSCR:
 	case SPRN_MMCR0_GEKKO:
 	case SPRN_MMCR1_GEKKO:
 	case SPRN_PMC1_GEKKO:
@@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 		*spr_val = to_book3s(vcpu)->hid[5];
 		break;
 	case SPRN_CFAR:
-	case SPRN_PURR:
+	case SPRN_DSCR:
 		*spr_val = 0;
 		break;
+	case SPRN_PURR:
+		*spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+		break;
+	case SPRN_SPURR:
+		*spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+		break;
 	case SPRN_GQR0:
 	case SPRN_GQR1:
 	case SPRN_GQR2:
-- 
cgit v1.2.3


From 28c483b62fcd2589dadfc1250970f85aa0ab3df6 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 4 Nov 2012 18:16:46 +0000
Subject: KVM: PPC: Book3S PR: Fix VSX handling

This fixes various issues in how we were handling the VSX registers
that exist on POWER7 machines.  First, we were running off the end
of the current->thread.fpr[] array.  Ultimately this was because the
vcpu->arch.vsr[] array is sized to be able to store both the FP
registers and the extra VSX registers (i.e. 64 entries), but PR KVM
only uses it for the extra VSX registers (i.e. 32 entries).

Secondly, calling load_up_vsx() from C code is a really bad idea,
because it jumps to fast_exception_return at the end, rather than
returning with a blr instruction.  This was causing it to jump off
to a random location with random register contents, since it was using
the largely uninitialized stack frame created by kvmppc_load_up_vsx.

In fact, it isn't necessary to call either __giveup_vsx or load_up_vsx,
since giveup_fpu and load_up_fpu handle the extra VSX registers as well
as the standard FP registers on machines with VSX.  Also, since VSX
instructions can access the VMX registers and the FP registers as well
as the extra VSX registers, we have to load up the FP and VMX registers
before we can turn on the MSR_VSX bit for the guest.  Conversely, if
we save away any of the VSX or FP registers, we have to turn off MSR_VSX
for the guest.

To handle all this, it is more convenient for a single call to
kvmppc_giveup_ext() to handle all the state saving that needs to be done,
so we make it take a set of MSR bits rather than just one, and the switch
statement becomes a series of if statements.  Similarly kvmppc_handle_ext
needs to be able to load up more than one set of registers.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/reg.h       |   1 +
 arch/powerpc/kvm/book3s_exports.c    |   3 -
 arch/powerpc/kvm/book3s_pr.c         | 112 +++++++++++++++++++----------------
 arch/powerpc/kvm/book3s_rmhandlers.S |   3 -
 4 files changed, 62 insertions(+), 57 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d24c14163966..97d37278ea2d 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -518,6 +518,7 @@
 #define	  SRR1_WS_DEEPER	0x00020000 /* Some resources not maintained */
 #define	  SRR1_WS_DEEP		0x00010000 /* All resources maintained */
 #define   SRR1_PROGFPE		0x00100000 /* Floating Point Enabled */
+#define   SRR1_PROGILL		0x00080000 /* Illegal instruction */
 #define   SRR1_PROGPRIV		0x00040000 /* Privileged instruction */
 #define   SRR1_PROGTRAP		0x00020000 /* Trap */
 #define   SRR1_PROGADDR		0x00010000 /* SRR0 contains subsequent addr */
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index a150817d6d4c..7057a02f0906 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
 #ifdef CONFIG_ALTIVEC
 EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
 #endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
-#endif
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b853696b6d8e..5c496ecf5718 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -81,9 +81,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
 	svcpu_put(svcpu);
 #endif
 
-	kvmppc_giveup_ext(vcpu, MSR_FP);
-	kvmppc_giveup_ext(vcpu, MSR_VEC);
-	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
 	vcpu->cpu = -1;
 }
 
@@ -433,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 
 static inline int get_fpr_index(int i)
 {
-#ifdef CONFIG_VSX
-	i *= 2;
-#endif
-	return i;
+	return i * TS_FPRWIDTH;
 }
 
 /* Give up external provider (FPU, Altivec, VSX) */
@@ -450,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 	u64 *thread_fpr = (u64*)t->fpr;
 	int i;
 
-	if (!(vcpu->arch.guest_owned_ext & msr))
+	/*
+	 * VSX instructions can access FP and vector registers, so if
+	 * we are giving up VSX, make sure we give up FP and VMX as well.
+	 */
+	if (msr & MSR_VSX)
+		msr |= MSR_FP | MSR_VEC;
+
+	msr &= vcpu->arch.guest_owned_ext;
+	if (!msr)
 		return;
 
 #ifdef DEBUG_EXT
 	printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
 #endif
 
-	switch (msr) {
-	case MSR_FP:
+	if (msr & MSR_FP) {
+		/*
+		 * Note that on CPUs with VSX, giveup_fpu stores
+		 * both the traditional FP registers and the added VSX
+		 * registers into thread.fpr[].
+		 */
 		giveup_fpu(current);
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
 
 		vcpu->arch.fpscr = t->fpscr.val;
-		break;
-	case MSR_VEC:
+
+#ifdef CONFIG_VSX
+		if (cpu_has_feature(CPU_FTR_VSX))
+			for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+				vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+	}
+
 #ifdef CONFIG_ALTIVEC
+	if (msr & MSR_VEC) {
 		giveup_altivec(current);
 		memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
 		vcpu->arch.vscr = t->vscr;
-#endif
-		break;
-	case MSR_VSX:
-#ifdef CONFIG_VSX
-		__giveup_vsx(current);
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
-			vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
-		break;
-	default:
-		BUG();
 	}
+#endif
 
-	vcpu->arch.guest_owned_ext &= ~msr;
-	current->thread.regs->msr &= ~msr;
+	vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
 	kvmppc_recalc_shadow_msr(vcpu);
 }
 
@@ -544,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 		return RESUME_GUEST;
 	}
 
-	/* We already own the ext */
-	if (vcpu->arch.guest_owned_ext & msr) {
-		return RESUME_GUEST;
+	if (msr == MSR_VSX) {
+		/* No VSX?  Give an illegal instruction interrupt */
+#ifdef CONFIG_VSX
+		if (!cpu_has_feature(CPU_FTR_VSX))
+#endif
+		{
+			kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+			return RESUME_GUEST;
+		}
+
+		/*
+		 * We have to load up all the FP and VMX registers before
+		 * we can let the guest use VSX instructions.
+		 */
+		msr = MSR_FP | MSR_VEC | MSR_VSX;
 	}
 
+	/* See if we already own all the ext(s) needed */
+	msr &= ~vcpu->arch.guest_owned_ext;
+	if (!msr)
+		return RESUME_GUEST;
+
 #ifdef DEBUG_EXT
 	printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
 #endif
 
 	current->thread.regs->msr |= msr;
 
-	switch (msr) {
-	case MSR_FP:
+	if (msr & MSR_FP) {
 		for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
 			thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-
+#ifdef CONFIG_VSX
+		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+#endif
 		t->fpscr.val = vcpu->arch.fpscr;
 		t->fpexc_mode = 0;
 		kvmppc_load_up_fpu();
-		break;
-	case MSR_VEC:
+	}
+
+	if (msr & MSR_VEC) {
 #ifdef CONFIG_ALTIVEC
 		memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
 		t->vscr = vcpu->arch.vscr;
 		t->vrsave = -1;
 		kvmppc_load_up_altivec();
 #endif
-		break;
-	case MSR_VSX:
-#ifdef CONFIG_VSX
-		for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
-			thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
-		kvmppc_load_up_vsx();
-#endif
-		break;
-	default:
-		BUG();
 	}
 
 	vcpu->arch.guest_owned_ext |= msr;
-
 	kvmppc_recalc_shadow_msr(vcpu);
 
 	return RESUME_GUEST;
@@ -1134,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* Save VSX state in stack */
 	used_vsr = current->thread.used_vsr;
 	if (used_vsr && (current->thread.regs->msr & MSR_VSX))
-			__giveup_vsx(current);
+		__giveup_vsx(current);
 #endif
 
 	/* Remember the MSR with disabled extensions */
@@ -1151,14 +1163,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 	/* No need for kvm_guest_exit. It's done in handle_exit.
 	   We also get here with interrupts enabled. */
 
-	current->thread.regs->msr = ext_msr;
-
 	/* Make sure we save the guest FPU/Altivec/VSX state */
-	kvmppc_giveup_ext(vcpu, MSR_FP);
-	kvmppc_giveup_ext(vcpu, MSR_VEC);
-	kvmppc_giveup_ext(vcpu, MSR_VSX);
+	kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+
+	current->thread.regs->msr = ext_msr;
 
-	/* Restore FPU state from stack */
+	/* Restore FPU/VSX state from stack */
 	memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
 	current->thread.fpscr.val = fpscr;
 	current->thread.fpexc_mode = fpexc_mode;
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index b2f8258b545a..8f7633e3afb8 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -234,8 +234,5 @@ define_load_up(fpu)
 #ifdef CONFIG_ALTIVEC
 define_load_up(altivec)
 #endif
-#ifdef CONFIG_VSX
-define_load_up(vsx)
-#endif
 
 #include "book3s_segment.S"
-- 
cgit v1.2.3


From 3a2e7b0d761ae3faecdb43482d178b5fe2e3b8a5 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Sun, 4 Nov 2012 18:17:28 +0000
Subject: KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S

The mask of MSR bits that get transferred from the guest MSR to the
shadow MSR included MSR_DE.  In fact that bit only exists on Book 3E
processors, and it is assigned the same bit used for MSR_BE on Book 3S
processors.  Since we already had MSR_BE in the mask, this just removes
MSR_DE.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/book3s_pr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5c496ecf5718..28d38adeca73 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -145,7 +145,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 	ulong smsr = vcpu->arch.shared->msr;
 
 	/* Guest MSR values */
-	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
+	smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
 	/* Process MSR values */
 	smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
 	/* External providers the guest reserved */
-- 
cgit v1.2.3


From 1b400ba0cd24a5994d792c7cfa0ee24cac266d3c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Wed, 21 Nov 2012 23:28:08 +0000
Subject: KVM: PPC: Book3S HV: Improve handling of local vs. global TLB
 invalidations

When we change or remove a HPT (hashed page table) entry, we can do
either a global TLB invalidation (tlbie) that works across the whole
machine, or a local invalidation (tlbiel) that only affects this core.
Currently we do local invalidations if the VM has only one vcpu or if
the guest requests it with the H_LOCAL flag, though the guest Linux
kernel currently doesn't ever use H_LOCAL.  Then, to cope with the
possibility that vcpus moving around to different physical cores might
expose stale TLB entries, there is some code in kvmppc_hv_entry to
flush the whole TLB of entries for this VM if either this vcpu is now
running on a different physical core from where it last ran, or if this
physical core last ran a different vcpu.

There are a number of problems on POWER7 with this as it stands:

- The TLB invalidation is done per thread, whereas it only needs to be
  done per core, since the TLB is shared between the threads.
- With the possibility of the host paging out guest pages, the use of
  H_LOCAL by an SMP guest is dangerous since the guest could possibly
  retain and use a stale TLB entry pointing to a page that had been
  removed from the guest.
- The TLB invalidations that we do when a vcpu moves from one physical
  core to another are unnecessary in the case of an SMP guest that isn't
  using H_LOCAL.
- The optimization of using local invalidations rather than global should
  apply to guests with one virtual core, not just one vcpu.

(None of this applies on PPC970, since there we always have to
invalidate the whole TLB when entering and leaving the guest, and we
can't support paging out guest memory.)

To fix these problems and simplify the code, we now maintain a simple
cpumask of which cpus need to flush the TLB on entry to the guest.
(This is indexed by cpu, though we only ever use the bits for thread
0 of each core.)  Whenever we do a local TLB invalidation, we set the
bits for every cpu except the bit for thread 0 of the core that we're
currently running on.  Whenever we enter a guest, we test and clear the
bit for our core, and flush the TLB if it was set.

On initial startup of the VM, and when resetting the HPT, we set all the
bits in the need_tlb_flush cpumask, since any core could potentially have
stale TLB entries from the previous VM to use the same LPID, or the
previous contents of the HPT.

Then, we maintain a count of the number of online virtual cores, and use
that when deciding whether to use a local invalidation rather than the
number of online vcpus.  The code to make that decision is extracted out
into a new function, global_invalidates().  For multi-core guests on
POWER7 (i.e. when we are using mmu notifiers), we now never do local
invalidations regardless of the H_LOCAL flag.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h     |  5 +--
 arch/powerpc/kernel/asm-offsets.c       |  4 +--
 arch/powerpc/kvm/book3s_64_mmu_hv.c     |  7 ++---
 arch/powerpc/kvm/book3s_hv.c            |  9 +++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c     | 37 +++++++++++++++++++---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 56 +++++++++++++++------------------
 6 files changed, 73 insertions(+), 45 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 58c72646c445..62fbd38b15fa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -246,11 +246,12 @@ struct kvm_arch {
 	int using_mmu_notifiers;
 	u32 hpt_order;
 	atomic_t vcpus_running;
+	u32 online_vcores;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
 	spinlock_t slot_phys_lock;
-	unsigned short last_vcpu[NR_CPUS];
+	cpumask_t need_tlb_flush;
 	struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
 	struct kvmppc_linear_info *hpt_li;
 #endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -275,6 +276,7 @@ struct kvmppc_vcore {
 	int nap_count;
 	int napping_threads;
 	u16 pcpu;
+	u16 last_cpu;
 	u8 vcore_state;
 	u8 in_guest;
 	struct list_head runnable_threads;
@@ -523,7 +525,6 @@ struct kvm_vcpu_arch {
 	u64 dec_jiffies;
 	u64 dec_expires;
 	unsigned long pending_exceptions;
-	u16 last_cpu;
 	u8 ceded;
 	u8 prodded;
 	u32 last_inst;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7523539cfe9f..4e23ba2f3ca7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -441,8 +441,7 @@ int main(void)
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
 	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
-	DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
-	DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
 	DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
 	DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
 	DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
 	DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
 	DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
-	DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
 	DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
 	DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
 	DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ac6b5acb99b9..8cc18abd6dde 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -148,11 +148,8 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
 		 * Reset all the reverse-mapping chains for all memslots
 		 */
 		kvmppc_rmap_reset(kvm);
-		/*
-		 * Set the whole last_vcpu array to an invalid vcpu number.
-		 * This ensures that each vcpu will flush its TLB on next entry.
-		 */
-		memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+		/* Ensure that each vcpu will flush its TLB on next entry. */
+		cpumask_setall(&kvm->arch.need_tlb_flush);
 		*htab_orderp = order;
 		err = 0;
 	} else {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a4f59dbcd800..ddbec60cb0d2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -853,7 +853,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 		goto free_vcpu;
 
 	vcpu->arch.shared = &vcpu->arch.shregs;
-	vcpu->arch.last_cpu = -1;
 	vcpu->arch.mmcr[0] = MMCR0_FC;
 	vcpu->arch.ctrl = CTRL_RUNLATCH;
 	/* default to host PVR, since we can't spoof it */
@@ -880,6 +879,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
 			vcore->preempt_tb = TB_NIL;
 		}
 		kvm->arch.vcores[core] = vcore;
+		kvm->arch.online_vcores++;
 	}
 	mutex_unlock(&kvm->lock);
 
@@ -1802,6 +1802,13 @@ int kvmppc_core_init_vm(struct kvm *kvm)
 		return -ENOMEM;
 	kvm->arch.lpid = lpid;
 
+	/*
+	 * Since we don't flush the TLB when tearing down a VM,
+	 * and this lpid might have previously been used,
+	 * make sure we flush on each core before running the new VM.
+	 */
+	cpumask_setall(&kvm->arch.need_tlb_flush);
+
 	INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
 
 	kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7a57ea49172d..19c93bae1aea 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
 	return __va(addr);
 }
 
+/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
+static int global_invalidates(struct kvm *kvm, unsigned long flags)
+{
+	int global;
+
+	/*
+	 * If there is only one vcore, and it's currently running,
+	 * we can use tlbiel as long as we mark all other physical
+	 * cores as potentially having stale TLB entries for this lpid.
+	 * If we're not using MMU notifiers, we never take pages away
+	 * from the guest, so we can use tlbiel if requested.
+	 * Otherwise, don't use tlbiel.
+	 */
+	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+		global = 0;
+	else if (kvm->arch.using_mmu_notifiers)
+		global = 1;
+	else
+		global = !(flags & H_LOCAL);
+
+	if (!global) {
+		/* any other core might now have stale TLB entries... */
+		smp_wmb();
+		cpumask_setall(&kvm->arch.need_tlb_flush);
+		cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
+				  &kvm->arch.need_tlb_flush);
+	}
+
+	return global;
+}
+
 /*
  * Add this HPTE into the chain for the real page.
  * Must be called with the chain locked; it unlocks the chain.
@@ -390,7 +421,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		hpte[0] &= ~HPTE_V_VALID;
 		rb = compute_tlbie_rb(v, hpte[1], pte_index);
-		if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+		if (global_invalidates(kvm, flags)) {
 			while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 				cpu_relax();
 			asm volatile("ptesync" : : : "memory");
@@ -565,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 		return H_NOT_FOUND;
 	}
 
-	if (atomic_read(&kvm->online_vcpus) == 1)
-		flags |= H_LOCAL;
 	v = hpte[0];
 	bits = (flags << 55) & HPTE_R_PP0;
 	bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -587,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (v & HPTE_V_VALID) {
 		rb = compute_tlbie_rb(v, r, pte_index);
 		hpte[0] = v & ~HPTE_V_VALID;
-		if (!(flags & H_LOCAL)) {
+		if (global_invalidates(kvm, flags)) {
 			while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
 				cpu_relax();
 			asm volatile("ptesync" : : : "memory");
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 690d1120402d..b48bd53dd771 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -313,7 +313,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
 	mtspr	SPRN_LPID,r7
 	isync
+
+	/* See if we need to flush the TLB */
+	lhz	r6,PACAPACAINDEX(r13)	/* test_bit(cpu, need_tlb_flush) */
+	clrldi	r7,r6,64-6		/* extract bit number (6 bits) */
+	srdi	r6,r6,6			/* doubleword number */
+	sldi	r6,r6,3			/* address offset */
+	add	r6,r6,r9
+	addi	r6,r6,KVM_NEED_FLUSH	/* dword in kvm->arch.need_tlb_flush */
 	li	r0,1
+	sld	r0,r0,r7
+	ld	r7,0(r6)
+	and.	r7,r7,r0
+	beq	22f
+23:	ldarx	r7,0,r6			/* if set, clear the bit */
+	andc	r7,r7,r0
+	stdcx.	r7,0,r6
+	bne	23b
+	li	r6,128			/* and flush the TLB */
+	mtctr	r6
+	li	r7,0x800		/* IS field = 0b10 */
+	ptesync
+28:	tlbiel	r7
+	addi	r7,r7,0x1000
+	bdnz	28b
+	ptesync
+
+22:	li	r0,1
 	stb	r0,VCORE_IN_GUEST(r5)	/* signal secondaries to continue */
 	b	10f
 
@@ -336,36 +362,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mr	r9,r4
 	blt	hdec_soon
 
-	/*
-	 * Invalidate the TLB if we could possibly have stale TLB
-	 * entries for this partition on this core due to the use
-	 * of tlbiel.
-	 * XXX maybe only need this on primary thread?
-	 */
-	ld	r9,VCPU_KVM(r4)		/* pointer to struct kvm */
-	lwz	r5,VCPU_VCPUID(r4)
-	lhz	r6,PACAPACAINDEX(r13)
-	rldimi	r6,r5,0,62		/* XXX map as if threads 1:1 p:v */
-	lhz	r8,VCPU_LAST_CPU(r4)
-	sldi	r7,r6,1			/* see if this is the same vcpu */
-	add	r7,r7,r9		/* as last ran on this pcpu */
-	lhz	r0,KVM_LAST_VCPU(r7)
-	cmpw	r6,r8			/* on the same cpu core as last time? */
-	bne	3f
-	cmpw	r0,r5			/* same vcpu as this core last ran? */
-	beq	1f
-3:	sth	r6,VCPU_LAST_CPU(r4)	/* if not, invalidate partition TLB */
-	sth	r5,KVM_LAST_VCPU(r7)
-	li	r6,128
-	mtctr	r6
-	li	r7,0x800		/* IS field = 0b10 */
-	ptesync
-2:	tlbiel	r7
-	addi	r7,r7,0x1000
-	bdnz	2b
-	ptesync
-1:
-
 	/* Save purr/spurr */
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
-- 
cgit v1.2.3


From b4072df4076c4f33ac9f518052c318c979bca533 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 23 Nov 2012 22:37:50 +0000
Subject: KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7
 without panicking

Currently, if a machine check interrupt happens while we are in the
guest, we exit the guest and call the host's machine check handler,
which tends to cause the host to panic.  Some machine checks can be
triggered by the guest; for example, if the guest creates two entries
in the SLB that map the same effective address, and then accesses that
effective address, the CPU will take a machine check interrupt.

To handle this better, when a machine check happens inside the guest,
we call a new function, kvmppc_realmode_machine_check(), while still in
real mode before exiting the guest.  On POWER7, it handles the cases
that the guest can trigger, either by flushing and reloading the SLB,
or by flushing the TLB, and then it delivers the machine check interrupt
directly to the guest without going back to the host.  On POWER7, the
OPAL firmware patches the machine check interrupt vector so that it
gets control first, and it leaves behind its analysis of the situation
in a structure pointed to by the opal_mc_evt field of the paca.  The
kvmppc_realmode_machine_check() function looks at this, and if OPAL
reports that there was no error, or that it has handled the error, we
also go straight back to the guest with a machine check.  We have to
deliver a machine check to the guest since the machine check interrupt
might have trashed valid values in SRR0/1.

If the machine check is one we can't handle in real mode, and one that
OPAL hasn't already handled, or on PPC970, we exit the guest and call
the host's machine check handler.  We do this by jumping to the
machine_check_fwnmi label, rather than absolute address 0x200, because
we don't want to re-execute OPAL's handler on POWER7.  On PPC970, the
two are equivalent because address 0x200 just contains a branch.

Then, if the host machine check handler decides that the system can
continue executing, kvmppc_handle_exit() delivers a machine check
interrupt to the guest -- once again to let the guest know that SRR0/1
have been modified.

Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix checkpatch warnings]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/mmu-hash64.h   |  10 +++
 arch/powerpc/kvm/Makefile               |   1 +
 arch/powerpc/kvm/book3s_hv.c            |  11 +++
 arch/powerpc/kvm/book3s_hv_ras.c        | 144 ++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  75 ++++++++++-------
 5 files changed, 213 insertions(+), 28 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_ras.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 9673f73eb8db..2fdb47a19efd 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -121,6 +121,16 @@ extern char initial_stab[];
 #define PP_RXRX 3	/* Supervisor read,       User read */
 #define PP_RXXX	(HPTE_R_PP0 | 2)	/* Supervisor read, user none */
 
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK	0xc00	/* invalidation selector */
+#define  TLBIEL_INVAL_PAGE	0x000	/* invalidate a single page */
+#define  TLBIEL_INVAL_SET_LPID	0x800	/* invalidate a set for current LPID */
+#define  TLBIEL_INVAL_SET	0xc00	/* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK	0xfff000	/* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT	12
+
+#define POWER7_TLB_SETS		128	/* # sets in POWER7 TLB */
+
 #ifndef __ASSEMBLY__
 
 struct hash_pte {
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cd8965828676..1e473d46322c 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -73,6 +73,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
 	book3s_hv_rmhandlers.o \
 	book3s_hv_rm_mmu.o \
 	book3s_64_vio_hv.o \
+	book3s_hv_ras.o \
 	book3s_hv_builtin.o
 
 kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ddbec60cb0d2..71d0c90b62bf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -545,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	case BOOK3S_INTERRUPT_PERFMON:
 		r = RESUME_GUEST;
 		break;
+	case BOOK3S_INTERRUPT_MACHINE_CHECK:
+		/*
+		 * Deliver a machine check interrupt to the guest.
+		 * We have to do this, even if the host has handled the
+		 * machine check, because machine checks use SRR0/1 and
+		 * the interrupt might have trashed guest state in them.
+		 */
+		kvmppc_book3s_queue_irqprio(vcpu,
+					    BOOK3S_INTERRUPT_MACHINE_CHECK);
+		r = RESUME_GUEST;
+		break;
 	case BOOK3S_INTERRUPT_PROGRAM:
 	{
 		ulong flags;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 000000000000..35f3cf0269b3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,144 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <asm/opal.h>
+
+/* SRR1 bits for machine check on POWER7 */
+#define SRR1_MC_LDSTERR		(1ul << (63-42))
+#define SRR1_MC_IFETCH_SH	(63-45)
+#define SRR1_MC_IFETCH_MASK	0x7
+#define SRR1_MC_IFETCH_SLBPAR		2	/* SLB parity error */
+#define SRR1_MC_IFETCH_SLBMULTI		3	/* SLB multi-hit */
+#define SRR1_MC_IFETCH_SLBPARMULTI	4	/* SLB parity + multi-hit */
+#define SRR1_MC_IFETCH_TLBMULTI		5	/* I-TLB multi-hit */
+
+/* DSISR bits for machine check on POWER7 */
+#define DSISR_MC_DERAT_MULTI	0x800		/* D-ERAT multi-hit */
+#define DSISR_MC_TLB_MULTI	0x400		/* D-TLB multi-hit */
+#define DSISR_MC_SLB_PARITY	0x100		/* SLB parity error */
+#define DSISR_MC_SLB_MULTI	0x080		/* SLB multi-hit */
+#define DSISR_MC_SLB_PARMULTI	0x040		/* SLB parity + multi-hit */
+
+/* POWER7 SLB flush and reload */
+static void reload_slb(struct kvm_vcpu *vcpu)
+{
+	struct slb_shadow *slb;
+	unsigned long i, n;
+
+	/* First clear out SLB */
+	asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+	/* Do they have an SLB shadow buffer registered? */
+	slb = vcpu->arch.slb_shadow.pinned_addr;
+	if (!slb)
+		return;
+
+	/* Sanity check */
+	n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+	if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
+		return;
+
+	/* Load up the SLB from that */
+	for (i = 0; i < n; ++i) {
+		unsigned long rb = slb->save_area[i].esid;
+		unsigned long rs = slb->save_area[i].vsid;
+
+		rb = (rb & ~0xFFFul) | i;	/* insert entry number */
+		asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+	}
+}
+
+/* POWER7 TLB flush */
+static void flush_tlb_power7(struct kvm_vcpu *vcpu)
+{
+	unsigned long i, rb;
+
+	rb = TLBIEL_INVAL_SET_LPID;
+	for (i = 0; i < POWER7_TLB_SETS; ++i) {
+		asm volatile("tlbiel %0" : : "r" (rb));
+		rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+	}
+}
+
+/*
+ * On POWER7, see if we can handle a machine check that occurred inside
+ * the guest in real mode, without switching to the host partition.
+ *
+ * Returns: 0 => exit guest, 1 => deliver machine check to guest
+ */
+static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+{
+	unsigned long srr1 = vcpu->arch.shregs.msr;
+	struct opal_machine_check_event *opal_evt;
+	long handled = 1;
+
+	if (srr1 & SRR1_MC_LDSTERR) {
+		/* error on load/store */
+		unsigned long dsisr = vcpu->arch.shregs.dsisr;
+
+		if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+			     DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
+			/* flush and reload SLB; flushes D-ERAT too */
+			reload_slb(vcpu);
+			dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+				   DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
+		}
+		if (dsisr & DSISR_MC_TLB_MULTI) {
+			flush_tlb_power7(vcpu);
+			dsisr &= ~DSISR_MC_TLB_MULTI;
+		}
+		/* Any other errors we don't understand? */
+		if (dsisr & 0xffffffffUL)
+			handled = 0;
+	}
+
+	switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
+	case 0:
+		break;
+	case SRR1_MC_IFETCH_SLBPAR:
+	case SRR1_MC_IFETCH_SLBMULTI:
+	case SRR1_MC_IFETCH_SLBPARMULTI:
+		reload_slb(vcpu);
+		break;
+	case SRR1_MC_IFETCH_TLBMULTI:
+		flush_tlb_power7(vcpu);
+		break;
+	default:
+		handled = 0;
+	}
+
+	/*
+	 * See if OPAL has already handled the condition.
+	 * We assume that if the condition is recovered then OPAL
+	 * will have generated an error log event that we will pick
+	 * up and log later.
+	 */
+	opal_evt = local_paca->opal_mc_evt;
+	if (opal_evt->version == OpalMCE_V1 &&
+	    (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
+	     opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+		handled = 1;
+
+	if (handled)
+		opal_evt->in_use = 0;
+
+	return handled;
+}
+
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_206))
+		return kvmppc_realmode_mc_power7(vcpu);
+
+	return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b48bd53dd771..10b6c358dd77 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,6 +27,7 @@
 #include <asm/asm-offsets.h>
 #include <asm/exception-64s.h>
 #include <asm/kvm_book3s_asm.h>
+#include <asm/mmu-hash64.h>
 
 /*****************************************************************************
  *                                                                           *
@@ -678,8 +679,7 @@ BEGIN_FTR_SECTION
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
-nohpte_cont:
-hcall_real_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
+guest_exit_cont:		/* r9 = vcpu, r12 = trap, r13 = paca */
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
 	mftb	r6
@@ -700,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r6, VCPU_FAULT_DAR(r9)
 	stw	r7, VCPU_FAULT_DSISR(r9)
 
+	/* See if it is a machine check */
+	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	machine_check_realmode
+mc_cont:
+
 	/* Save guest CTRL register, set runlatch to 1 */
 6:	mfspr	r6,SPRN_CTRLF
 	stw	r6,VCPU_CTRL(r9)
@@ -1112,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
 	 * For external and machine check interrupts, we need
 	 * to call the Linux handler to process the interrupt.
-	 * We do that by jumping to the interrupt vector address
-	 * which we have in r12.  The [h]rfid at the end of the
+	 * We do that by jumping to absolute address 0x500 for
+	 * external interrupts, or the machine_check_fwnmi label
+	 * for machine checks (since firmware might have patched
+	 * the vector area at 0x200).  The [h]rfid at the end of the
 	 * handler will return to the book3s_hv_interrupts.S code.
 	 * For other interrupts we do the rfid to get back
-	 * to the book3s_interrupts.S code here.
+	 * to the book3s_hv_interrupts.S code here.
 	 */
 	ld	r8, HSTATE_VMHANDLER(r13)
 	ld	r7, HSTATE_HOST_MSR(r13)
 
+	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
 	beq	11f
-	cmpwi	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* RFI into the highmem handler, or branch to interrupt handler */
-12:	mfmsr	r6
-	mtctr	r12
+	mfmsr	r6
 	li	r0, MSR_RI
 	andc	r6, r6, r0
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	beqctr
+	beqa	0x500			/* external interrupt (PPC970) */
+	beq	cr1, 13f		/* machine check */
 	RFI
 
-11:
-BEGIN_FTR_SECTION
-	b	12b
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-	mtspr	SPRN_HSRR0, r8
+	/* On POWER7, we have external interrupts set to use HSRR0/1 */
+11:	mtspr	SPRN_HSRR0, r8
 	mtspr	SPRN_HSRR1, r7
 	ba	0x500
 
+13:	b	machine_check_fwnmi
+
 /*
  * Check whether an HDSI is an HPTE not found fault or something else.
  * If it is an HPTE not found fault that is due to the guest accessing
@@ -1176,7 +1184,7 @@ kvmppc_hdsi:
 	cmpdi	r3, 0			/* retry the instruction */
 	beq	6f
 	cmpdi	r3, -1			/* handle in kernel mode */
-	beq	nohpte_cont
+	beq	guest_exit_cont
 	cmpdi	r3, -2			/* MMIO emulation; need instr word */
 	beq	2f
 
@@ -1190,6 +1198,7 @@ kvmppc_hdsi:
 	li	r10, BOOK3S_INTERRUPT_DATA_STORAGE
 	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
 	rotldi	r11, r11, 63
+fast_interrupt_c_return:
 6:	ld	r7, VCPU_CTR(r9)
 	lwz	r8, VCPU_XER(r9)
 	mtctr	r7
@@ -1222,7 +1231,7 @@ kvmppc_hdsi:
 	/* Unset guest mode. */
 	li	r0, KVM_GUEST_MODE_NONE
 	stb	r0, HSTATE_IN_GUEST(r13)
-	b	nohpte_cont
+	b	guest_exit_cont
 
 /*
  * Similarly for an HISI, reflect it to the guest as an ISI unless
@@ -1248,9 +1257,9 @@ kvmppc_hisi:
 	ld	r11, VCPU_MSR(r9)
 	li	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	cmpdi	r3, 0			/* retry the instruction */
-	beq	6f
+	beq	fast_interrupt_c_return
 	cmpdi	r3, -1			/* handle in kernel mode */
-	beq	nohpte_cont
+	beq	guest_exit_cont
 
 	/* Synthesize an ISI for the guest */
 	mr	r11, r3
@@ -1259,12 +1268,7 @@ kvmppc_hisi:
 	li	r10, BOOK3S_INTERRUPT_INST_STORAGE
 	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
 	rotldi	r11, r11, 63
-6:	ld	r7, VCPU_CTR(r9)
-	lwz	r8, VCPU_XER(r9)
-	mtctr	r7
-	mtxer	r8
-	mr	r4, r9
-	b	fast_guest_return
+	b	fast_interrupt_c_return
 
 3:	ld	r6, VCPU_KVM(r9)	/* not relocated, use VRMA */
 	ld	r5, KVM_VRMA_SLB_V(r6)
@@ -1280,14 +1284,14 @@ kvmppc_hisi:
 hcall_try_real_mode:
 	ld	r3,VCPU_GPR(R3)(r9)
 	andi.	r0,r11,MSR_PR
-	bne	hcall_real_cont
+	bne	guest_exit_cont
 	clrrdi	r3,r3,2
 	cmpldi	r3,hcall_real_table_end - hcall_real_table
-	bge	hcall_real_cont
+	bge	guest_exit_cont
 	LOAD_REG_ADDR(r4, hcall_real_table)
 	lwzx	r3,r3,r4
 	cmpwi	r3,0
-	beq	hcall_real_cont
+	beq	guest_exit_cont
 	add	r3,r3,r4
 	mtctr	r3
 	mr	r3,r9		/* get vcpu pointer */
@@ -1308,7 +1312,7 @@ hcall_real_fallback:
 	li	r12,BOOK3S_INTERRUPT_SYSCALL
 	ld	r9, HSTATE_KVM_VCPU(r13)
 
-	b	hcall_real_cont
+	b	guest_exit_cont
 
 	.globl	hcall_real_table
 hcall_real_table:
@@ -1567,6 +1571,21 @@ kvm_cede_exit:
 	li	r3,H_TOO_HARD
 	blr
 
+	/* Try to handle a machine check in real mode */
+machine_check_realmode:
+	mr	r3, r9		/* get vcpu pointer */
+	bl	.kvmppc_realmode_machine_check
+	nop
+	cmpdi	r3, 0		/* continue exiting from guest? */
+	ld	r9, HSTATE_KVM_VCPU(r13)
+	li	r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+	beq	mc_cont
+	/* If not, deliver a machine check.  SRR0/1 are already set */
+	li	r10, BOOK3S_INTERRUPT_MACHINE_CHECK
+	li	r11, (MSR_ME << 1) | 1	/* synthesize MSR_SF | MSR_ME */
+	rotldi	r11, r11, 63
+	b	fast_interrupt_c_return
+
 secondary_too_late:
 	ld	r5,HSTATE_KVM_VCORE(r13)
 	HMT_LOW
-- 
cgit v1.2.3


From 910040b82de872af453bf3ecc59de8f0abd22697 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:18 +0000
Subject: KVM: PPC: e500: Silence bogus GCC warning in tlb code

64-bit GCC 4.5.1 warns about an uninitialized variable which was guarded
by a flag. Initialize the variable to make it happy.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: reword comment]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500_tlb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6305ee692ef7..5532bfb15464 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -415,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 	struct tlbe_ref *ref)
 {
 	struct kvm_memory_slot *slot;
-	unsigned long pfn, hva;
+	unsigned long pfn = 0; /* silence GCC warning */
+	unsigned long hva;
 	int pfnmap = 0;
 	int tsize = BOOK3E_PAGESZ_4K;
 
-- 
cgit v1.2.3


From b50df19cccdd169d5345b5169699446b80ee051a Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:19 +0000
Subject: KVM: PPC: booke: Fix get_tb() compile error on 64-bit

Include header file for get_tb() declaration.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3d1f35dc7862..7c9c3891a14a 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,6 +36,7 @@
 #include <asm/dbell.h>
 #include <asm/hw_irq.h>
 #include <asm/irq.h>
+#include <asm/time.h>
 
 #include "timing.h"
 #include "booke.h"
-- 
cgit v1.2.3


From ff594746845877c0a6402be23897df659188eacb Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:20 +0000
Subject: KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler

GET_VCPU define will not be implemented for 64-bit for performance reasons
so get rid of it also on 32-bit.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/bookehv_interrupts.S | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe8272b57..fa6d5529ebfb 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -32,9 +32,6 @@
 
 #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
 
-#define GET_VCPU(vcpu, thread)	\
-	PPC_LL	vcpu, THREAD_KVM_VCPU(thread)
-
 #define LONGBYTES		(BITS_PER_LONG / 8)
 
 #define VCPU_GUEST_SPRG(n)	(VCPU_GUEST_SPRGS + (n * LONGBYTES))
@@ -206,7 +203,7 @@
  */
 .macro kvm_handler intno srr0, srr1, flags
 _GLOBAL(kvmppc_handler_\intno\()_\srr1)
-	GET_VCPU(r11, r10)
+	PPC_LL	r11, THREAD_KVM_VCPU(r10)
 	PPC_STL r3, VCPU_GPR(R3)(r11)
 	mfspr	r3, SPRN_SPRG_RSCRATCH0
 	PPC_STL	r4, VCPU_GPR(R4)(r11)
@@ -233,7 +230,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
 .macro kvm_lvl_handler intno scratch srr0, srr1, flags
 _GLOBAL(kvmppc_handler_\intno\()_\srr1)
 	mfspr	r10, SPRN_SPRG_THREAD
-	GET_VCPU(r11, r10)
+	PPC_LL	r11, THREAD_KVM_VCPU(r10)
 	PPC_STL r3, VCPU_GPR(R3)(r11)
 	mfspr	r3, \scratch
 	PPC_STL	r4, VCPU_GPR(R4)(r11)
-- 
cgit v1.2.3


From e51f8f32d6b82f4a34dbb5781769c79b813e5694 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:21 +0000
Subject: KVM: PPC: bookehv64: Add support for interrupt handling

Add interrupt handling support for 64-bit bookehv hosts. Unify 32 and 64 bit
implementations using a common stack layout and a common execution flow starting
from kvm_handler_common macro. Update documentation for 64-bit input register
values. This patch only address the bolted TLB miss exception handlers version.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_booke_hv_asm.h |  25 +++++
 arch/powerpc/kvm/bookehv_interrupts.S       | 138 ++++++++++++++++++++++++++--
 2 files changed, 155 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index a37a12a9a7d7..3a79f5325712 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -17,6 +17,7 @@
  * there are no exceptions for which we fall through directly to
  * the normal host handler.
  *
+ * 32-bit host
  * Expected inputs (normal exceptions):
  *   SCRATCH0 = saved r10
  *   r10 = thread struct
@@ -33,6 +34,30 @@
  *   *(r8 + GPR9) = saved r9
  *   *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
  *   *(r8 + GPR11) = saved r11
+ *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/MC exception types):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_R11) = saved r11
+ *  SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+  * Expected inputs (CRIT exception type):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_R11) = saved r11
+ *  *(r13 + PACA_EX##type + EX_R13) = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ *  r10 = saved CR
+ *  r13 = PACA_POINTER
+ *  *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
+ *  *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
+ *  SPRN_SPRG_GEN_SCRATCH = saved r13
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
  */
 .macro DO_KVM intno srr1
 #ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index fa6d5529ebfb..e8ed7d659c55 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -16,6 +16,7 @@
  *
  * Author: Varun Sethi <varun.sethi@freescale.com>
  * Author: Scott Wood <scotwood@freescale.com>
+ * Author: Mihai Caraman <mihai.caraman@freescale.com>
  *
  * This file is derived from arch/powerpc/kvm/booke_interrupts.S
  */
@@ -30,28 +31,33 @@
 #include <asm/bitsperlong.h>
 #include <asm/thread_info.h>
 
+#ifdef CONFIG_64BIT
+#include <asm/exception-64e.h>
+#else
 #include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+#endif
 
 #define LONGBYTES		(BITS_PER_LONG / 8)
 
 #define VCPU_GUEST_SPRG(n)	(VCPU_GUEST_SPRGS + (n * LONGBYTES))
 
 /* The host stack layout: */
-#define HOST_R1         (0 * LONGBYTES) /* Implied by stwu. */
-#define HOST_CALLEE_LR  (1 * LONGBYTES)
-#define HOST_RUN        (2 * LONGBYTES) /* struct kvm_run */
+#define HOST_R1         0 /* Implied by stwu. */
+#define HOST_CALLEE_LR  PPC_LR_STKOFF
+#define HOST_RUN        (HOST_CALLEE_LR + LONGBYTES)
 /*
  * r2 is special: it holds 'current', and it made nonvolatile in the
  * kernel with the -ffixed-r2 gcc option.
  */
-#define HOST_R2         (3 * LONGBYTES)
-#define HOST_CR         (4 * LONGBYTES)
-#define HOST_NV_GPRS    (5 * LONGBYTES)
+#define HOST_R2         (HOST_RUN + LONGBYTES)
+#define HOST_CR         (HOST_R2 + LONGBYTES)
+#define HOST_NV_GPRS    (HOST_CR + LONGBYTES)
 #define __HOST_NV_GPR(n)  (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
 #define HOST_NV_GPR(n)  __HOST_NV_GPR(__REG_##n)
 #define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
 #define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
-#define HOST_STACK_LR   (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
+/* LR in caller stack frame. */
+#define HOST_STACK_LR	(HOST_STACK_SIZE + PPC_LR_STKOFF)
 
 #define NEED_EMU		0x00000001 /* emulation -- save nv regs */
 #define NEED_DEAR		0x00000002 /* save faulting DEAR */
@@ -198,6 +204,122 @@
 	b	kvmppc_resume_host
 .endm
 
+#ifdef CONFIG_64BIT
+/* Exception types */
+#define EX_GEN			1
+#define EX_GDBELL		2
+#define EX_DBG			3
+#define EX_MC			4
+#define EX_CRIT			5
+#define EX_TLB			6
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
+ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
+	mr	r11, r4
+	/*
+	 * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
+	 */
+	PPC_LL	r4, PACACURRENT(r13)
+	PPC_LL	r4, (THREAD + THREAD_KVM_VCPU)(r4)
+	stw	r10, VCPU_CR(r4)
+	PPC_STL r11, VCPU_GPR(R4)(r4)
+	PPC_STL	r5, VCPU_GPR(R5)(r4)
+	.if \type == EX_CRIT
+	PPC_LL	r5, (\paca_ex + EX_R13)(r13)
+	.else
+	mfspr	r5, \scratch
+	.endif
+	PPC_STL	r6, VCPU_GPR(R6)(r4)
+	PPC_STL	r8, VCPU_GPR(R8)(r4)
+	PPC_STL	r9, VCPU_GPR(R9)(r4)
+	PPC_STL r5, VCPU_GPR(R13)(r4)
+	PPC_LL	r6, (\paca_ex + \ex_r10)(r13)
+	PPC_LL	r8, (\paca_ex + \ex_r11)(r13)
+	PPC_STL r3, VCPU_GPR(R3)(r4)
+	PPC_STL r7, VCPU_GPR(R7)(r4)
+	PPC_STL r12, VCPU_GPR(R12)(r4)
+	PPC_STL r6, VCPU_GPR(R10)(r4)
+	PPC_STL r8, VCPU_GPR(R11)(r4)
+	mfctr	r5
+	PPC_STL	r5, VCPU_CTR(r4)
+	mfspr	r5, \srr0
+	mfspr	r6, \srr1
+	kvm_handler_common \intno, \srr0, \flags
+.endm
+
+#define EX_PARAMS(type) \
+	EX_##type, \
+	SPRN_SPRG_##type##_SCRATCH, \
+	PACA_EX##type, \
+	EX_R10, \
+	EX_R11
+
+#define EX_PARAMS_TLB \
+	EX_TLB, \
+	SPRN_SPRG_GEN_SCRATCH, \
+	PACA_EXTLB, \
+	EX_TLB_R10, \
+	EX_TLB_R11
+
+kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
+	SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1,NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
+	SPRN_CSRR0, SPRN_CSRR1, 0
+/*
+ * Only bolted TLB miss exception handlers are supported for now
+ */
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
+	SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
+	SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
+	SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
+	SPRN_DSRR0, SPRN_DSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
+	SPRN_CSRR0, SPRN_CSRR1, 0
+#else
 /*
  * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
  */
@@ -292,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
 	SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
 kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
 	SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
-
+#endif
 
 /* Registers:
  *  SPRG_SCRATCH0: guest r10
-- 
cgit v1.2.3


From 7cdd7a95c66a6309ae6156471033fb5375cbcfca Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:22 +0000
Subject: KVM: PPC: e500: Add emulation helper for getting instruction ea

Add emulation helper for getting instruction ea and refactor tlb instruction
emulation to use it.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: keep rt variable around]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h | 11 +++++++++++
 arch/powerpc/kvm/e500.h            |  6 +++---
 arch/powerpc/kvm/e500_emulate.c    | 14 ++++++++++----
 arch/powerpc/kvm/e500_tlb.c        | 33 +++++++++++----------------------
 4 files changed, 35 insertions(+), 29 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1ca31e92ee75..d55a2b28706e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -295,4 +295,15 @@ static inline void kvmppc_lazy_ee_enable(void)
 #endif
 }
 
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+	ulong ea;
+
+	ea = kvmppc_get_gpr(vcpu, rb);
+	if (ra)
+		ea += kvmppc_get_gpr(vcpu, ra);
+
+	return ea;
+}
+
 #endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index d1622864549e..32e98a72b0ac 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -129,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
 				ulong value);
 int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
 int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
 int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
 void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
 
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e04b0ef55ce0..e78f353a836a 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	int ra = get_ra(inst);
 	int rb = get_rb(inst);
 	int rt = get_rt(inst);
+	gva_t ea;
 
 	switch (get_op(inst)) {
 	case 31:
@@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
 			break;
 
 		case XOP_TLBSX:
-			emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
 			break;
 
-		case XOP_TLBILX:
-			emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
+		case XOP_TLBILX: {
+			int type = rt & 0x3;
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
 			break;
+		}
 
 		case XOP_TLBIVAX:
-			emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+			ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+			emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
 			break;
 
 		default:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 5532bfb15464..7a1472163120 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -689,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
 	return EMULATE_DONE;
 }
 
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	unsigned int ia;
 	int esel, tlbsel;
-	gva_t ea;
-
-	ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
 
 	ia = (ea >> 2) & 0x1;
 
@@ -723,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
 }
 
 static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
-		       int pid, int rt)
+		       int pid, int type)
 {
 	struct kvm_book3e_206_tlb_entry *tlbe;
 	int tid, esel;
@@ -732,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
 	for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
 		tlbe = get_entry(vcpu_e500, tlbsel, esel);
 		tid = get_tlb_tid(tlbe);
-		if (rt == 0 || tid == pid) {
+		if (type == 0 || tid == pid) {
 			inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
 			kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
 		}
@@ -740,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
 }
 
 static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
-		       int ra, int rb)
+		       gva_t ea)
 {
 	int tlbsel, esel;
-	gva_t ea;
-
-	ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
-	if (ra)
-		ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
 
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -759,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
 	}
 }
 
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int pid = get_cur_spid(vcpu);
 
-	if (rt == 0 || rt == 1) {
-		tlbilx_all(vcpu_e500, 0, pid, rt);
-		tlbilx_all(vcpu_e500, 1, pid, rt);
-	} else if (rt == 3) {
-		tlbilx_one(vcpu_e500, pid, ra, rb);
+	if (type == 0 || type == 1) {
+		tlbilx_all(vcpu_e500, 0, pid, type);
+		tlbilx_all(vcpu_e500, 1, pid, type);
+	} else if (type == 3) {
+		tlbilx_one(vcpu_e500, pid, ea);
 	}
 
 	return EMULATE_DONE;
@@ -793,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
 	return EMULATE_DONE;
 }
 
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
 {
 	struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
 	int as = !!get_cur_sas(vcpu);
 	unsigned int pid = get_cur_spid(vcpu);
 	int esel, tlbsel;
 	struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
-	gva_t ea;
-
-	ea = kvmppc_get_gpr(vcpu, rb);
 
 	for (tlbsel = 0; tlbsel < 2; tlbsel++) {
 		esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
-- 
cgit v1.2.3


From 8823a8fd0d730612f12a87102503622c01eb2468 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:23 +0000
Subject: KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation

Mask high 32 bits of effective address in emulation layer for guests running
in 32-bit mode.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: fix indent]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d55a2b28706e..572aa7530619 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -298,11 +298,21 @@ static inline void kvmppc_lazy_ee_enable(void)
 static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
 {
 	ulong ea;
+	ulong msr_64bit = 0;
 
 	ea = kvmppc_get_gpr(vcpu, rb);
 	if (ra)
 		ea += kvmppc_get_gpr(vcpu, ra);
 
+#if defined(CONFIG_PPC_BOOK3E_64)
+	msr_64bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+	msr_64bit = MSR_SF;
+#endif
+
+	if (!(vcpu->arch.shared->msr & msr_64bit))
+		ea = (uint32_t)ea;
+
 	return ea;
 }
 
-- 
cgit v1.2.3


From 9e2fa646936160eca525bcb80c2cce05faa9b208 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:24 +0000
Subject: KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation

Mask high 32 bits of MAS2's effective page number in tlbwe emulation for guests
running in 32-bit mode.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/e500_tlb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 7a1472163120..cf3f18012371 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -871,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
 
 	gtlbe->mas1 = vcpu->arch.shared->mas1;
 	gtlbe->mas2 = vcpu->arch.shared->mas2;
+	if (!(vcpu->arch.shared->msr & MSR_CM))
+		gtlbe->mas2 &= 0xffffffffUL;
 	gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
 
 	trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
-- 
cgit v1.2.3


From e9666ea1b3d11509b76f8ff5b9776d8d30709b19 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:25 +0000
Subject: KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit

Extend MAS2 EPN mask to retain most significant bits on 64-bit hosts.
Use this mask in tlb effective address accessor.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/mmu-book3e.h | 2 +-
 arch/powerpc/kvm/e500.h               | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index eeabcdbc30f7..99d43e0c1e4a 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -59,7 +59,7 @@
 #define MAS1_TSIZE_SHIFT	7
 #define MAS1_TSIZE(x)		(((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
 
-#define MAS2_EPN		0xFFFFF000
+#define MAS2_EPN		(~0xFFFUL)
 #define MAS2_X0			0x00000040
 #define MAS2_X1			0x00000020
 #define MAS2_W			0x00000010
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 32e98a72b0ac..c70d37ed770a 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -154,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
 
 static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
 {
-	return tlbe->mas2 & 0xfffff000;
+	return tlbe->mas2 & MAS2_EPN;
 }
 
 static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
-- 
cgit v1.2.3


From 62b4db0042aa753810e0d4f184481cc107c925ba Mon Sep 17 00:00:00 2001
From: Alexander Graf <agraf@suse.de>
Date: Sat, 1 Dec 2012 14:50:26 +0100
Subject: KVM: PPC: Make EPCR a valid field for booke64 and bookehv

In BookE, EPCR is defined and valid when either the HV or the 64bit
category are implemented. Reflect this in the field definition.

Today the only KVM target on 64bit is HV enabled, so there is no
change in actual source code, but this keeps the code closer to the
spec and doesn't build up artificial road blocks for a PR KVM
on 64bit.

Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_host.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 62fbd38b15fa..ca9bf459db6a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -406,13 +406,18 @@ struct kvm_vcpu_arch {
 	u32 host_mas4;
 	u32 host_mas6;
 	u32 shadow_epcr;
-	u32 epcr;
 	u32 shadow_msrp;
 	u32 eplc;
 	u32 epsc;
 	u32 oldpir;
 #endif
 
+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+	u32 epcr;
+#endif
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S
 	/* For Gekko paired singles */
 	u32 qpr[32];
-- 
cgit v1.2.3


From 95e90b43c9c648bde607101e5a158941eec8e514 Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:26 +0000
Subject: KVM: PPC: bookehv: Add guest computation mode for irq delivery

When delivering guest IRQs, update MSR computation mode according to guest
interrupt computation mode found in EPCR.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: remove HV dependency in the code]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7c9c3891a14a..9457fb1b41c9 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -312,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 	bool crit;
 	bool keep_irq = false;
 	enum int_class int_class;
+	ulong new_msr = vcpu->arch.shared->msr;
 
 	/* Truncate crit indicators in 32 bit mode */
 	if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -407,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
 			set_guest_esr(vcpu, vcpu->arch.queued_esr);
 		if (update_dear == true)
 			set_guest_dear(vcpu, vcpu->arch.queued_dear);
-		kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
+
+		new_msr &= msr_mask;
+#if defined(CONFIG_64BIT)
+		if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+			new_msr |= MSR_CM;
+#endif
+		kvmppc_set_msr(vcpu, new_msr);
 
 		if (!keep_irq)
 			clear_bit(priority, &vcpu->arch.pending_exceptions);
-- 
cgit v1.2.3


From 38f988240c611f9d2595feb1b8ddcb80b0e97dec Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:27 +0000
Subject: KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation

Add EPCR support in booke mtspr/mfspr emulation. EPCR register is defined only
for 64-bit and HV categories, we will expose it at this point only to 64-bit
virtual processors running on 64-bit HV hosts.
Define a reusable setter function for vcpu's EPCR.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: move HV dependency in the code]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/kvm/booke.c         | 12 ++++++++++++
 arch/powerpc/kvm/booke.h         |  1 +
 arch/powerpc/kvm/booke_emulate.c | 14 +++++++++++++-
 3 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 9457fb1b41c9..037d045db3f1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1473,6 +1473,18 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
 }
 
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
+{
+#if defined(CONFIG_64BIT)
+	vcpu->arch.epcr = new_epcr;
+#ifdef CONFIG_KVM_BOOKE_HV
+	vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
+	if (vcpu->arch.epcr  & SPRN_EPCR_ICM)
+		vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
+#endif
+#endif
+}
+
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
 	vcpu->arch.tcr = new_tcr;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974c1e20..e9b88e433f64 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
 void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
 void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
 
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
 void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 514790f41aba..4685b8cf2249 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -240,7 +240,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
 	case SPRN_MCSR:
 		vcpu->arch.mcsr &= ~spr_val;
 		break;
-
+#if defined(CONFIG_64BIT)
+	case SPRN_EPCR:
+		kvmppc_set_epcr(vcpu, spr_val);
+#ifdef CONFIG_KVM_BOOKE_HV
+		mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+#endif
+		break;
+#endif
 	default:
 		emulated = EMULATE_FAIL;
 	}
@@ -335,6 +342,11 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
 	case SPRN_MCSR:
 		*spr_val = vcpu->arch.mcsr;
 		break;
+#if defined(CONFIG_64BIT)
+	case SPRN_EPCR:
+		*spr_val = vcpu->arch.epcr;
+		break;
+#endif
 
 	default:
 		emulated = EMULATE_FAIL;
-- 
cgit v1.2.3


From 352df1deb2e3c40e65ff94c8d7c62d9144446b1c Mon Sep 17 00:00:00 2001
From: Mihai Caraman <mihai.caraman@freescale.com>
Date: Thu, 11 Oct 2012 06:13:29 +0000
Subject: KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface

Implement ONE_REG interface for EPCR register adding KVM_REG_PPC_EPCR to
the list of ONE_REG PPC supported registers.

Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: remove HV dependency, use get/put_user]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/uapi/asm/kvm.h |  2 ++
 arch/powerpc/kvm/booke.c            | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 514883dd311e..2fba8a66fb10 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -411,4 +411,6 @@ struct kvm_get_htab_header {
 #define KVM_REG_PPC_VPA_SLB	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
 #define KVM_REG_PPC_VPA_DTL	(KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
 
+#define KVM_REG_PPC_EPCR	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+
 #endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 037d045db3f1..69f114015780 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1388,6 +1388,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 				 &vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
 		break;
 	}
+#if defined(CONFIG_64BIT)
+	case KVM_REG_PPC_EPCR:
+		r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+		break;
+#endif
 	default:
 		break;
 	}
@@ -1415,6 +1420,15 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
 			     (u64 __user *)(long)reg->addr, sizeof(u64));
 		break;
 	}
+#if defined(CONFIG_64BIT)
+	case KVM_REG_PPC_EPCR: {
+		u32 new_epcr;
+		r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
+		if (r == 0)
+			kvmppc_set_epcr(vcpu, new_epcr);
+		break;
+	}
+#endif
 	default:
 		break;
 	}
-- 
cgit v1.2.3