From 6b7e2d0991489559a1df4500d77f7b76c4607ed0 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 12 Jan 2011 15:40:31 +0800
Subject: KVM: Add "exiting guest mode" state

Currently we keep track of only two states: guest mode and host
mode.  This patch adds an "exiting guest mode" state that tells
us that an IPI will happen soon, so unless we need to wait for the
IPI, we can avoid it completely.

Also
1: No need atomically to read/write ->mode in vcpu's thread

2: reorganize struct kvm_vcpu to make ->mode and ->requests
   in the same cache line explicitly

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b5021db21858..b99eacd988ab 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -98,19 +98,26 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
 int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
 #endif
 
+enum {
+	OUTSIDE_GUEST_MODE,
+	IN_GUEST_MODE,
+	EXITING_GUEST_MODE
+};
+
 struct kvm_vcpu {
 	struct kvm *kvm;
 #ifdef CONFIG_PREEMPT_NOTIFIERS
 	struct preempt_notifier preempt_notifier;
 #endif
+	int cpu;
 	int vcpu_id;
-	struct mutex mutex;
-	int   cpu;
-	atomic_t guest_mode;
-	struct kvm_run *run;
+	int srcu_idx;
+	int mode;
 	unsigned long requests;
 	unsigned long guest_debug;
-	int srcu_idx;
+
+	struct mutex mutex;
+	struct kvm_run *run;
 
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
@@ -140,6 +147,11 @@ struct kvm_vcpu {
 	struct kvm_vcpu_arch arch;
 };
 
+static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
+{
+	return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
+}
+
 /*
  * Some of the bitops functions do not support too long bitmaps.
  * This number must be determined not to exceed such limits.
-- 
cgit v1.2.3


From 3cba41307a2b1344ab8c1b9f55202d1e9d7bf81b Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 12 Jan 2011 15:41:22 +0800
Subject: KVM: make make_all_cpus_request() lockless

Now, we have 'vcpu->mode' to judge whether need to send ipi to other
cpus, this way is very exact, so checking request bit is needless,
then we can drop the spinlock let it's collateral

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b99eacd988ab..c8dee22b1945 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -224,7 +224,6 @@ struct kvm_memslots {
 
 struct kvm {
 	spinlock_t mmu_lock;
-	raw_spinlock_t requests_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
 	struct kvm_memslots *memslots;
@@ -731,11 +730,6 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 	set_bit(req, &vcpu->requests);
 }
 
-static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu)
-{
-	return test_and_set_bit(req, &vcpu->requests);
-}
-
 static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu)
 {
 	if (test_bit(req, &vcpu->requests)) {
-- 
cgit v1.2.3


From 0014bd990e69063b0fb78940b35439d7980ce3ee Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 30 Jan 2011 11:15:47 +0800
Subject: mm: export __get_user_pages

In most cases, get_user_pages and get_user_pages_fast should be used
to pin user pages in memory.  But sometimes, some special flags except
FOLL_GET, FOLL_WRITE and FOLL_FORCE are needed, for example in
following patch, KVM needs FOLL_HWPOISON.  To support these users,
__get_user_pages is exported directly.

There are some symbol name conflicts in infiniband driver, fixed them too.

Signed-off-by: Huang Ying <ying.huang@intel.com>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Michel Lespinasse <walken@google.com>
CC: Roland Dreier <roland@kernel.org>
CC: Ralph Campbell <infinipath@qlogic.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/mm.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 679300c050f5..46150c66318e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -965,6 +965,10 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 
+int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		     unsigned long start, int len, unsigned int foll_flags,
+		     struct page **pages, struct vm_area_struct **vmas,
+		     int *nonblocking);
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			unsigned long start, int nr_pages, int write, int force,
 			struct page **pages, struct vm_area_struct **vmas);
-- 
cgit v1.2.3


From 69ebb83e13e514222b0ae4f8bd813a17679ed876 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 30 Jan 2011 11:15:48 +0800
Subject: mm: make __get_user_pages return -EHWPOISON for HWPOISON page
 optionally

Make __get_user_pages return -EHWPOISON for HWPOISON page only if
FOLL_HWPOISON is specified.  With this patch, the interested callers
can distinguish HWPOISON pages from general FAULT pages, while other
callers will still get -EFAULT for all these pages, so the user space
interface need not to be changed.

This feature is needed by KVM, where UCR MCE should be relayed to
guest for HWPOISON page, while instruction emulation and MMIO will be
tried for general FAULT page.

The idea comes from Andrew Morton.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/asm-generic/errno.h | 2 ++
 include/linux/mm.h          | 1 +
 2 files changed, 3 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h
index 28cc03bf19e6..a1331ce50445 100644
--- a/include/asm-generic/errno.h
+++ b/include/asm-generic/errno.h
@@ -108,4 +108,6 @@
 
 #define ERFKILL		132	/* Operation not possible due to RF-kill */
 
+#define EHWPOISON	133	/* Memory page has hardware error */
+
 #endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 46150c66318e..a77c82c56e05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1532,6 +1532,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address,
 #define FOLL_FORCE	0x10	/* get_user_pages read/write w/o permission */
 #define FOLL_MLOCK	0x40	/* mark page as mlocked */
 #define FOLL_SPLIT	0x80	/* don't return transhuge pages, split them */
+#define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
 
 typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr,
 			void *data);
-- 
cgit v1.2.3


From f58c9df78c0360f0eb3852b9cc3a61e689bc2dd1 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Sun, 30 Jan 2011 11:15:49 +0800
Subject: mm: remove is_hwpoison_address

Unused.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 include/linux/mm.h | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a77c82c56e05..78219887308e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1625,14 +1625,6 @@ extern int sysctl_memory_failure_recovery;
 extern void shake_page(struct page *p, int access);
 extern atomic_long_t mce_bad_pages;
 extern int soft_offline_page(struct page *page, int flags);
-#ifdef CONFIG_MEMORY_FAILURE
-int is_hwpoison_address(unsigned long addr);
-#else
-static inline int is_hwpoison_address(unsigned long addr)
-{
-	return 0;
-}
-#endif
 
 extern void dump_page(struct page *page);
 
-- 
cgit v1.2.3


From 34bb10b79de7df118de832f6832efb630e646577 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 1 Feb 2011 09:52:41 -0500
Subject: KVM: keep track of which task is running a KVM vcpu

Keep track of which task is running a KVM vcpu.  This helps us
figure out later what task to wake up if we want to boost a
vcpu that got preempted.

Unfortunately there are no guarantees that the same task
always keeps the same vcpu, so we can only track the task
across a single "run" of the vcpu.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c8dee22b1945..4721b11b922a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -122,6 +122,7 @@ struct kvm_vcpu {
 	int fpu_active;
 	int guest_fpu_loaded, guest_xcr0_loaded;
 	wait_queue_head_t wq;
+	struct pid *pid;
 	int sigset_active;
 	sigset_t sigset;
 	struct kvm_vcpu_stat stat;
-- 
cgit v1.2.3


From 217ece6129f2d3b4fdd18d9e79be9e43d8d14a42 Mon Sep 17 00:00:00 2001
From: Rik van Riel <riel@redhat.com>
Date: Tue, 1 Feb 2011 09:53:28 -0500
Subject: KVM: use yield_to instead of sleep in kvm_vcpu_on_spin

Instead of sleeping in kvm_vcpu_on_spin, which can cause gigantic
slowdowns of certain workloads, we instead use yield_to to get
another VCPU in the same KVM guest to run sooner.

This seems to give a 10-15% speedup in certain workloads.

Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4721b11b922a..3751ea0d1f92 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -235,6 +235,7 @@ struct kvm {
 #endif
 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 	atomic_t online_vcpus;
+	int last_boosted_vcpu;
 	struct list_head vm_list;
 	struct mutex lock;
 	struct kvm_io_bus *buses[KVM_NR_BUSES];
-- 
cgit v1.2.3


From f86368493ec038218e8663cc1b6e5393cd8e008a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 3 Feb 2011 15:07:07 +0200
Subject: KVM: Fix race between nmi injection and enabling nmi window

The interrupt injection logic looks something like

  if an nmi is pending, and nmi injection allowed
    inject nmi
  if an nmi is pending
    request exit on nmi window

the problem is that "nmi is pending" can be set asynchronously by
the PIT; if it happens to fire between the two if statements, we
will request an nmi window even though nmi injection is allowed.  On
SVM, this has disasterous results, since it causes eflags.TF to be
set in random guest code.

The fix is simple; make nmi_pending synchronous using the standard
vcpu->requests mechanism; this ensures the code above is completely
synchronous wrt nmi_pending.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 include/linux/kvm_host.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3751ea0d1f92..ab428552af8e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -43,6 +43,7 @@
 #define KVM_REQ_DEACTIVATE_FPU    10
 #define KVM_REQ_EVENT             11
 #define KVM_REQ_APF_HALT          12
+#define KVM_REQ_NMI               13
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID	0
 
-- 
cgit v1.2.3