From 6b7e2d0991489559a1df4500d77f7b76c4607ed0 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 12 Jan 2011 15:40:31 +0800 Subject: KVM: Add "exiting guest mode" state Currently we keep track of only two states: guest mode and host mode. This patch adds an "exiting guest mode" state that tells us that an IPI will happen soon, so unless we need to wait for the IPI, we can avoid it completely. Also 1: No need atomically to read/write ->mode in vcpu's thread 2: reorganize struct kvm_vcpu to make ->mode and ->requests in the same cache line explicitly Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b5021db21858..b99eacd988ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -98,19 +98,26 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn, int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu); #endif +enum { + OUTSIDE_GUEST_MODE, + IN_GUEST_MODE, + EXITING_GUEST_MODE +}; + struct kvm_vcpu { struct kvm *kvm; #ifdef CONFIG_PREEMPT_NOTIFIERS struct preempt_notifier preempt_notifier; #endif + int cpu; int vcpu_id; - struct mutex mutex; - int cpu; - atomic_t guest_mode; - struct kvm_run *run; + int srcu_idx; + int mode; unsigned long requests; unsigned long guest_debug; - int srcu_idx; + + struct mutex mutex; + struct kvm_run *run; int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; @@ -140,6 +147,11 @@ struct kvm_vcpu { struct kvm_vcpu_arch arch; }; +static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) +{ + return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); +} + /* * Some of the bitops functions do not support too long bitmaps. * This number must be determined not to exceed such limits. 
-- cgit v1.2.3 From 3cba41307a2b1344ab8c1b9f55202d1e9d7bf81b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 12 Jan 2011 15:41:22 +0800 Subject: KVM: make make_all_cpus_request() lockless Now, we have 'vcpu->mode' to judge whether need to send ipi to other cpus, this way is very exact, so checking request bit is needless, then we can drop the spinlock let it's collateral Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b99eacd988ab..c8dee22b1945 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -224,7 +224,6 @@ struct kvm_memslots { struct kvm { spinlock_t mmu_lock; - raw_spinlock_t requests_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots; @@ -731,11 +730,6 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req, &vcpu->requests); } -static inline bool kvm_make_check_request(int req, struct kvm_vcpu *vcpu) -{ - return test_and_set_bit(req, &vcpu->requests); -} - static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { if (test_bit(req, &vcpu->requests)) { -- cgit v1.2.3 From 0014bd990e69063b0fb78940b35439d7980ce3ee Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:47 +0800 Subject: mm: export __get_user_pages In most cases, get_user_pages and get_user_pages_fast should be used to pin user pages in memory. But sometimes, some special flags except FOLL_GET, FOLL_WRITE and FOLL_FORCE are needed, for example in following patch, KVM needs FOLL_HWPOISON. To support these users, __get_user_pages is exported directly. There are some symbol name conflicts in infiniband driver, fixed them too. 
Signed-off-by: Huang Ying CC: Andrew Morton CC: Michel Lespinasse CC: Roland Dreier CC: Ralph Campbell Signed-off-by: Marcelo Tosatti --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 679300c050f5..46150c66318e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -965,6 +965,10 @@ static inline int handle_mm_fault(struct mm_struct *mm, extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); +int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, + unsigned long start, int len, unsigned int foll_flags, + struct page **pages, struct vm_area_struct **vmas, + int *nonblocking); int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -- cgit v1.2.3 From 69ebb83e13e514222b0ae4f8bd813a17679ed876 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:48 +0800 Subject: mm: make __get_user_pages return -EHWPOISON for HWPOISON page optionally Make __get_user_pages return -EHWPOISON for HWPOISON page only if FOLL_HWPOISON is specified. With this patch, the interested callers can distinguish HWPOISON pages from general FAULT pages, while other callers will still get -EFAULT for all these pages, so the user space interface need not to be changed. This feature is needed by KVM, where UCR MCE should be relayed to guest for HWPOISON page, while instruction emulation and MMIO will be tried for general FAULT page. The idea comes from Andrew Morton. 
Signed-off-by: Huang Ying Cc: Andrew Morton Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/asm-generic/errno.h | 2 ++ include/linux/mm.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/asm-generic/errno.h b/include/asm-generic/errno.h index 28cc03bf19e6..a1331ce50445 100644 --- a/include/asm-generic/errno.h +++ b/include/asm-generic/errno.h @@ -108,4 +108,6 @@ #define ERFKILL 132 /* Operation not possible due to RF-kill */ +#define EHWPOISON 133 /* Memory page has hardware error */ + #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index 46150c66318e..a77c82c56e05 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1532,6 +1532,7 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_FORCE 0x10 /* get_user_pages read/write w/o permission */ #define FOLL_MLOCK 0x40 /* mark page as mlocked */ #define FOLL_SPLIT 0x80 /* don't return transhuge pages, split them */ +#define FOLL_HWPOISON 0x100 /* check page is hwpoisoned */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From f58c9df78c0360f0eb3852b9cc3a61e689bc2dd1 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Sun, 30 Jan 2011 11:15:49 +0800 Subject: mm: remove is_hwpoison_address Unused. 
Signed-off-by: Huang Ying Signed-off-by: Marcelo Tosatti --- include/linux/mm.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a77c82c56e05..78219887308e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1625,14 +1625,6 @@ extern int sysctl_memory_failure_recovery; extern void shake_page(struct page *p, int access); extern atomic_long_t mce_bad_pages; extern int soft_offline_page(struct page *page, int flags); -#ifdef CONFIG_MEMORY_FAILURE -int is_hwpoison_address(unsigned long addr); -#else -static inline int is_hwpoison_address(unsigned long addr) -{ - return 0; -} -#endif extern void dump_page(struct page *page); -- cgit v1.2.3 From 34bb10b79de7df118de832f6832efb630e646577 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 1 Feb 2011 09:52:41 -0500 Subject: KVM: keep track of which task is running a KVM vcpu Keep track of which task is running a KVM vcpu. This helps us figure out later what task to wake up if we want to boost a vcpu that got preempted. Unfortunately there are no guarantees that the same task always keeps the same vcpu, so we can only track the task across a single "run" of the vcpu. 
Signed-off-by: Rik van Riel Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c8dee22b1945..4721b11b922a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,6 +122,7 @@ struct kvm_vcpu { int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; wait_queue_head_t wq; + struct pid *pid; int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; -- cgit v1.2.3 From 217ece6129f2d3b4fdd18d9e79be9e43d8d14a42 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 1 Feb 2011 09:53:28 -0500 Subject: KVM: use yield_to instead of sleep in kvm_vcpu_on_spin Instead of sleeping in kvm_vcpu_on_spin, which can cause gigantic slowdowns of certain workloads, we instead use yield_to to get another VCPU in the same KVM guest to run sooner. This seems to give a 10-15% speedup in certain workloads. Signed-off-by: Rik van Riel Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4721b11b922a..3751ea0d1f92 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -235,6 +235,7 @@ struct kvm { #endif struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; atomic_t online_vcpus; + int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; struct kvm_io_bus *buses[KVM_NR_BUSES]; -- cgit v1.2.3 From f86368493ec038218e8663cc1b6e5393cd8e008a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Thu, 3 Feb 2011 15:07:07 +0200 Subject: KVM: Fix race between nmi injection and enabling nmi window The interrupt injection logic looks something like if an nmi is pending, and nmi injection allowed inject nmi if an nmi is pending request exit on nmi window the problem is that "nmi is pending" can be set asynchronously by the PIT; if it happens to fire between the two if statements, 
we will request an nmi window even though nmi injection is allowed. On SVM, this has disastrous results, since it causes eflags.TF to be set in random guest code. The fix is simple; make nmi_pending synchronous using the standard vcpu->requests mechanism; this ensures the code above is completely synchronous wrt nmi_pending. Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3751ea0d1f92..ab428552af8e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -43,6 +43,7 @@ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 +#define KVM_REQ_NMI 13 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 -- cgit v1.2.3