summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2018-10-23 02:36:47 +0200
committerPaolo Bonzini <pbonzini@redhat.com>2018-12-14 12:34:19 +0100
commit2a31b9db153530df4aa02dac8c32837bf5f47019 (patch)
tree0cd6fe156ec696e6a55a0d7117794f590ec76958 /arch
parent8fe65a8299f9e1f40cb95308ab7b3c4ad80bf801 (diff)
downloadlinux-2a31b9db153530df4aa02dac8c32837bf5f47019.tar.gz
linux-2a31b9db153530df4aa02dac8c32837bf5f47019.tar.bz2
linux-2a31b9db153530df4aa02dac8c32837bf5f47019.zip
kvm: introduce manual dirty log reprotect
There are two problems with KVM_GET_DIRTY_LOG. First, and less important, it can take kvm->mmu_lock for an extended period of time. Second, its user can actually see many false positives in some cases. The latter is due to a benign race like this: 1. KVM_GET_DIRTY_LOG returns a set of dirty pages and write protects them. 2. The guest modifies the pages, causing them to be marked ditry. 3. Userspace actually copies the pages. 4. KVM_GET_DIRTY_LOG returns those pages as dirty again, even though they were not written to since (3). This is especially a problem for large guests, where the time between (1) and (3) can be substantial. This patch introduces a new capability which, when enabled, makes KVM_GET_DIRTY_LOG not write-protect the pages it returns. Instead, userspace has to explicitly clear the dirty log bits just before using the content of the page. The new KVM_CLEAR_DIRTY_LOG ioctl can also operate on a 64-page granularity rather than requiring to sync a full memslot; this way, the mmu_lock is taken for small amounts of time, and only a small amount of time will pass between write protection of pages and the sending of their content. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/mips/kvm/mips.c23
-rw-r--r--arch/x86/kvm/x86.c27
2 files changed, 50 insertions, 0 deletions
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 3898e657952e..3734cd58895e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -1023,6 +1023,29 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return r;
}
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ if (flush) {
+ slots = kvm_memslots(kvm);
+ memslot = id_to_memslot(slots, log->slot);
+
+ /* Let implementation handle TLB/GVA invalidation */
+ kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot);
+ }
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg)
{
long r;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 448f011aa317..6af846c54660 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4418,6 +4418,33 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
return r;
}
+int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct kvm_clear_dirty_log *log)
+{
+ bool flush = false;
+ int r;
+
+ mutex_lock(&kvm->slots_lock);
+
+ /*
+ * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+ */
+ if (kvm_x86_ops->flush_log_dirty)
+ kvm_x86_ops->flush_log_dirty(kvm);
+
+ r = kvm_clear_dirty_log_protect(kvm, log, &flush);
+
+ /*
+ * All the TLBs can be flushed out of mmu lock, see the comments in
+ * kvm_mmu_slot_remove_write_access().
+ */
+ lockdep_assert_held(&kvm->slots_lock);
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+
+ mutex_unlock(&kvm->slots_lock);
+ return r;
+}
+
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
bool line_status)
{