From d56f5136b01020155b6b0a29f69d924687529bee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Jun 2020 15:16:52 +0200 Subject: KVM: let kvm_destroy_vm_debugfs clean up vCPU debugfs directories After commit 63d0434 ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") we are creating the pre-vCPU debugfs files after the creation of the vCPU file descriptor. This makes it possible for userspace to reach kvm_vcpu_release before kvm_create_vcpu_debugfs has finished. The vcpu->debugfs_dentry then does not have any associated inode anymore, and this causes a NULL-pointer dereference in debugfs_create_file. The solution is simply to avoid removing the files; they are cleaned up when the VM file descriptor is closed (and that must be after KVM_CREATE_VCPU returns). We can stop storing the dentry in struct kvm_vcpu too, because it is not needed anywhere after kvm_create_vcpu_debugfs returns. Reported-by: syzbot+705f4401d5a93a59b87d@syzkaller.appspotmail.com Fixes: 63d04348371b ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7fa1e38e1659..3577eb84eac0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2973,7 +2973,6 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; - debugfs_remove_recursive(vcpu->debugfs_dentry); kvm_put_kvm(vcpu->kvm); return 0; } @@ -3000,16 +2999,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS + struct dentry *debugfs_dentry; char dir_name[ITOA_MAX_LEN * 2]; if (!debugfs_initialized()) return; snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); - vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); + debugfs_dentry = debugfs_create_dir(dir_name, + vcpu->kvm->debugfs_dentry); - kvm_arch_create_vcpu_debugfs(vcpu); + kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry); #endif } -- cgit v1.2.3 From 7ec28e264f2e52089c14c6f8eba1ce7b6501e59b Mon Sep 17 00:00:00 2001 From: Denis Efremov Date: Wed, 3 Jun 2020 13:11:31 +0300 Subject: KVM: Use vmemdup_user() Replace opencoded alloc and copy with vmemdup_user(). Signed-off-by: Denis Efremov Message-Id: <20200603101131.2107303-1-efremov@linux.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3577eb84eac0..4db151f6101e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3746,21 +3746,18 @@ static long kvm_vm_ioctl(struct file *filp, if (routing.flags) goto out; if (routing.nr) { - r = -ENOMEM; - entries = vmalloc(array_size(sizeof(*entries), - routing.nr)); - if (!entries) - goto out; - r = -EFAULT; urouting = argp; - if (copy_from_user(entries, urouting->entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; + entries = vmemdup_user(urouting->entries, + array_size(sizeof(*entries), + routing.nr)); + if (IS_ERR(entries)) { + r = PTR_ERR(entries); + goto out; + } } r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); -out_free_irq_routing: - vfree(entries); + kvfree(entries); break; } #endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ -- cgit v1.2.3 From e649b3f0188f8fd34dd0dde8d43fd3312b902fb2 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Sat, 6 Jun 2020 13:26:27 +0900 Subject: KVM: x86: Fix APIC page invalidation race Commit b1394e745b94 ("KVM: x86: fix APIC page invalidation") tried to fix inappropriate APIC page invalidation by re-introducing arch specific kvm_arch_mmu_notifier_invalidate_range() and calling it from kvm_mmu_notifier_invalidate_range_start. However, the patch left a possible race where the VMCS APIC address cache is updated *before* it is unmapped: (Invalidator) kvm_mmu_notifier_invalidate_range_start() (Invalidator) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD) (KVM VCPU) vcpu_enter_guest() (KVM VCPU) kvm_vcpu_reload_apic_access_page() (Invalidator) actually unmap page Because of the above race, there can be a mismatch between the host physical address stored in the APIC_ACCESS_PAGE VMCS field and the host physical address stored in the EPT entry for the APIC GPA (0xfee0000). When this happens, the processor will not trap APIC accesses, and will instead show the raw contents of the APIC-access page. Because Windows OS periodically checks for unexpected modifications to the LAPIC register, this will show up as a BSOD crash with BugCheck CRITICAL_STRUCTURE_CORRUPTION (109) we are currently seeing in https://bugzilla.redhat.com/show_bug.cgi?id=1751017. The root cause of the issue is that kvm_arch_mmu_notifier_invalidate_range() cannot guarantee that no additional references are taken to the pages in the range before kvm_mmu_notifier_invalidate_range_end(). Fortunately, this case is supported by the MMU notifier API, as documented in include/linux/mmu_notifier.h: * If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). The fix therefore is to reload the APIC-access page field in the VMCS from kvm_mmu_notifier_invalidate_range() instead of ..._range_start(). Cc: stable@vger.kernel.org Fixes: b1394e745b94 ("KVM: x86: fix APIC page invalidation") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=197951 Signed-off-by: Eiichi Tsukata Message-Id: <20200606042627.61070-1-eiichi.tsukata@nutanix.com> Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'virt/kvm/kvm_main.c') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4db151f6101e..7b6013f2ba19 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -155,10 +155,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; -__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, bool blockable) +__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { - return 0; } bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) @@ -384,6 +383,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } +static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + + idx = srcu_read_lock(&kvm->srcu); + kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + srcu_read_unlock(&kvm->srcu, idx); +} + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -408,7 +419,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, { struct kvm *kvm = mmu_notifier_to_kvm(mn); int need_tlb_flush = 0, idx; - int ret; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -425,14 +435,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); - - ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start, - range->end, - mmu_notifier_range_blockable(range)); - srcu_read_unlock(&kvm->srcu, idx); - return ret; + return 0; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -538,6 +543,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_range = kvm_mmu_notifier_invalidate_range, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, -- cgit v1.2.3