diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
-rw-r--r-- | virt/kvm/kvm_main.c | 298 |
1 files changed, 206 insertions, 92 deletions
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2884baf1d5f9..897bff3b7df9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -59,9 +59,18 @@ #include "irq.h" #endif +#define CREATE_TRACE_POINTS +#include <trace/events/kvm.h> + MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); +/* + * Ordering of locks: + * + * kvm->slots_lock --> kvm->lock --> kvm->irq_lock + */ + DEFINE_SPINLOCK(kvm_lock); LIST_HEAD(vm_list); @@ -79,6 +88,8 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl, static bool kvm_rebooting; +static bool largepages_enabled = true; + #ifdef KVM_CAP_DEVICE_ASSIGNMENT static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, int assigned_dev_id) @@ -120,17 +131,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) { struct kvm_assigned_dev_kernel *assigned_dev; struct kvm *kvm; - int irq, i; + int i; assigned_dev = container_of(work, struct kvm_assigned_dev_kernel, interrupt_work); kvm = assigned_dev->kvm; - /* This is taken to safely inject irq inside the guest. When - * the interrupt injection (or the ioapic code) uses a - * finer-grained lock, update this - */ - mutex_lock(&kvm->lock); + mutex_lock(&kvm->irq_lock); spin_lock_irq(&assigned_dev->assigned_dev_lock); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { struct kvm_guest_msix_entry *guest_entries = @@ -143,23 +150,13 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, guest_entries[i].vector, 1); - irq = assigned_dev->host_msix_entries[i].vector; - if (irq != 0) - enable_irq(irq); - assigned_dev->host_irq_disabled = false; } - } else { + } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, assigned_dev->guest_irq, 1); - if (assigned_dev->irq_requested_type & - KVM_DEV_IRQ_GUEST_MSI) { - enable_irq(assigned_dev->host_irq); - assigned_dev->host_irq_disabled = false; - } - } spin_unlock_irq(&assigned_dev->assigned_dev_lock); - mutex_unlock(&assigned_dev->kvm->lock); + mutex_unlock(&assigned_dev->kvm->irq_lock); } static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) @@ -179,8 +176,10 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) schedule_work(&assigned_dev->interrupt_work); - disable_irq_nosync(irq); - assigned_dev->host_irq_disabled = true; + if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + } out: spin_unlock_irqrestore(&assigned_dev->assigned_dev_lock, flags); @@ -215,7 +214,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) static void deassign_guest_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { - kvm_unregister_irq_ack_notifier(&assigned_dev->ack_notifier); + kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); assigned_dev->ack_notifier.gsi = -1; if (assigned_dev->irq_source_id != -1) @@ -417,6 +416,7 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; return 0; } #endif @@ -427,6 +427,7 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; + dev->host_irq_disabled = false; return 0; } #endif @@ -693,11 +694,6 @@ out: } #endif -static inline int valid_vcpu(int n) -{ - return likely(n >= 0 && n < KVM_MAX_VCPUS); -} - inline int kvm_is_mmio_pfn(pfn_t pfn) { if (pfn_valid(pfn)) { @@ -745,12 +741,9 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) if (alloc_cpumask_var(&cpus, GFP_ATOMIC)) cpumask_clear(cpus); - me = get_cpu(); spin_lock(&kvm->requests_lock); - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (!vcpu) - continue; + me = smp_processor_id(); + kvm_for_each_vcpu(i, vcpu, kvm) { if (test_and_set_bit(req, &vcpu->requests)) continue; cpu = vcpu->cpu; @@ -764,7 +757,6 @@ static bool make_all_cpus_request(struct kvm *kvm, unsigned int req) else called = false; spin_unlock(&kvm->requests_lock); - put_cpu(); free_cpumask_var(cpus); return called; } @@ -986,7 +978,9 @@ static struct kvm *kvm_create_vm(void) spin_lock_init(&kvm->mmu_lock); spin_lock_init(&kvm->requests_lock); kvm_io_bus_init(&kvm->pio_bus); + kvm_eventfd_init(kvm); mutex_init(&kvm->lock); + mutex_init(&kvm->irq_lock); kvm_io_bus_init(&kvm->mmio_bus); init_rwsem(&kvm->slots_lock); atomic_set(&kvm->users_count, 1); @@ -1006,19 +1000,25 @@ out: static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { + int i; + if (!dont || free->rmap != dont->rmap) vfree(free->rmap); if (!dont || free->dirty_bitmap != dont->dirty_bitmap) vfree(free->dirty_bitmap); - if (!dont || free->lpage_info != dont->lpage_info) - vfree(free->lpage_info); + + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { + vfree(free->lpage_info[i]); + free->lpage_info[i] = NULL; + } + } free->npages = 0; free->dirty_bitmap = NULL; free->rmap = NULL; - free->lpage_info = NULL; } void kvm_free_physmem(struct kvm *kvm) @@ -1071,6 +1071,8 @@ static int kvm_vm_release(struct inode *inode, struct file *filp) { struct kvm *kvm = filp->private_data; + kvm_irqfd_release(kvm); + kvm_put_kvm(kvm); return 0; } @@ -1089,8 +1091,8 @@ int __kvm_set_memory_region(struct kvm *kvm, { int r; gfn_t base_gfn; - unsigned long npages, ugfn; - unsigned long largepages, i; + unsigned long npages; + unsigned long i; struct kvm_memory_slot *memslot; struct kvm_memory_slot old, new; @@ -1164,31 +1166,51 @@ int __kvm_set_memory_region(struct kvm *kvm, else new.userspace_addr = 0; } - if (npages && !new.lpage_info) { - largepages = 1 + (base_gfn + npages - 1) / KVM_PAGES_PER_HPAGE; - largepages -= base_gfn / KVM_PAGES_PER_HPAGE; + if (!npages) + goto skip_lpage; - new.lpage_info = vmalloc(largepages * sizeof(*new.lpage_info)); + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + unsigned long ugfn; + unsigned long j; + int lpages; + int level = i + 2; - if (!new.lpage_info) + /* Avoid unused variable warning if no large pages */ + (void)level; + + if (new.lpage_info[i]) + continue; + + lpages = 1 + (base_gfn + npages - 1) / + KVM_PAGES_PER_HPAGE(level); + lpages -= base_gfn / KVM_PAGES_PER_HPAGE(level); + + new.lpage_info[i] = vmalloc(lpages * sizeof(*new.lpage_info[i])); + + if (!new.lpage_info[i]) goto out_free; - memset(new.lpage_info, 0, largepages * sizeof(*new.lpage_info)); + memset(new.lpage_info[i], 0, + lpages * sizeof(*new.lpage_info[i])); - if (base_gfn % KVM_PAGES_PER_HPAGE) - new.lpage_info[0].write_count = 1; - if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE) - new.lpage_info[largepages-1].write_count = 1; + if (base_gfn % KVM_PAGES_PER_HPAGE(level)) + new.lpage_info[i][0].write_count = 1; + if ((base_gfn+npages) % KVM_PAGES_PER_HPAGE(level)) + new.lpage_info[i][lpages - 1].write_count = 1; ugfn = new.userspace_addr >> PAGE_SHIFT; /* * If the gfn and userspace address are not aligned wrt each - * other, disable large page support for this slot + * other, or if explicitly asked to, disable large page + * support for this slot */ - if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE - 1)) - for (i = 0; i < largepages; ++i) - new.lpage_info[i].write_count = 1; + if ((base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || + !largepages_enabled) + for (j = 0; j < lpages; ++j) + new.lpage_info[i][j].write_count = 1; } +skip_lpage: + /* Allocate page dirty bitmap if needed */ if ((new.flags & KVM_MEM_LOG_DIRTY_PAGES) && !new.dirty_bitmap) { unsigned dirty_bytes = ALIGN(npages, BITS_PER_LONG) / 8; @@ -1200,6 +1222,10 @@ int __kvm_set_memory_region(struct kvm *kvm, if (old.npages) kvm_arch_flush_shadow(kvm); } +#else /* not defined CONFIG_S390 */ + new.user_alloc = user_alloc; + if (user_alloc) + new.userspace_addr = mem->userspace_addr; #endif /* not defined CONFIG_S390 */ if (!npages) @@ -1299,6 +1325,12 @@ out: return r; } +void kvm_disable_largepages(void) +{ + largepages_enabled = false; +} +EXPORT_SYMBOL_GPL(kvm_disable_largepages); + int is_error_page(struct page *page) { return page == bad_page; @@ -1635,9 +1667,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); - if ((kvm_arch_interrupt_allowed(vcpu) && - kvm_cpu_has_interrupt(vcpu)) || - kvm_arch_vcpu_runnable(vcpu)) { + if (kvm_arch_vcpu_runnable(vcpu)) { set_bit(KVM_REQ_UNHALT, &vcpu->requests); break; } @@ -1714,24 +1744,18 @@ static struct file_operations kvm_vcpu_fops = { */ static int create_vcpu_fd(struct kvm_vcpu *vcpu) { - int fd = anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); - if (fd < 0) - kvm_put_kvm(vcpu->kvm); - return fd; + return anon_inode_getfd("kvm-vcpu", &kvm_vcpu_fops, vcpu, 0); } /* * Creates some virtual cpus. Good luck creating more than one. */ -static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) +static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; - struct kvm_vcpu *vcpu; - - if (!valid_vcpu(n)) - return -EINVAL; + struct kvm_vcpu *vcpu, *v; - vcpu = kvm_arch_vcpu_create(kvm, n); + vcpu = kvm_arch_vcpu_create(kvm, id); if (IS_ERR(vcpu)) return PTR_ERR(vcpu); @@ -1742,23 +1766,38 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int n) return r; mutex_lock(&kvm->lock); - if (kvm->vcpus[n]) { - r = -EEXIST; + if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { + r = -EINVAL; goto vcpu_destroy; } - kvm->vcpus[n] = vcpu; - mutex_unlock(&kvm->lock); + + kvm_for_each_vcpu(r, v, kvm) + if (v->vcpu_id == id) { + r = -EEXIST; + goto vcpu_destroy; + } + + BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); /* Now it's all set up, let userspace reach it */ kvm_get_kvm(kvm); r = create_vcpu_fd(vcpu); - if (r < 0) - goto unlink; + if (r < 0) { + kvm_put_kvm(kvm); + goto vcpu_destroy; + } + + kvm->vcpus[atomic_read(&kvm->online_vcpus)] = vcpu; + smp_wmb(); + atomic_inc(&kvm->online_vcpus); + +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + if (kvm->bsp_vcpu_id == id) + kvm->bsp_vcpu = vcpu; +#endif + mutex_unlock(&kvm->lock); return r; -unlink: - mutex_lock(&kvm->lock); - kvm->vcpus[n] = NULL; vcpu_destroy: mutex_unlock(&kvm->lock); kvm_arch_vcpu_destroy(vcpu); @@ -2199,6 +2238,7 @@ static long kvm_vm_ioctl(struct file *filp, vfree(entries); break; } +#endif /* KVM_CAP_IRQ_ROUTING */ #ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; @@ -2221,7 +2261,35 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif -#endif /* KVM_CAP_IRQ_ROUTING */ + case KVM_IRQFD: { + struct kvm_irqfd data; + + r = -EFAULT; + if (copy_from_user(&data, argp, sizeof data)) + goto out; + r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); + break; + } + case KVM_IOEVENTFD: { + struct kvm_ioeventfd data; + + r = -EFAULT; + if (copy_from_user(&data, argp, sizeof data)) + goto out; + r = kvm_ioeventfd(kvm, &data); + break; + } +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_SET_BOOT_CPU_ID: + r = 0; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus) != 0) + r = -EBUSY; + else + kvm->bsp_vcpu_id = arg; + mutex_unlock(&kvm->lock); + break; +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } @@ -2288,6 +2356,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) case KVM_CAP_USER_MEMORY: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif return 1; #ifdef CONFIG_HAVE_KVM_IRQCHIP case KVM_CAP_IRQ_ROUTING: @@ -2335,7 +2406,7 @@ static long kvm_dev_ioctl(struct file *filp, case KVM_TRACE_ENABLE: case KVM_TRACE_PAUSE: case KVM_TRACE_DISABLE: - r = kvm_trace_ioctl(ioctl, arg); + r = -EOPNOTSUPP; break; default: return kvm_arch_dev_ioctl(filp, ioctl, arg); @@ -2449,26 +2520,71 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) } } -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, - gpa_t addr, int len, int is_write) +/* kvm_io_bus_write - called under kvm->slots_lock */ +int kvm_io_bus_write(struct kvm_io_bus *bus, gpa_t addr, + int len, const void *val) { int i; + for (i = 0; i < bus->dev_count; i++) + if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) + return 0; + return -EOPNOTSUPP; +} - for (i = 0; i < bus->dev_count; i++) { - struct kvm_io_device *pos = bus->devs[i]; +/* kvm_io_bus_read - called under kvm->slots_lock */ +int kvm_io_bus_read(struct kvm_io_bus *bus, gpa_t addr, int len, void *val) +{ + int i; + for (i = 0; i < bus->dev_count; i++) + if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) + return 0; + return -EOPNOTSUPP; +} - if (pos->in_range(pos, addr, len, is_write)) - return pos; - } +int kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, + struct kvm_io_device *dev) +{ + int ret; - return NULL; + down_write(&kvm->slots_lock); + ret = __kvm_io_bus_register_dev(bus, dev); + up_write(&kvm->slots_lock); + + return ret; } -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) +/* An unlocked version. Caller must have write lock on slots_lock. */ +int __kvm_io_bus_register_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev) { - BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); + if (bus->dev_count > NR_IOBUS_DEVS-1) + return -ENOSPC; bus->devs[bus->dev_count++] = dev; + + return 0; +} + +void kvm_io_bus_unregister_dev(struct kvm *kvm, + struct kvm_io_bus *bus, + struct kvm_io_device *dev) +{ + down_write(&kvm->slots_lock); + __kvm_io_bus_unregister_dev(bus, dev); + up_write(&kvm->slots_lock); +} + +/* An unlocked version. Caller must have write lock on slots_lock. */ +void __kvm_io_bus_unregister_dev(struct kvm_io_bus *bus, + struct kvm_io_device *dev) +{ + int i; + + for (i = 0; i < bus->dev_count; i++) + if (bus->devs[i] == dev) { + bus->devs[i] = bus->devs[--bus->dev_count]; + break; + } } static struct notifier_block kvm_cpu_notifier = { @@ -2501,11 +2617,9 @@ static int vcpu_stat_get(void *_offset, u64 *val) *val = 0; spin_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - for (i = 0; i < KVM_MAX_VCPUS; ++i) { - vcpu = kvm->vcpus[i]; - if (vcpu) - *val += *(u32 *)((void *)vcpu + offset); - } + kvm_for_each_vcpu(i, vcpu, kvm) + *val += *(u32 *)((void *)vcpu + offset); + spin_unlock(&kvm_lock); return 0; } @@ -2679,15 +2793,15 @@ out_free_0: __free_page(bad_page); out: kvm_arch_exit(); - kvm_exit_debug(); out_fail: + kvm_exit_debug(); return r; } EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - kvm_trace_cleanup(); + tracepoint_synchronize_unregister(); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); sysdev_unregister(&kvm_sysdev); |