/* * KVM Microsoft Hyper-V emulation * * derived from arch/x86/kvm/x86.c * * Copyright (C) 2006 Qumranet, Inc. * Copyright (C) 2008 Qumranet, Inc. * Copyright IBM Corporation, 2008 * Copyright 2010 Red Hat, Inc. and/or its affiliates. * Copyright (C) 2015 Andrey Smetanin * * Authors: * Avi Kivity * Yaniv Kamay * Amit Shah * Ben-Ami Yassour * Andrey Smetanin * * This work is licensed under the terms of the GNU GPL, version 2. See * the COPYING file in the top-level directory. * */ #include "x86.h" #include "lapic.h" #include "ioapic.h" #include "hyperv.h" #include #include #include #include "trace.h" static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) { return atomic64_read(&synic->sint[sint]); } static inline int synic_get_sint_vector(u64 sint_value) { if (sint_value & HV_SYNIC_SINT_MASKED) return -1; return sint_value & HV_SYNIC_SINT_VECTOR_MASK; } static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, int vector) { int i; for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) return true; } return false; } static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, int vector) { int i; u64 sint_value; for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { sint_value = synic_read_sint(synic, i); if (synic_get_sint_vector(sint_value) == vector && sint_value & HV_SYNIC_SINT_AUTO_EOI) return true; } return false; } static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) { int vector; vector = data & HV_SYNIC_SINT_VECTOR_MASK; if (vector < 16) return 1; /* * Guest may configure multiple SINTs to use the same vector, so * we maintain a bitmap of vectors handled by synic, and a * bitmap of vectors with auto-eoi behavior. The bitmaps are * updated here, and atomically queried on fast paths. */ atomic64_set(&synic->sint[sint], data); if (synic_has_vector_connected(synic, vector)) __set_bit(vector, synic->vec_bitmap); else __clear_bit(vector, synic->vec_bitmap); if (synic_has_vector_auto_eoi(synic, vector)) __set_bit(vector, synic->auto_eoi_bitmap); else __clear_bit(vector, synic->auto_eoi_bitmap); /* Load SynIC vectors into EOI exit bitmap */ kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); return 0; } static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) { struct kvm_vcpu *vcpu; struct kvm_vcpu_hv_synic *synic; if (vcpu_id >= atomic_read(&kvm->online_vcpus)) return NULL; vcpu = kvm_get_vcpu(kvm, vcpu_id); if (!vcpu) return NULL; synic = vcpu_to_synic(vcpu); return (synic->active) ? synic : NULL; } static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) { struct kvm *kvm = vcpu->kvm; int gsi, idx; vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); idx = srcu_read_lock(&kvm->irq_srcu); gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); if (gsi != -1) kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) { struct kvm_vcpu *vcpu = synic_to_vcpu(synic); struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; hv_vcpu->exit.u.synic.msr = msr; hv_vcpu->exit.u.synic.control = synic->control; hv_vcpu->exit.u.synic.evt_page = synic->evt_page; hv_vcpu->exit.u.synic.msg_page = synic->msg_page; kvm_make_request(KVM_REQ_HV_EXIT, vcpu); } static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 data, bool host) { struct kvm_vcpu *vcpu = synic_to_vcpu(synic); int ret; if (!synic->active) return 1; vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", msr, data, host); ret = 0; switch (msr) { case HV_X64_MSR_SCONTROL: synic->control = data; if (!host) synic_exit(synic, msr); break; case HV_X64_MSR_SVERSION: if (!host) { ret = 1; break; } synic->version = data; break; case HV_X64_MSR_SIEFP: if (data & HV_SYNIC_SIEFP_ENABLE) if (kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE)) { ret = 1; break; } synic->evt_page = data; if (!host) synic_exit(synic, msr); break; case HV_X64_MSR_SIMP: if (data & HV_SYNIC_SIMP_ENABLE) if (kvm_clear_guest(vcpu->kvm, data & PAGE_MASK, PAGE_SIZE)) { ret = 1; break; } synic->msg_page = data; if (!host) synic_exit(synic, msr); break; case HV_X64_MSR_EOM: { int i; for (i = 0; i < ARRAY_SIZE(synic->sint); i++) kvm_hv_notify_acked_sint(vcpu, i); break; } case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); break; default: ret = 1; break; } return ret; } static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) { int ret; if (!synic->active) return 1; ret = 0; switch (msr) { case HV_X64_MSR_SCONTROL: *pdata = synic->control; break; case HV_X64_MSR_SVERSION: *pdata = synic->version; break; case HV_X64_MSR_SIEFP: *pdata = synic->evt_page; break; case HV_X64_MSR_SIMP: *pdata = synic->msg_page; break; case HV_X64_MSR_EOM: *pdata = 0; break; case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); break; default: ret = 1; break; } return ret; } int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) { struct kvm_vcpu *vcpu = synic_to_vcpu(synic); struct kvm_lapic_irq irq; int ret, vector; if (sint >= ARRAY_SIZE(synic->sint)) return -EINVAL; vector = synic_get_sint_vector(synic_read_sint(synic, sint)); if (vector < 0) return -ENOENT; memset(&irq, 0, sizeof(irq)); irq.dest_id = kvm_apic_id(vcpu->arch.apic); irq.dest_mode = APIC_DEST_PHYSICAL; irq.delivery_mode = APIC_DM_FIXED; irq.vector = vector; irq.level = 1; ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); return ret; } int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) { struct kvm_vcpu_hv_synic *synic; synic = synic_get(kvm, vcpu_id); if (!synic) return -EINVAL; return synic_set_irq(synic, sint); } void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) { struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); int i; vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); for (i = 0; i < ARRAY_SIZE(synic->sint); i++) if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) kvm_hv_notify_acked_sint(vcpu, i); } static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) { struct kvm_vcpu_hv_synic *synic; synic = synic_get(kvm, vcpu_id); if (!synic) return -EINVAL; if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) return -EINVAL; atomic_set(&synic->sint_to_gsi[sint], gsi); return 0; } void kvm_hv_irq_routing_update(struct kvm *kvm) { struct kvm_irq_routing_table *irq_rt; struct kvm_kernel_irq_routing_entry *e; u32 gsi; irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, lockdep_is_held(&kvm->irq_lock)); for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { hlist_for_each_entry(e, &irq_rt->map[gsi], link) { if (e->type == KVM_IRQ_ROUTING_HV_SINT) kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, e->hv_sint.sint, gsi); } } } static void synic_init(struct kvm_vcpu_hv_synic *synic) { int i; memset(synic, 0, sizeof(*synic)); synic->version = HV_SYNIC_VERSION_1; for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); atomic_set(&synic->sint_to_gsi[i], -1); } } static u64 get_time_ref_counter(struct kvm *kvm) { return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); } void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { synic_init(vcpu_to_synic(vcpu)); } int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) { /* * Hyper-V SynIC auto EOI SINT's are * not compatible with APICV, so deactivate APICV */ kvm_vcpu_deactivate_apicv(vcpu); vcpu_to_synic(vcpu)->active = true; return 0; } static bool kvm_hv_msr_partition_wide(u32 msr) { bool r = false; switch (msr) { case HV_X64_MSR_GUEST_OS_ID: case HV_X64_MSR_HYPERCALL: case HV_X64_MSR_REFERENCE_TSC: case HV_X64_MSR_TIME_REF_COUNT: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_RESET: r = true; break; } return r; } static int kvm_hv_msr_get_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) return -EINVAL; *pdata = hv->hv_crash_param[index]; return 0; } static int kvm_hv_msr_get_crash_ctl(struct kvm_vcpu *vcpu, u64 *pdata) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; *pdata = hv->hv_crash_ctl; return 0; } static int kvm_hv_msr_set_crash_ctl(struct kvm_vcpu *vcpu, u64 data, bool host) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; if (host) hv->hv_crash_ctl = data & HV_X64_MSR_CRASH_CTL_NOTIFY; if (!host && (data & HV_X64_MSR_CRASH_CTL_NOTIFY)) { vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n", hv->hv_crash_param[0], hv->hv_crash_param[1], hv->hv_crash_param[2], hv->hv_crash_param[3], hv->hv_crash_param[4]); /* Send notification about crash to user space */ kvm_make_request(KVM_REQ_HV_CRASH, vcpu); } return 0; } static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, u32 index, u64 data) { struct kvm_hv *hv = &vcpu->kvm->arch.hyperv; if (WARN_ON_ONCE(index >= ARRAY_SIZE(hv->hv_crash_param))) return -EINVAL; hv->hv_crash_param[index] = data; return 0; } static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm *kvm = vcpu->kvm; struct kvm_hv *hv = &kvm->arch.hyperv; switch (msr) { case HV_X64_MSR_GUEST_OS_ID: hv->hv_guest_os_id = data; /* setting guest os id to zero disables hypercall page */ if (!hv->hv_guest_os_id) hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE; break; case HV_X64_MSR_HYPERCALL: { u64 gfn; unsigned long addr; u8 instructions[4]; /* if guest os id is not set hypercall should remain disabled */ if (!hv->hv_guest_os_id) break; if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) { hv->hv_hypercall = data; break; } gfn = data >> HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT; addr = gfn_to_hva(kvm, gfn); if (kvm_is_error_hva(addr)) return 1; kvm_x86_ops->patch_hypercall(vcpu, instructions); ((unsigned char *)instructions)[3] = 0xc3; /* ret */ if (__copy_to_user((void __user *)addr, instructions, 4)) return 1; hv->hv_hypercall = data; mark_page_dirty(kvm, gfn); break; } case HV_X64_MSR_REFERENCE_TSC: { u64 gfn; HV_REFERENCE_TSC_PAGE tsc_ref; memset(&tsc_ref, 0, sizeof(tsc_ref)); hv->hv_tsc_page = data; if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) break; gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; if (kvm_write_guest( kvm, gfn << HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT, &tsc_ref, sizeof(tsc_ref))) return 1; mark_page_dirty(kvm, gfn); break; } case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_set_crash_data(vcpu, msr - HV_X64_MSR_CRASH_P0, data); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_set_crash_ctl(vcpu, data, host); case HV_X64_MSR_RESET: if (data == 1) { vcpu_debug(vcpu, "hyper-v reset requested\n"); kvm_make_request(KVM_REQ_HV_RESET, vcpu); } break; default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } return 0; } /* Calculate cpu time spent by current task in 100ns units */ static u64 current_task_runtime_100ns(void) { cputime_t utime, stime; task_cputime_adjusted(current, &utime, &stime); return div_u64(cputime_to_nsecs(utime + stime), 100); } static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; switch (msr) { case HV_X64_MSR_APIC_ASSIST_PAGE: { u64 gfn; unsigned long addr; if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { hv->hv_vapic = data; if (kvm_lapic_enable_pv_eoi(vcpu, 0)) return 1; break; } gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; addr = kvm_vcpu_gfn_to_hva(vcpu, gfn); if (kvm_is_error_hva(addr)) return 1; if (__clear_user((void __user *)addr, PAGE_SIZE)) return 1; hv->hv_vapic = data; kvm_vcpu_mark_page_dirty(vcpu, gfn); if (kvm_lapic_enable_pv_eoi(vcpu, gfn_to_gpa(gfn) | KVM_MSR_ENABLED)) return 1; break; } case HV_X64_MSR_EOI: return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data); case HV_X64_MSR_ICR: return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data); case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data); case HV_X64_MSR_VP_RUNTIME: if (!host) return 1; hv->runtime_offset = data - current_task_runtime_100ns(); break; case HV_X64_MSR_SCONTROL: case HV_X64_MSR_SVERSION: case HV_X64_MSR_SIEFP: case HV_X64_MSR_SIMP: case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } return 0; } static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data = 0; struct kvm *kvm = vcpu->kvm; struct kvm_hv *hv = &kvm->arch.hyperv; switch (msr) { case HV_X64_MSR_GUEST_OS_ID: data = hv->hv_guest_os_id; break; case HV_X64_MSR_HYPERCALL: data = hv->hv_hypercall; break; case HV_X64_MSR_TIME_REF_COUNT: data = get_time_ref_counter(kvm); break; case HV_X64_MSR_REFERENCE_TSC: data = hv->hv_tsc_page; break; case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_get_crash_data(vcpu, msr - HV_X64_MSR_CRASH_P0, pdata); case HV_X64_MSR_CRASH_CTL: return kvm_hv_msr_get_crash_ctl(vcpu, pdata); case HV_X64_MSR_RESET: data = 0; break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; } *pdata = data; return 0; } static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { u64 data = 0; struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv; switch (msr) { case HV_X64_MSR_VP_INDEX: { int r; struct kvm_vcpu *v; kvm_for_each_vcpu(r, v, vcpu->kvm) { if (v == vcpu) { data = r; break; } } break; } case HV_X64_MSR_EOI: return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata); case HV_X64_MSR_ICR: return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata); case HV_X64_MSR_TPR: return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata); case HV_X64_MSR_APIC_ASSIST_PAGE: data = hv->hv_vapic; break; case HV_X64_MSR_VP_RUNTIME: data = current_task_runtime_100ns() + hv->runtime_offset; break; case HV_X64_MSR_SCONTROL: case HV_X64_MSR_SVERSION: case HV_X64_MSR_SIEFP: case HV_X64_MSR_SIMP: case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; } *pdata = data; return 0; } int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) { if (kvm_hv_msr_partition_wide(msr)) { int r; mutex_lock(&vcpu->kvm->lock); r = kvm_hv_set_msr_pw(vcpu, msr, data, host); mutex_unlock(&vcpu->kvm->lock); return r; } else return kvm_hv_set_msr(vcpu, msr, data, host); } int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) { if (kvm_hv_msr_partition_wide(msr)) { int r; mutex_lock(&vcpu->kvm->lock); r = kvm_hv_get_msr_pw(vcpu, msr, pdata); mutex_unlock(&vcpu->kvm->lock); return r; } else return kvm_hv_get_msr(vcpu, msr, pdata); } bool kvm_hv_hypercall_enabled(struct kvm *kvm) { return kvm->arch.hyperv.hv_hypercall & HV_X64_MSR_HYPERCALL_ENABLE; } int kvm_hv_hypercall(struct kvm_vcpu *vcpu) { u64 param, ingpa, outgpa, ret; uint16_t code, rep_idx, rep_cnt, res = HV_STATUS_SUCCESS, rep_done = 0; bool fast, longmode; /* * hypercall generates UD from non zero cpl and real mode * per HYPER-V spec */ if (kvm_x86_ops->get_cpl(vcpu) != 0 || !is_protmode(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); return 0; } longmode = is_64_bit_mode(vcpu); if (!longmode) { param = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDX) << 32) | (kvm_register_read(vcpu, VCPU_REGS_RAX) & 0xffffffff); ingpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RBX) << 32) | (kvm_register_read(vcpu, VCPU_REGS_RCX) & 0xffffffff); outgpa = ((u64)kvm_register_read(vcpu, VCPU_REGS_RDI) << 32) | (kvm_register_read(vcpu, VCPU_REGS_RSI) & 0xffffffff); } #ifdef CONFIG_X86_64 else { param = kvm_register_read(vcpu, VCPU_REGS_RCX); ingpa = kvm_register_read(vcpu, VCPU_REGS_RDX); outgpa = kvm_register_read(vcpu, VCPU_REGS_R8); } #endif code = param & 0xffff; fast = (param >> 16) & 0x1; rep_cnt = (param >> 32) & 0xfff; rep_idx = (param >> 48) & 0xfff; trace_kvm_hv_hypercall(code, fast, rep_cnt, rep_idx, ingpa, outgpa); switch (code) { case HV_X64_HV_NOTIFY_LONG_SPIN_WAIT: kvm_vcpu_on_spin(vcpu); break; default: res = HV_STATUS_INVALID_HYPERCALL_CODE; break; } ret = res | (((u64)rep_done & 0xfff) << 32); if (longmode) { kvm_register_write(vcpu, VCPU_REGS_RAX, ret); } else { kvm_register_write(vcpu, VCPU_REGS_RDX, ret >> 32); kvm_register_write(vcpu, VCPU_REGS_RAX, ret & 0xffffffff); } return 1; }