Diffstat (limited to 'arch/x86/kvm/x86.c')
 arch/x86/kvm/x86.c (-rw-r--r--) | 878 ++++++++++++++++++++++++++++++------------------
 1 file changed, 498 insertions(+), 380 deletions(-)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1b404e4d7dd8..2a20ce60152e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -29,6 +29,7 @@
#include "pmu.h"
#include "hyperv.h"
#include "lapic.h"
+#include "xen.h"
#include <linux/clocksource.h>
#include <linux/interrupt.h>
@@ -114,11 +115,21 @@ static int sync_regs(struct kvm_vcpu *vcpu);
struct kvm_x86_ops kvm_x86_ops __read_mostly;
EXPORT_SYMBOL_GPL(kvm_x86_ops);
+#define KVM_X86_OP(func) \
+ DEFINE_STATIC_CALL_NULL(kvm_x86_##func, \
+ *(((struct kvm_x86_ops *)0)->func));
+#define KVM_X86_OP_NULL KVM_X86_OP
+#include <asm/kvm-x86-ops.h>
+EXPORT_STATIC_CALL_GPL(kvm_x86_get_cs_db_l_bits);
+EXPORT_STATIC_CALL_GPL(kvm_x86_cache_reg);
+EXPORT_STATIC_CALL_GPL(kvm_x86_tlb_flush_current);
+
static bool __read_mostly ignore_msrs = 0;
module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
-static bool __read_mostly report_ignored_msrs = true;
+bool __read_mostly report_ignored_msrs = true;
module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+EXPORT_SYMBOL_GPL(report_ignored_msrs);
unsigned int min_timer_period_us = 200;
module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
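For context on the conversion that follows: asm/kvm-x86-ops.h invokes KVM_X86_OP()/KVM_X86_OP_NULL() once per member of struct kvm_x86_ops, so the include above expands to one DEFINE_STATIC_CALL_NULL() per vendor hook; the *(((struct kvm_x86_ops *)0)->func) expression is never evaluated and only supplies the hook's function type. A minimal sketch of the resulting pattern for one hook (illustrative only; example_get_cpl is a hypothetical caller, and the static_call_update() wiring lives in a part of the series not shown here):

/* What the include effectively generates for the get_cpl hook: */
DEFINE_STATIC_CALL_NULL(kvm_x86_get_cpl,
			*(((struct kvm_x86_ops *)0)->get_cpl));

/* How the call sites later in this patch use it instead of an indirect call: */
static int example_get_cpl(struct kvm_vcpu *vcpu)
{
	return static_call(kvm_x86_get_cpl)(vcpu);	/* was kvm_x86_ops.get_cpl(vcpu) */
}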
@@ -136,6 +147,8 @@ u64 __read_mostly kvm_max_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_max_tsc_scaling_ratio);
u64 __read_mostly kvm_default_tsc_scaling_ratio;
EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
+bool __read_mostly kvm_has_bus_lock_exit;
+EXPORT_SYMBOL_GPL(kvm_has_bus_lock_exit);
/* tsc tolerance in parts per million - default to 1/2 of the NTP threshold */
static u32 __read_mostly tsc_tolerance_ppm = 250;
@@ -234,7 +247,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
VM_STAT("mmu_pte_write", mmu_pte_write),
- VM_STAT("mmu_pte_updated", mmu_pte_updated),
VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
VM_STAT("mmu_flooded", mmu_flooded),
VM_STAT("mmu_recycled", mmu_recycled),
@@ -395,7 +407,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
{
enum lapic_mode old_mode = kvm_get_apic_mode(vcpu);
enum lapic_mode new_mode = kvm_apic_mode(msr_info->data);
- u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | 0x2ff |
+ u64 reserved_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu) | 0x2ff |
(guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) ? 0 : X2APIC_ENABLE);
if ((msr_info->data & reserved_bits) != 0 || new_mode == LAPIC_MODE_INVALID)
@@ -484,19 +496,24 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu)
*/
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
/*
- * DR6.RTM is set by all #DB exceptions that don't clear it.
+ * To reflect the #DB exception payload in the guest's dr6, three
+ * components must be considered: the active-low bits, the FIXED_1
+ * bits and the active-high bits (e.g. DR6_BD, DR6_BS and DR6_BT).
+ * DR6_ACTIVE_LOW contains the FIXED_1 and active-low bits.
+ * In the target guest dr6:
+ * FIXED_1 bits are always set.
+ * Active-low bits are cleared when set to 1 in the payload.
+ * Active-high bits are set when set to 1 in the payload.
+ *
+ * Note, the payload is compatible with the pending debug
+ * exceptions/exit qualification under VMX, where the active-low
+ * bits are represented as active-high; they must therefore be
+ * flipped before being merged into DR6.
*/
- vcpu->arch.dr6 |= DR6_RTM;
+ vcpu->arch.dr6 |= DR6_ACTIVE_LOW;
vcpu->arch.dr6 |= payload;
- /*
- * Bit 16 should be set in the payload whenever the #DB
- * exception should clear DR6.RTM. This makes the payload
- * compatible with the pending debug exceptions under VMX.
- * Though not currently documented in the SDM, this also
- * makes the payload compatible with the exit qualification
- * for #DB exceptions under VMX.
- */
- vcpu->arch.dr6 ^= payload & DR6_RTM;
+ vcpu->arch.dr6 ^= payload & DR6_ACTIVE_LOW;
/*
* The #DB payload is defined as compatible with the 'pending
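A worked example of the flip above, using the constants from this hunk (a sketch, not part of the patch): a single-step #DB that occurred inside an RTM region arrives with an active-high payload of DR6_BS | DR6_RTM, and the guest must end up seeing DR6.BS = 1 and DR6.RTM = 0.

	unsigned long dr6 = 0;
	unsigned long payload = DR6_BS | DR6_RTM;	/* active-high representation */

	dr6 |= DR6_ACTIVE_LOW;			/* FIXED_1 and RTM start out as 1 */
	dr6 |= payload;				/* sets the active-high BS bit */
	dr6 ^= payload & DR6_ACTIVE_LOW;	/* flips RTM back to 0 */
	/* dr6 now has BS set and RTM clear, the architectural active-low encoding. */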
@@ -692,7 +709,7 @@ EXPORT_SYMBOL_GPL(kvm_requeue_exception_e);
*/
bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
{
- if (kvm_x86_ops.get_cpl(vcpu) <= required_cpl)
+ if (static_call(kvm_x86_get_cpl)(vcpu) <= required_cpl)
return true;
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
return false;
@@ -742,8 +759,7 @@ static int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn,
static inline u64 pdptr_rsvd_bits(struct kvm_vcpu *vcpu)
{
- return rsvd_bits(cpuid_maxphyaddr(vcpu), 63) | rsvd_bits(5, 8) |
- rsvd_bits(1, 2);
+ return vcpu->arch.reserved_gpa_bits | rsvd_bits(5, 8) | rsvd_bits(1, 2);
}
/*
@@ -852,7 +868,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!is_pae(vcpu))
return 1;
- kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
if (cs_l)
return 1;
}
@@ -865,7 +881,7 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!(cr0 & X86_CR0_PG) && kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE))
return 1;
- kvm_x86_ops.set_cr0(vcpu, cr0);
+ static_call(kvm_x86_set_cr0)(vcpu, cr0);
kvm_post_set_cr0(vcpu, old_cr0, cr0);
@@ -970,12 +986,10 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
{
- if (kvm_x86_ops.get_cpl(vcpu) != 0 ||
- __kvm_set_xcr(vcpu, index, xcr)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
- return 0;
+ if (static_call(kvm_x86_get_cpl)(vcpu) == 0)
+ return __kvm_set_xcr(vcpu, index, xcr);
+
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_set_xcr);
@@ -987,7 +1001,7 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
return false;
- return kvm_x86_ops.is_valid_cr4(vcpu, cr4);
+ return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
}
EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
@@ -1031,7 +1045,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
- kvm_x86_ops.set_cr4(vcpu, cr4);
+ static_call(kvm_x86_set_cr4)(vcpu, cr4);
kvm_post_set_cr4(vcpu, old_cr4, cr4);
@@ -1059,8 +1073,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
return 0;
}
- if (is_long_mode(vcpu) &&
- (cr3 & vcpu->arch.cr3_lm_rsvd_bits))
+ if (is_long_mode(vcpu) && kvm_vcpu_is_illegal_gpa(vcpu, cr3))
return 1;
else if (is_pae_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
@@ -1114,7 +1127,7 @@ void kvm_update_dr7(struct kvm_vcpu *vcpu)
dr7 = vcpu->arch.guest_debug_dr7;
else
dr7 = vcpu->arch.dr7;
- kvm_x86_ops.set_dr7(vcpu, dr7);
+ static_call(kvm_x86_set_dr7)(vcpu, dr7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED;
if (dr7 & DR7_BP_EN_MASK)
vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED;
@@ -1130,7 +1143,7 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
return fixed;
}
-static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
+int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
{
size_t size = ARRAY_SIZE(vcpu->arch.db);
@@ -1143,13 +1156,13 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
case 4:
case 6:
if (!kvm_dr6_valid(val))
- return -1; /* #GP */
+ return 1; /* #GP */
vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu);
break;
case 5:
default: /* 7 */
if (!kvm_dr7_valid(val))
- return -1; /* #GP */
+ return 1; /* #GP */
vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
kvm_update_dr7(vcpu);
break;
@@ -1157,18 +1170,9 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
return 0;
}
-
-int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
-{
- if (__kvm_set_dr(vcpu, dr, val)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
- return 0;
-}
EXPORT_SYMBOL_GPL(kvm_set_dr);
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
{
size_t size = ARRAY_SIZE(vcpu->arch.db);
@@ -1185,7 +1189,6 @@ int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
*val = vcpu->arch.dr7;
break;
}
- return 0;
}
EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1426,7 +1429,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
rdmsrl_safe(msr->index, &msr->data);
break;
default:
- return kvm_x86_ops.get_msr_feature(msr);
+ return static_call(kvm_x86_get_msr_feature)(msr);
}
return 0;
}
@@ -1502,7 +1505,7 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
efer &= ~EFER_LMA;
efer |= vcpu->arch.efer & EFER_LMA;
- r = kvm_x86_ops.set_efer(vcpu, efer);
+ r = static_call(kvm_x86_set_efer)(vcpu, efer);
if (r) {
WARN_ON(r > 0);
return r;
@@ -1599,7 +1602,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
msr.index = index;
msr.host_initiated = host_initiated;
- return kvm_x86_ops.set_msr(vcpu, &msr);
+ return static_call(kvm_x86_set_msr)(vcpu, &msr);
}
static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
@@ -1632,7 +1635,7 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
msr.index = index;
msr.host_initiated = host_initiated;
- ret = kvm_x86_ops.get_msr(vcpu, &msr);
+ ret = static_call(kvm_x86_get_msr)(vcpu, &msr);
if (!ret)
*data = msr.data;
return ret;
@@ -1673,12 +1676,12 @@ static int complete_emulated_rdmsr(struct kvm_vcpu *vcpu)
kvm_rdx_write(vcpu, vcpu->run->msr.data >> 32);
}
- return kvm_x86_ops.complete_emulated_msr(vcpu, err);
+ return static_call(kvm_x86_complete_emulated_msr)(vcpu, err);
}
static int complete_emulated_wrmsr(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.complete_emulated_msr(vcpu, vcpu->run->msr.error);
+ return static_call(kvm_x86_complete_emulated_msr)(vcpu, vcpu->run->msr.error);
}
static u64 kvm_msr_reason(int r)
@@ -1750,7 +1753,7 @@ int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu)
trace_kvm_msr_read_ex(ecx);
}
- return kvm_x86_ops.complete_emulated_msr(vcpu, r);
+ return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
}
EXPORT_SYMBOL_GPL(kvm_emulate_rdmsr);
@@ -1776,16 +1779,16 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu)
else
trace_kvm_msr_write_ex(ecx, data);
- return kvm_x86_ops.complete_emulated_msr(vcpu, r);
+ return static_call(kvm_x86_complete_emulated_msr)(vcpu, r);
}
EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
-bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
+static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
{
+ xfer_to_guest_mode_prepare();
return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) ||
xfer_to_guest_mode_work_pending();
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request);
/*
* The fast path for frequent and performance sensitive wrmsr emulation,
@@ -1935,15 +1938,14 @@ static s64 get_kvmclock_base_ns(void)
}
#endif
-static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
+void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs)
{
int version;
int r;
struct pvclock_wall_clock wc;
+ u32 wc_sec_hi;
u64 wall_nsec;
- kvm->arch.wall_clock = wall_clock;
-
if (!wall_clock)
return;
@@ -1972,6 +1974,12 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
+ if (sec_hi_ofs) {
+ wc_sec_hi = wall_nsec >> 32;
+ kvm_write_guest(kvm, wall_clock + sec_hi_ofs,
+ &wc_sec_hi, sizeof(wc_sec_hi));
+ }
+
version++;
kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
}
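The new sec_hi_ofs argument lets a caller also publish the upper 32 bits of the seconds value at a separate guest-physical offset; the legacy MSR paths later in this patch pass 0 and keep their old behaviour. A hedged sketch of how a caller with a 64-bit seconds layout might use it (wc_ofs and sec_hi_ofs are hypothetical guest-layout offsets, not taken from this hunk):

/* Illustrative only: publish the wall clock plus its seconds high word. */
static void example_publish_wall_clock(struct kvm *kvm, gpa_t base,
				       u32 wc_ofs, u32 sec_hi_ofs)
{
	kvm_write_wall_clock(kvm, base + wc_ofs, sec_hi_ofs - wc_ofs);
}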
@@ -2208,7 +2216,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc);
static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
{
vcpu->arch.l1_tsc_offset = offset;
- vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset);
+ vcpu->arch.tsc_offset = static_call(kvm_x86_write_l1_tsc_offset)(vcpu, offset);
}
static inline bool kvm_check_tsc_unstable(void)
@@ -2591,13 +2599,15 @@ u64 get_kvmclock_ns(struct kvm *kvm)
return ret;
}
-static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
+static void kvm_setup_pvclock_page(struct kvm_vcpu *v,
+ struct gfn_to_hva_cache *cache,
+ unsigned int offset)
{
struct kvm_vcpu_arch *vcpu = &v->arch;
struct pvclock_vcpu_time_info guest_hv_clock;
- if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
- &guest_hv_clock, sizeof(guest_hv_clock))))
+ if (unlikely(kvm_read_guest_offset_cached(v->kvm, cache,
+ &guest_hv_clock, offset, sizeof(guest_hv_clock))))
return;
/* This VCPU is paused, but it's legal for a guest to read another
@@ -2620,9 +2630,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
++guest_hv_clock.version; /* first time write, random junk */
vcpu->hv_clock.version = guest_hv_clock.version + 1;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
+ kvm_write_guest_offset_cached(v->kvm, cache,
+ &vcpu->hv_clock, offset,
+ sizeof(vcpu->hv_clock.version));
smp_wmb();
@@ -2636,16 +2646,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
+ kvm_write_guest_offset_cached(v->kvm, cache,
+ &vcpu->hv_clock, offset,
+ sizeof(vcpu->hv_clock));
smp_wmb();
vcpu->hv_clock.version++;
- kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
- &vcpu->hv_clock,
- sizeof(vcpu->hv_clock.version));
+ kvm_write_guest_offset_cached(v->kvm, cache,
+ &vcpu->hv_clock, offset,
+ sizeof(vcpu->hv_clock.version));
}
static int kvm_guest_time_update(struct kvm_vcpu *v)
@@ -2732,7 +2742,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.flags = pvclock_flags;
if (vcpu->pv_time_enabled)
- kvm_setup_pvclock_page(v);
+ kvm_setup_pvclock_page(v, &vcpu->pv_time, 0);
+ if (vcpu->xen.vcpu_info_set)
+ kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_info_cache,
+ offsetof(struct compat_vcpu_info, time));
+ if (vcpu->xen.vcpu_time_info_set)
+ kvm_setup_pvclock_page(v, &vcpu->xen.vcpu_time_info_cache, 0);
if (v == kvm_get_vcpu(v->kvm, 0))
kvm_hv_setup_tsc_page(v->kvm, &vcpu->hv_clock);
return 0;
@@ -2857,32 +2872,6 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
-static int xen_hvm_config(struct kvm_vcpu *vcpu, u64 data)
-{
- struct kvm *kvm = vcpu->kvm;
- int lm = is_long_mode(vcpu);
- u8 *blob_addr = lm ? (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_64
- : (u8 *)(long)kvm->arch.xen_hvm_config.blob_addr_32;
- u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
- : kvm->arch.xen_hvm_config.blob_size_32;
- u32 page_num = data & ~PAGE_MASK;
- u64 page_addr = data & PAGE_MASK;
- u8 *page;
-
- if (page_num >= blob_size)
- return 1;
-
- page = memdup_user(blob_addr + (page_num * PAGE_SIZE), PAGE_SIZE);
- if (IS_ERR(page))
- return PTR_ERR(page);
-
- if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
- kfree(page);
- return 1;
- }
- return 0;
-}
-
static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu)
{
u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;
@@ -2954,13 +2943,13 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu)
static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops.tlb_flush_all(vcpu);
+ static_call(kvm_x86_tlb_flush_all)(vcpu);
}
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
- kvm_x86_ops.tlb_flush_guest(vcpu);
+ static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
static void record_steal_time(struct kvm_vcpu *vcpu)
@@ -2968,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;
+ if (kvm_xen_msr_enabled(vcpu->kvm)) {
+ kvm_xen_runstate_set_running(vcpu);
+ return;
+ }
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -3016,6 +3010,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
u32 msr = msr_info->index;
u64 data = msr_info->data;
+ if (msr && msr == vcpu->kvm->arch.xen_hvm_config.msr)
+ return kvm_xen_write_hypercall_page(vcpu, data);
+
switch (msr) {
case MSR_AMD64_NB_CFG:
case MSR_IA32_UCODE_WRITE:
@@ -3072,18 +3069,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
break;
- case MSR_IA32_DEBUGCTLMSR:
- if (!data) {
- /* We support the non-activated case already */
- break;
- } else if (data & ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_BTF)) {
- /* Values other than LBR and BTF are vendor-specific,
- thus reserved and should throw a #GP */
- return 1;
- } else if (report_ignored_msrs)
- vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTLMSR 0x%llx, nop\n",
- __func__, data);
- break;
case 0x200 ... 0x2ff:
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
@@ -3152,13 +3137,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
return 1;
- kvm_write_wall_clock(vcpu->kvm, data);
+ vcpu->kvm->arch.wall_clock = data;
+ kvm_write_wall_clock(vcpu->kvm, data, 0);
break;
case MSR_KVM_WALL_CLOCK:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE))
return 1;
- kvm_write_wall_clock(vcpu->kvm, data);
+ vcpu->kvm->arch.wall_clock = data;
+ kvm_write_wall_clock(vcpu->kvm, data, 0);
break;
case MSR_KVM_SYSTEM_TIME_NEW:
if (!guest_pv_has(vcpu, KVM_FEATURE_CLOCKSOURCE2))
@@ -3303,8 +3290,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vcpu->arch.msr_misc_features_enables = data;
break;
default:
- if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
- return xen_hvm_config(vcpu, data);
if (kvm_pmu_is_valid_msr(vcpu, msr))
return kvm_pmu_set_msr(vcpu, msr_info);
return KVM_MSR_RET_INVALID;
@@ -3356,7 +3341,6 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
switch (msr_info->index) {
case MSR_IA32_PLATFORM_ID:
case MSR_IA32_EBL_CR_POWERON:
- case MSR_IA32_DEBUGCTLMSR:
case MSR_IA32_LASTBRANCHFROMIP:
case MSR_IA32_LASTBRANCHTOIP:
case MSR_IA32_LASTINTFROMIP:
@@ -3738,7 +3722,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
- case KVM_CAP_XEN_HVM:
case KVM_CAP_VCPU_EVENTS:
case KVM_CAP_HYPERV:
case KVM_CAP_HYPERV_VAPIC:
@@ -3778,6 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
+#ifdef CONFIG_KVM_XEN
+ case KVM_CAP_XEN_HVM:
+ r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
+ KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
+ KVM_XEN_HVM_CONFIG_SHARED_INFO;
+ if (sched_info_on())
+ r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
+ break;
+#endif
case KVM_CAP_SYNC_REGS:
r = KVM_SYNC_X86_VALID_FIELDS;
break;
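With this change KVM_CAP_XEN_HVM reports a bitmask of supported Xen features rather than a plain boolean, so userspace should test individual bits. A minimal sketch of the intended query (assuming kvm_fd is an already-open /dev/kvm file descriptor):

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative userspace probe of the Xen feature bits. */
static void probe_xen_caps(int kvm_fd)
{
	int caps = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);

	if (caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
		printf("Xen hypercall interception supported\n");
	if (caps & KVM_XEN_HVM_CONFIG_RUNSTATE)
		printf("Xen runstate reporting supported\n");
}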
@@ -3799,10 +3791,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops.has_emulated_msr(kvm, MSR_IA32_SMBASE);
+ r = static_call(kvm_x86_has_emulated_msr)(kvm, MSR_IA32_SMBASE);
break;
case KVM_CAP_VAPIC:
- r = !kvm_x86_ops.cpu_has_accelerated_tpr();
+ r = !static_call(kvm_x86_cpu_has_accelerated_tpr)();
break;
case KVM_CAP_NR_VCPUS:
r = KVM_SOFT_MAX_VCPUS;
@@ -3844,6 +3836,13 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_STEAL_TIME:
r = sched_info_on();
break;
+ case KVM_CAP_X86_BUS_LOCK_EXIT:
+ if (kvm_has_bus_lock_exit)
+ r = KVM_BUS_LOCK_DETECTION_OFF |
+ KVM_BUS_LOCK_DETECTION_EXIT;
+ else
+ r = 0;
+ break;
default:
break;
}
@@ -3961,14 +3960,14 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
if (need_emulate_wbinvd(vcpu)) {
- if (kvm_x86_ops.has_wbinvd_exit())
+ if (static_call(kvm_x86_has_wbinvd_exit)())
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
else if (vcpu->cpu != -1 && vcpu->cpu != cpu)
smp_call_function_single(vcpu->cpu,
wbinvd_ipi, NULL, 1);
}
- kvm_x86_ops.vcpu_load(vcpu, cpu);
+ static_call(kvm_x86_vcpu_load)(vcpu, cpu);
/* Save host pkru register if supported */
vcpu->arch.host_pkru = read_pkru();
@@ -4014,6 +4013,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
{
struct kvm_host_map map;
struct kvm_steal_time *st;
+ int idx;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -4021,9 +4021,15 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
if (vcpu->arch.st.preempted)
return;
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
if (kvm_map_gfn(vcpu, vcpu->arch.st.msr_val >> PAGE_SHIFT, &map,
&vcpu->arch.st.cache, true))
- return;
+ goto out;
st = map.hva +
offset_in_page(vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS);
@@ -4031,33 +4037,22 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu)
st->preempted = vcpu->arch.st.preempted = KVM_VCPU_PREEMPTED;
kvm_unmap_gfn(vcpu, &map, &vcpu->arch.st.cache, true, true);
+
+out:
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
}
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
- int idx;
-
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
- vcpu->arch.preempted_in_kernel = !kvm_x86_ops.get_cpl(vcpu);
+ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
- /*
- * Disable page faults because we're in atomic context here.
- * kvm_write_guest_offset_cached() would call might_fault()
- * that relies on pagefault_disable() to tell if there's a
- * bug. NOTE: the write to guest memory may not go through if
- * during postcopy live migration or if there's heavy guest
- * paging.
- */
- pagefault_disable();
- /*
- * kvm_memslots() will be called by
- * kvm_write_guest_offset_cached() so take the srcu lock.
- */
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- kvm_steal_time_set_preempted(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- pagefault_enable();
- kvm_x86_ops.vcpu_put(vcpu);
+ if (kvm_xen_msr_enabled(vcpu->kvm))
+ kvm_xen_runstate_set_preempted(vcpu);
+ else
+ kvm_steal_time_set_preempted(vcpu);
+
+ static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
/*
* If userspace has set any breakpoints or watchpoints, dr6 is restored
@@ -4071,7 +4066,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
struct kvm_lapic_state *s)
{
if (vcpu->arch.apicv_active)
- kvm_x86_ops.sync_pir_to_irr(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
return kvm_apic_get_state(vcpu, s);
}
@@ -4181,7 +4176,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
for (bank = 0; bank < bank_num; bank++)
vcpu->arch.mce_banks[bank*4] = ~(u64)0;
- kvm_x86_ops.setup_mce(vcpu);
+ static_call(kvm_x86_setup_mce)(vcpu);
out:
return r;
}
@@ -4288,11 +4283,11 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
events->interrupt.nr = vcpu->arch.interrupt.nr;
events->interrupt.soft = 0;
- events->interrupt.shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
+ events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending != 0;
- events->nmi.masked = kvm_x86_ops.get_nmi_mask(vcpu);
+ events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
events->nmi.pad = 0;
events->sipi_vector = 0; /* never valid when reporting to user space */
@@ -4359,13 +4354,13 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
vcpu->arch.interrupt.nr = events->interrupt.nr;
vcpu->arch.interrupt.soft = events->interrupt.soft;
if (events->flags & KVM_VCPUEVENT_VALID_SHADOW)
- kvm_x86_ops.set_interrupt_shadow(vcpu,
- events->interrupt.shadow);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu,
+ events->interrupt.shadow);
vcpu->arch.nmi_injected = events->nmi.injected;
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
vcpu->arch.nmi_pending = events->nmi.pending;
- kvm_x86_ops.set_nmi_mask(vcpu, events->nmi.masked);
+ static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR &&
lapic_in_kernel(vcpu))
@@ -4421,9 +4416,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
if (dbgregs->flags)
return -EINVAL;
- if (dbgregs->dr6 & ~0xffffffffull)
+ if (!kvm_dr6_valid(dbgregs->dr6))
return -EINVAL;
- if (dbgregs->dr7 & ~0xffffffffull)
+ if (!kvm_dr7_valid(dbgregs->dr7))
return -EINVAL;
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
@@ -4660,7 +4655,7 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
if (!kvm_x86_ops.enable_direct_tlbflush)
return -ENOTTY;
- return kvm_x86_ops.enable_direct_tlbflush(vcpu);
+ return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
vcpu->arch.pv_cpuid.enforce = cap->args[0];
@@ -5031,6 +5026,28 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
break;
+#ifdef CONFIG_KVM_XEN
+ case KVM_XEN_VCPU_GET_ATTR: {
+ struct kvm_xen_vcpu_attr xva;
+
+ r = -EFAULT;
+ if (copy_from_user(&xva, argp, sizeof(xva)))
+ goto out;
+ r = kvm_xen_vcpu_get_attr(vcpu, &xva);
+ if (!r && copy_to_user(argp, &xva, sizeof(xva)))
+ r = -EFAULT;
+ break;
+ }
+ case KVM_XEN_VCPU_SET_ATTR: {
+ struct kvm_xen_vcpu_attr xva;
+
+ r = -EFAULT;
+ if (copy_from_user(&xva, argp, sizeof(xva)))
+ goto out;
+ r = kvm_xen_vcpu_set_attr(vcpu, &xva);
+ break;
+ }
+#endif
default:
r = -EINVAL;
}
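The two new vCPU ioctls follow the usual copy-in/copy-out pattern above. A hedged userspace sketch of setting an attribute and reading it back (the attribute type and the gpa value are hypothetical choices by the VMM; the types themselves are defined elsewhere in the Xen series):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: set one Xen vCPU attribute on a vCPU fd, then re-read it. */
static int set_and_verify_xen_attr(int vcpu_fd, __u16 type, __u64 gpa)
{
	struct kvm_xen_vcpu_attr xva = { .type = type, .u.gpa = gpa };

	if (ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &xva))
		return -1;
	return ioctl(vcpu_fd, KVM_XEN_VCPU_GET_ATTR, &xva);
}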
@@ -5052,14 +5069,14 @@ static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
if (addr > (unsigned int)(-3 * PAGE_SIZE))
return -EINVAL;
- ret = kvm_x86_ops.set_tss_addr(kvm, addr);
+ ret = static_call(kvm_x86_set_tss_addr)(kvm, addr);
return ret;
}
static int kvm_vm_ioctl_set_identity_map_addr(struct kvm *kvm,
u64 ident_addr)
{
- return kvm_x86_ops.set_identity_map_addr(kvm, ident_addr);
+ return static_call(kvm_x86_set_identity_map_addr)(kvm, ident_addr);
}
static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
@@ -5213,11 +5230,18 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
+
/*
- * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+ * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called
+ * before reporting dirty_bitmap to userspace. KVM flushes the buffers
+ * on all VM-Exits, thus we only need to kick running vCPUs to force a
+ * VM-Exit.
*/
- if (kvm_x86_ops.flush_log_dirty)
- kvm_x86_ops.flush_log_dirty(kvm);
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vcpu_kick(vcpu);
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
@@ -5307,6 +5331,20 @@ split_irqchip_unlock:
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
+ case KVM_CAP_X86_BUS_LOCK_EXIT:
+ r = -EINVAL;
+ if (cap->args[0] & ~KVM_BUS_LOCK_DETECTION_VALID_MODE)
+ break;
+
+ if ((cap->args[0] & KVM_BUS_LOCK_DETECTION_OFF) &&
+ (cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT))
+ break;
+
+ if (kvm_has_bus_lock_exit &&
+ cap->args[0] & KVM_BUS_LOCK_DETECTION_EXIT)
+ kvm->arch.bus_lock_detection_enabled = true;
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
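Bus-lock exiting is opt-in: userspace checks KVM_CAP_X86_BUS_LOCK_EXIT (reported in an earlier hunk) and then enables exactly one detection mode per VM with KVM_ENABLE_CAP. A sketch of that call (assuming vm_fd is an existing VM file descriptor):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative only: request an exit to userspace on every guest bus lock. */
static int enable_bus_lock_exit(int vm_fd)
{
	struct kvm_enable_cap cap;

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_X86_BUS_LOCK_EXIT;
	cap.args[0] = KVM_BUS_LOCK_DETECTION_EXIT;

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}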
@@ -5631,18 +5669,36 @@ set_pit2_out:
kvm->arch.bsp_vcpu_id = arg;
mutex_unlock(&kvm->lock);
break;
+#ifdef CONFIG_KVM_XEN
case KVM_XEN_HVM_CONFIG: {
struct kvm_xen_hvm_config xhc;
r = -EFAULT;
if (copy_from_user(&xhc, argp, sizeof(xhc)))
goto out;
- r = -EINVAL;
- if (xhc.flags)
+ r = kvm_xen_hvm_config(kvm, &xhc);
+ break;
+ }
+ case KVM_XEN_HVM_GET_ATTR: {
+ struct kvm_xen_hvm_attr xha;
+
+ r = -EFAULT;
+ if (copy_from_user(&xha, argp, sizeof(xha)))
goto out;
- memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
- r = 0;
+ r = kvm_xen_hvm_get_attr(kvm, &xha);
+ if (!r && copy_to_user(argp, &xha, sizeof(xha)))
+ r = -EFAULT;
+ break;
+ }
+ case KVM_XEN_HVM_SET_ATTR: {
+ struct kvm_xen_hvm_attr xha;
+
+ r = -EFAULT;
+ if (copy_from_user(&xha, argp, sizeof(xha)))
+ goto out;
+ r = kvm_xen_hvm_set_attr(kvm, &xha);
break;
}
+#endif
case KVM_SET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;
@@ -5685,7 +5741,7 @@ set_pit2_out:
case KVM_MEMORY_ENCRYPT_OP: {
r = -ENOTTY;
if (kvm_x86_ops.mem_enc_op)
- r = kvm_x86_ops.mem_enc_op(kvm, argp);
+ r = static_call(kvm_x86_mem_enc_op)(kvm, argp);
break;
}
case KVM_MEMORY_ENCRYPT_REG_REGION: {
@@ -5697,7 +5753,7 @@ set_pit2_out:
r = -ENOTTY;
if (kvm_x86_ops.mem_enc_reg_region)
- r = kvm_x86_ops.mem_enc_reg_region(kvm, &region);
+ r = static_call(kvm_x86_mem_enc_reg_region)(kvm, &region);
break;
}
case KVM_MEMORY_ENCRYPT_UNREG_REGION: {
@@ -5709,7 +5765,7 @@ set_pit2_out:
r = -ENOTTY;
if (kvm_x86_ops.mem_enc_unreg_region)
- r = kvm_x86_ops.mem_enc_unreg_region(kvm, &region);
+ r = static_call(kvm_x86_mem_enc_unreg_region)(kvm, &region);
break;
}
case KVM_HYPERV_EVENTFD: {
@@ -5811,7 +5867,7 @@ static void kvm_init_msr_list(void)
}
for (i = 0; i < ARRAY_SIZE(emulated_msrs_all); i++) {
- if (!kvm_x86_ops.has_emulated_msr(NULL, emulated_msrs_all[i]))
+ if (!static_call(kvm_x86_has_emulated_msr)(NULL, emulated_msrs_all[i]))
continue;
emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i];
@@ -5874,13 +5930,13 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
static void kvm_set_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- kvm_x86_ops.set_segment(vcpu, var, seg);
+ static_call(kvm_x86_set_segment)(vcpu, var, seg);
}
void kvm_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg)
{
- kvm_x86_ops.get_segment(vcpu, var, seg);
+ static_call(kvm_x86_get_segment)(vcpu, var, seg);
}
gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
@@ -5900,14 +5956,14 @@ gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_FETCH_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
@@ -5915,7 +5971,7 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
access |= PFERR_WRITE_MASK;
return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
}
@@ -5964,7 +6020,7 @@ static int kvm_fetch_guest_virt(struct x86_emulate_ctxt *ctxt,
struct x86_exception *exception)
{
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
- u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
unsigned offset;
int ret;
@@ -5989,7 +6045,7 @@ int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
gva_t addr, void *val, unsigned int bytes,
struct x86_exception *exception)
{
- u32 access = (kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
/*
* FIXME: this should call handle_emulation_failure if X86EMUL_IO_NEEDED
@@ -6010,7 +6066,7 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = 0;
- if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
+ if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
access |= PFERR_USER_MASK;
return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
@@ -6063,7 +6119,7 @@ static int emulator_write_std(struct x86_emulate_ctxt *ctxt, gva_t addr, void *v
struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
u32 access = PFERR_WRITE_MASK;
- if (!system && kvm_x86_ops.get_cpl(vcpu) == 3)
+ if (!system && static_call(kvm_x86_get_cpl)(vcpu) == 3)
access |= PFERR_USER_MASK;
return kvm_write_guest_virt_helper(addr, val, bytes, vcpu,
@@ -6088,7 +6144,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
- if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, NULL, 0)))
+ if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, NULL, 0)))
return 1;
if (force_emulation_prefix &&
@@ -6122,7 +6178,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
gpa_t *gpa, struct x86_exception *exception,
bool write)
{
- u32 access = ((kvm_x86_ops.get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+ u32 access = ((static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0)
| (write ? PFERR_WRITE_MASK : 0);
/*
@@ -6530,7 +6586,7 @@ static int emulator_pio_out_emulated(struct x86_emulate_ctxt *ctxt,
static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
{
- return kvm_x86_ops.get_segment_base(vcpu, seg);
+ return static_call(kvm_x86_get_segment_base)(vcpu, seg);
}
static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
@@ -6543,7 +6599,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
- if (kvm_x86_ops.has_wbinvd_exit()) {
+ if (static_call(kvm_x86_has_wbinvd_exit)()) {
int cpu = get_cpu();
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
@@ -6570,17 +6626,17 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
}
-static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
- unsigned long *dest)
+static void emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+ unsigned long *dest)
{
- return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+ kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value)
{
- return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
+ return kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
}
static u64 mk_cr_64(u64 curr_cr, u32 new_val)
@@ -6648,27 +6704,27 @@ static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val)
static int emulator_get_cpl(struct x86_emulate_ctxt *ctxt)
{
- return kvm_x86_ops.get_cpl(emul_to_vcpu(ctxt));
+ return static_call(kvm_x86_get_cpl)(emul_to_vcpu(ctxt));
}
static void emulator_get_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops.get_gdt(emul_to_vcpu(ctxt), dt);
+ static_call(kvm_x86_get_gdt)(emul_to_vcpu(ctxt), dt);
}
static void emulator_get_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops.get_idt(emul_to_vcpu(ctxt), dt);
+ static_call(kvm_x86_get_idt)(emul_to_vcpu(ctxt), dt);
}
static void emulator_set_gdt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops.set_gdt(emul_to_vcpu(ctxt), dt);
+ static_call(kvm_x86_set_gdt)(emul_to_vcpu(ctxt), dt);
}
static void emulator_set_idt(struct x86_emulate_ctxt *ctxt, struct desc_ptr *dt)
{
- kvm_x86_ops.set_idt(emul_to_vcpu(ctxt), dt);
+ static_call(kvm_x86_set_idt)(emul_to_vcpu(ctxt), dt);
}
static unsigned long emulator_get_cached_segment_base(
@@ -6810,7 +6866,7 @@ static int emulator_intercept(struct x86_emulate_ctxt *ctxt,
struct x86_instruction_info *info,
enum x86_intercept_stage stage)
{
- return kvm_x86_ops.check_intercept(emul_to_vcpu(ctxt), info, stage,
+ return static_call(kvm_x86_check_intercept)(emul_to_vcpu(ctxt), info, stage,
&ctxt->exception);
}
@@ -6848,7 +6904,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
{
- kvm_x86_ops.set_nmi_mask(emul_to_vcpu(ctxt), masked);
+ static_call(kvm_x86_set_nmi_mask)(emul_to_vcpu(ctxt), masked);
}
static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
@@ -6864,7 +6920,7 @@ static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_fla
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
const char *smstate)
{
- return kvm_x86_ops.pre_leave_smm(emul_to_vcpu(ctxt), smstate);
+ return static_call(kvm_x86_pre_leave_smm)(emul_to_vcpu(ctxt), smstate);
}
static void emulator_post_leave_smm(struct x86_emulate_ctxt *ctxt)
@@ -6926,7 +6982,7 @@ static const struct x86_emulate_ops emulate_ops = {
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
{
- u32 int_shadow = kvm_x86_ops.get_interrupt_shadow(vcpu);
+ u32 int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
/*
* an sti; sti; sequence only disable interrupts for the first
* instruction. So, if the last instruction, be it emulated or
@@ -6937,7 +6993,7 @@ static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
if (int_shadow & mask)
mask = 0;
if (unlikely(int_shadow || mask)) {
- kvm_x86_ops.set_interrupt_shadow(vcpu, mask);
+ static_call(kvm_x86_set_interrupt_shadow)(vcpu, mask);
if (!mask)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
@@ -6979,7 +7035,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
int cs_db, cs_l;
- kvm_x86_ops.get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+ static_call(kvm_x86_get_cs_db_l_bits)(vcpu, &cs_db, &cs_l);
ctxt->gpa_available = false;
ctxt->eflags = kvm_get_rflags(vcpu);
@@ -7040,7 +7096,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu, int emulation_type)
kvm_queue_exception(vcpu, UD_VECTOR);
- if (!is_guest_mode(vcpu) && kvm_x86_ops.get_cpl(vcpu) == 0) {
+ if (!is_guest_mode(vcpu) && static_call(kvm_x86_get_cpl)(vcpu) == 0) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
vcpu->run->internal.ndata = 0;
@@ -7100,9 +7156,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
if (vcpu->arch.mmu->direct_map) {
unsigned int indirect_shadow_pages;
- spin_lock(&vcpu->kvm->mmu_lock);
+ write_lock(&vcpu->kvm->mmu_lock);
indirect_shadow_pages = vcpu->kvm->arch.indirect_shadow_pages;
- spin_unlock(&vcpu->kvm->mmu_lock);
+ write_unlock(&vcpu->kvm->mmu_lock);
if (indirect_shadow_pages)
kvm_mmu_unprotect_page(vcpu->kvm, gpa_to_gfn(gpa));
@@ -7209,7 +7265,7 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
struct kvm_run *kvm_run = vcpu->run;
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_ACTIVE_LOW;
kvm_run->debug.arch.pc = kvm_get_linear_rip(vcpu);
kvm_run->debug.arch.exception = DB_VECTOR;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
@@ -7221,10 +7277,10 @@ static int kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu)
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
- unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
+ unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
int r;
- r = kvm_x86_ops.skip_emulated_instruction(vcpu);
+ r = static_call(kvm_x86_skip_emulated_instruction)(vcpu);
if (unlikely(!r))
return 0;
@@ -7253,7 +7309,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.eff_db);
if (dr6 != 0) {
- kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.dr6 = dr6 | DR6_ACTIVE_LOW;
kvm_run->debug.arch.pc = eip;
kvm_run->debug.arch.exception = DB_VECTOR;
kvm_run->exit_reason = KVM_EXIT_DEBUG;
@@ -7310,6 +7366,42 @@ static bool is_vmware_backdoor_opcode(struct x86_emulate_ctxt *ctxt)
return false;
}
+/*
+ * Decode the instruction to be emulated.  Returns EMULATION_OK if the
+ * instruction was decoded successfully.
+ */
+int x86_decode_emulated_instruction(struct kvm_vcpu *vcpu, int emulation_type,
+ void *insn, int insn_len)
+{
+ int r = EMULATION_OK;
+ struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt;
+
+ init_emulate_ctxt(vcpu);
+
+ /*
+ * We will reenter on the same instruction since we do not set
+ * complete_userspace_io. This does not handle watchpoints yet,
+ * those would be handled in the emulate_ops.
+ */
+ if (!(emulation_type & EMULTYPE_SKIP) &&
+ kvm_vcpu_check_breakpoint(vcpu, &r))
+ return r;
+
+ ctxt->interruptibility = 0;
+ ctxt->have_exception = false;
+ ctxt->exception.vector = -1;
+ ctxt->perm_ok = false;
+
+ ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
+
+ r = x86_decode_insn(ctxt, insn, insn_len);
+
+ trace_kvm_emulate_insn_start(vcpu);
+ ++vcpu->stat.insn_emulation;
+
+ return r;
+}
+EXPORT_SYMBOL_GPL(x86_decode_emulated_instruction);
+
int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
int emulation_type, void *insn, int insn_len)
{
@@ -7318,7 +7410,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
bool writeback = true;
bool write_fault_to_spt;
- if (unlikely(!kvm_x86_ops.can_emulate_instruction(vcpu, insn, insn_len)))
+ if (unlikely(!static_call(kvm_x86_can_emulate_instruction)(vcpu, insn, insn_len)))
return 1;
vcpu->arch.l1tf_flush_l1d = true;
@@ -7329,32 +7421,12 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
*/
write_fault_to_spt = vcpu->arch.write_fault_to_shadow_pgtable;
vcpu->arch.write_fault_to_shadow_pgtable = false;
- kvm_clear_exception_queue(vcpu);
if (!(emulation_type & EMULTYPE_NO_DECODE)) {
- init_emulate_ctxt(vcpu);
-
- /*
- * We will reenter on the same instruction since
- * we do not set complete_userspace_io. This does not
- * handle watchpoints yet, those would be handled in
- * the emulate_ops.
- */
- if (!(emulation_type & EMULTYPE_SKIP) &&
- kvm_vcpu_check_breakpoint(vcpu, &r))
- return r;
-
- ctxt->interruptibility = 0;
- ctxt->have_exception = false;
- ctxt->exception.vector = -1;
- ctxt->perm_ok = false;
+ kvm_clear_exception_queue(vcpu);
- ctxt->ud = emulation_type & EMULTYPE_TRAP_UD;
-
- r = x86_decode_insn(ctxt, insn, insn_len);
-
- trace_kvm_emulate_insn_start(vcpu);
- ++vcpu->stat.insn_emulation;
+ r = x86_decode_emulated_instruction(vcpu, emulation_type,
+ insn, insn_len);
if (r != EMULATION_OK) {
if ((emulation_type & EMULTYPE_TRAP_UD) ||
(emulation_type & EMULTYPE_TRAP_UD_FORCED)) {
@@ -7461,7 +7533,7 @@ restart:
r = 1;
if (writeback) {
- unsigned long rflags = kvm_x86_ops.get_rflags(vcpu);
+ unsigned long rflags = static_call(kvm_x86_get_rflags)(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
if (!ctxt->have_exception ||
@@ -7470,7 +7542,7 @@ restart:
if (r && (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
r = kvm_vcpu_do_singlestep(vcpu);
if (kvm_x86_ops.update_emulated_instruction)
- kvm_x86_ops.update_emulated_instruction(vcpu);
+ static_call(kvm_x86_update_emulated_instruction)(vcpu);
__kvm_set_rflags(vcpu, ctxt->eflags);
}
@@ -7799,7 +7871,7 @@ static int kvm_is_user_mode(void)
int user_mode = 3;
if (__this_cpu_read(current_vcpu))
- user_mode = kvm_x86_ops.get_cpl(__this_cpu_read(current_vcpu));
+ user_mode = static_call(kvm_x86_get_cpl)(__this_cpu_read(current_vcpu));
return user_mode != 0;
}
@@ -7944,7 +8016,6 @@ int kvm_arch_init(void *opaque)
supported_xcr0 = host_xcr0 & KVM_SUPPORTED_XCR0;
}
- kvm_lapic_init();
if (pi_inject_timer == -1)
pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
#ifdef CONFIG_X86_64
@@ -7986,6 +8057,10 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_fpu_cache);
+#ifdef CONFIG_KVM_XEN
+ static_key_deferred_flush(&kvm_xen_enabled);
+ WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
+#endif
}
static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
@@ -8037,7 +8112,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK)
return -KVM_EOPNOTSUPP;
- if (kvm_get_walltime_and_clockread(&ts, &cycle) == false)
+ if (!kvm_get_walltime_and_clockread(&ts, &cycle))
return -KVM_EOPNOTSUPP;
clock_pairing.sec = ts.tv_sec;
@@ -8113,7 +8188,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit;
- if (kvm_hv_hypercall_enabled(vcpu->kvm))
+ if (kvm_xen_hypercall_enabled(vcpu->kvm))
+ return kvm_xen_hypercall(vcpu);
+
+ if (kvm_hv_hypercall_enabled(vcpu))
return kvm_hv_hypercall(vcpu);
nr = kvm_rax_read(vcpu);
@@ -8133,7 +8211,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
a3 &= 0xFFFFFFFF;
}
- if (kvm_x86_ops.get_cpl(vcpu) != 0) {
+ if (static_call(kvm_x86_get_cpl)(vcpu) != 0) {
ret = -KVM_EPERM;
goto out;
}
@@ -8190,7 +8268,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
char instruction[3];
unsigned long rip = kvm_rip_read(vcpu);
- kvm_x86_ops.patch_hypercall(vcpu, instruction);
+ static_call(kvm_x86_patch_hypercall)(vcpu, instruction);
return emulator_write_emulated(ctxt, rip, instruction, 3,
&ctxt->exception);
@@ -8214,12 +8292,14 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
kvm_run->if_flag = !vcpu->arch.guest_state_protected
&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
- kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
kvm_run->cr8 = kvm_get_cr8(vcpu);
kvm_run->apic_base = kvm_get_apic_base(vcpu);
kvm_run->ready_for_interrupt_injection =
pic_in_kernel(vcpu->kvm) ||
kvm_vcpu_ready_for_interrupt_injection(vcpu);
+
+ if (is_smm(vcpu))
+ kvm_run->flags |= KVM_RUN_X86_SMM;
}
static void update_cr8_intercept(struct kvm_vcpu *vcpu)
@@ -8245,7 +8325,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
tpr = kvm_lapic_get_cr8(vcpu);
- kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr);
+ static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
}
static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
@@ -8256,7 +8336,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
/* try to reinject previous events if any */
if (vcpu->arch.exception.injected) {
- kvm_x86_ops.queue_exception(vcpu);
+ static_call(kvm_x86_queue_exception)(vcpu);
can_inject = false;
}
/*
@@ -8275,10 +8355,10 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
*/
else if (!vcpu->arch.exception.pending) {
if (vcpu->arch.nmi_injected) {
- kvm_x86_ops.set_nmi(vcpu);
+ static_call(kvm_x86_set_nmi)(vcpu);
can_inject = false;
} else if (vcpu->arch.interrupt.injected) {
- kvm_x86_ops.set_irq(vcpu);
+ static_call(kvm_x86_set_irq)(vcpu);
can_inject = false;
}
}
@@ -8319,7 +8399,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
}
}
- kvm_x86_ops.queue_exception(vcpu);
+ static_call(kvm_x86_queue_exception)(vcpu);
can_inject = false;
}
@@ -8335,7 +8415,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
* The kvm_x86_ops hooks communicate this by returning -EBUSY.
*/
if (vcpu->arch.smi_pending) {
- r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY;
+ r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
if (r < 0)
goto busy;
if (r) {
@@ -8344,35 +8424,35 @@ static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit
enter_smm(vcpu);
can_inject = false;
} else
- kvm_x86_ops.enable_smi_window(vcpu);
+ static_call(kvm_x86_enable_smi_window)(vcpu);
}
if (vcpu->arch.nmi_pending) {
- r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY;
+ r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
if (r < 0)
goto busy;
if (r) {
--vcpu->arch.nmi_pending;
vcpu->arch.nmi_injected = true;
- kvm_x86_ops.set_nmi(vcpu);
+ static_call(kvm_x86_set_nmi)(vcpu);
can_inject = false;
- WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0);
+ WARN_ON(static_call(kvm_x86_nmi_allowed)(vcpu, true) < 0);
}
if (vcpu->arch.nmi_pending)
- kvm_x86_ops.enable_nmi_window(vcpu);
+ static_call(kvm_x86_enable_nmi_window)(vcpu);
}
if (kvm_cpu_has_injectable_intr(vcpu)) {
- r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY;
+ r = can_inject ? static_call(kvm_x86_interrupt_allowed)(vcpu, true) : -EBUSY;
if (r < 0)
goto busy;
if (r) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
- kvm_x86_ops.set_irq(vcpu);
- WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0);
+ static_call(kvm_x86_set_irq)(vcpu);
+ WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
}
if (kvm_cpu_has_injectable_intr(vcpu))
- kvm_x86_ops.enable_irq_window(vcpu);
+ static_call(kvm_x86_enable_irq_window)(vcpu);
}
if (is_guest_mode(vcpu) &&
@@ -8397,7 +8477,7 @@ static void process_nmi(struct kvm_vcpu *vcpu)
* If an NMI is already in progress, limit further NMIs to just one.
* Otherwise, allow two (and we'll inject the first one immediately).
*/
- if (kvm_x86_ops.get_nmi_mask(vcpu) || vcpu->arch.nmi_injected)
+ if (static_call(kvm_x86_get_nmi_mask)(vcpu) || vcpu->arch.nmi_injected)
limit = 1;
vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
@@ -8487,11 +8567,11 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7f7c, seg.limit);
put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
- kvm_x86_ops.get_gdt(vcpu, &dt);
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
put_smstate(u32, buf, 0x7f74, dt.address);
put_smstate(u32, buf, 0x7f70, dt.size);
- kvm_x86_ops.get_idt(vcpu, &dt);
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
put_smstate(u32, buf, 0x7f58, dt.address);
put_smstate(u32, buf, 0x7f54, dt.size);
@@ -8541,7 +8621,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7e94, seg.limit);
put_smstate(u64, buf, 0x7e98, seg.base);
- kvm_x86_ops.get_idt(vcpu, &dt);
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
put_smstate(u32, buf, 0x7e84, dt.size);
put_smstate(u64, buf, 0x7e88, dt.address);
@@ -8551,7 +8631,7 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
put_smstate(u32, buf, 0x7e74, seg.limit);
put_smstate(u64, buf, 0x7e78, seg.base);
- kvm_x86_ops.get_gdt(vcpu, &dt);
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
put_smstate(u32, buf, 0x7e64, dt.size);
put_smstate(u64, buf, 0x7e68, dt.address);
@@ -8581,30 +8661,30 @@ static void enter_smm(struct kvm_vcpu *vcpu)
* vCPU state (e.g. leave guest mode) after we've saved the state into
* the SMM state-save area.
*/
- kvm_x86_ops.pre_enter_smm(vcpu, buf);
+ static_call(kvm_x86_pre_enter_smm)(vcpu, buf);
vcpu->arch.hflags |= HF_SMM_MASK;
kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
- if (kvm_x86_ops.get_nmi_mask(vcpu))
+ if (static_call(kvm_x86_get_nmi_mask)(vcpu))
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
- kvm_x86_ops.set_nmi_mask(vcpu, true);
+ static_call(kvm_x86_set_nmi_mask)(vcpu, true);
kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
kvm_rip_write(vcpu, 0x8000);
cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
- kvm_x86_ops.set_cr0(vcpu, cr0);
+ static_call(kvm_x86_set_cr0)(vcpu, cr0);
vcpu->arch.cr0 = cr0;
- kvm_x86_ops.set_cr4(vcpu, 0);
+ static_call(kvm_x86_set_cr4)(vcpu, 0);
/* Undocumented: IDT limit is set to zero on entry to SMM. */
dt.address = dt.size = 0;
- kvm_x86_ops.set_idt(vcpu, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
- __kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+ kvm_set_dr(vcpu, 7, DR7_FIXED_1);
cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
cs.base = vcpu->arch.smbase;
@@ -8633,7 +8713,7 @@ static void enter_smm(struct kvm_vcpu *vcpu)
#ifdef CONFIG_X86_64
if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
- kvm_x86_ops.set_efer(vcpu, 0);
+ static_call(kvm_x86_set_efer)(vcpu, 0);
#endif
kvm_update_cpuid_runtime(vcpu);
@@ -8671,7 +8751,7 @@ void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
vcpu->arch.apicv_active = kvm_apicv_activated(vcpu->kvm);
kvm_apic_update_apicv(vcpu);
- kvm_x86_ops.refresh_apicv_exec_ctrl(vcpu);
+ static_call(kvm_x86_refresh_apicv_exec_ctrl)(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
@@ -8688,7 +8768,7 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
unsigned long old, new, expected;
if (!kvm_x86_ops.check_apicv_inhibit_reasons ||
- !kvm_x86_ops.check_apicv_inhibit_reasons(bit))
+ !static_call(kvm_x86_check_apicv_inhibit_reasons)(bit))
return;
old = READ_ONCE(kvm->arch.apicv_inhibit_reasons);
@@ -8708,7 +8788,7 @@ void kvm_request_apicv_update(struct kvm *kvm, bool activate, ulong bit)
trace_kvm_apicv_update_request(activate, bit);
if (kvm_x86_ops.pre_update_apicv_exec_ctrl)
- kvm_x86_ops.pre_update_apicv_exec_ctrl(kvm, activate);
+ static_call(kvm_x86_pre_update_apicv_exec_ctrl)(kvm, activate);
/*
* Sending request to update APICV for all other vcpus,
@@ -8734,7 +8814,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
else {
if (vcpu->arch.apicv_active)
- kvm_x86_ops.sync_pir_to_irr(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (ioapic_in_kernel(vcpu->kvm))
kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
}
@@ -8752,9 +8832,12 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu)
if (!kvm_apic_hw_enabled(vcpu->arch.apic))
return;
- bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors,
- vcpu_to_synic(vcpu)->vec_bitmap, 256);
- kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap);
+ if (to_hv_vcpu(vcpu))
+ bitmap_or((ulong *)eoi_exit_bitmap,
+ vcpu->arch.ioapic_handled_vectors,
+ to_hv_synic(vcpu)->vec_bitmap, 256);
+
+ static_call(kvm_x86_load_eoi_exitmap)(vcpu, eoi_exit_bitmap);
}
void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
@@ -8779,7 +8862,7 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
if (!kvm_x86_ops.set_apic_access_page_addr)
return;
- kvm_x86_ops.set_apic_access_page_addr(vcpu);
+ static_call(kvm_x86_set_apic_access_page_addr)(vcpu);
}
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
@@ -8904,8 +8987,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) {
+ struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
vcpu->run->exit_reason = KVM_EXIT_HYPERV;
- vcpu->run->hyperv = vcpu->arch.hyperv.exit;
+ vcpu->run->hyperv = hv_vcpu->exit;
r = 0;
goto out;
}
@@ -8922,10 +9007,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_APF_READY, vcpu))
kvm_check_async_pf_completion(vcpu);
if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
- kvm_x86_ops.msr_filter_changed(vcpu);
+ static_call(kvm_x86_msr_filter_changed)(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+ static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
}
- if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
+ if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
+ kvm_xen_has_interrupt(vcpu)) {
++vcpu->stat.req_event;
kvm_apic_accept_events(vcpu);
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
@@ -8935,7 +9024,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
inject_pending_event(vcpu, &req_immediate_exit);
if (req_int_win)
- kvm_x86_ops.enable_irq_window(vcpu);
+ static_call(kvm_x86_enable_irq_window)(vcpu);
if (kvm_lapic_enabled(vcpu)) {
update_cr8_intercept(vcpu);
@@ -8950,7 +9039,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
preempt_disable();
- kvm_x86_ops.prepare_guest_switch(vcpu);
+ static_call(kvm_x86_prepare_guest_switch)(vcpu);
/*
* Disable IRQs before setting IN_GUEST_MODE. Posted interrupt
@@ -8981,7 +9070,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
* notified with kvm_vcpu_kick.
*/
if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active)
- kvm_x86_ops.sync_pir_to_irr(vcpu);
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
if (kvm_vcpu_exit_request(vcpu)) {
vcpu->mode = OUTSIDE_GUEST_MODE;
@@ -8995,7 +9084,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_x86_ops.request_immediate_exit(vcpu);
+ static_call(kvm_x86_request_immediate_exit)(vcpu);
}
fpregs_assert_state_consistent();
@@ -9012,7 +9101,19 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
- exit_fastpath = kvm_x86_ops.run(vcpu);
+ for (;;) {
+ exit_fastpath = static_call(kvm_x86_run)(vcpu);
+ if (likely(exit_fastpath != EXIT_FASTPATH_REENTER_GUEST))
+ break;
+
+ if (unlikely(kvm_vcpu_exit_request(vcpu))) {
+ exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED;
+ break;
+ }
+
+ if (vcpu->arch.apicv_active)
+ static_call(kvm_x86_sync_pir_to_irr)(vcpu);
+ }
/*
* Do this here before restoring debug registers on the host. And
@@ -9022,7 +9123,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
*/
if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) {
WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP);
- kvm_x86_ops.sync_dirty_debug_regs(vcpu);
+ static_call(kvm_x86_sync_dirty_debug_regs)(vcpu);
kvm_update_dr0123(vcpu);
kvm_update_dr7(vcpu);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
@@ -9044,7 +9145,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
- kvm_x86_ops.handle_exit_irqoff(vcpu);
+ static_call(kvm_x86_handle_exit_irqoff)(vcpu);
/*
* Consume any pending interrupts, including the possible source of
@@ -9086,13 +9187,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (vcpu->arch.apic_attention)
kvm_lapic_sync_from_vapic(vcpu);
- r = kvm_x86_ops.handle_exit(vcpu, exit_fastpath);
+ r = static_call(kvm_x86_handle_exit)(vcpu, exit_fastpath);
return r;
cancel_injection:
if (req_immediate_exit)
kvm_make_request(KVM_REQ_EVENT, vcpu);
- kvm_x86_ops.cancel_injection(vcpu);
+ static_call(kvm_x86_cancel_injection)(vcpu);
if (unlikely(vcpu->arch.apic_attention))
kvm_lapic_sync_from_vapic(vcpu);
out:
@@ -9102,13 +9203,13 @@ out:
static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
{
if (!kvm_arch_vcpu_runnable(vcpu) &&
- (!kvm_x86_ops.pre_block || kvm_x86_ops.pre_block(vcpu) == 0)) {
+ (!kvm_x86_ops.pre_block || static_call(kvm_x86_pre_block)(vcpu) == 0)) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
kvm_vcpu_block(vcpu);
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
if (kvm_x86_ops.post_block)
- kvm_x86_ops.post_block(vcpu);
+ static_call(kvm_x86_post_block)(vcpu);
if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
return 1;
@@ -9329,6 +9430,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
vcpu_load(vcpu);
kvm_sigset_activate(vcpu);
+ kvm_run->flags = 0;
kvm_load_guest_fpu(vcpu);
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -9503,10 +9605,10 @@ static void __get_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
- kvm_x86_ops.get_idt(vcpu, &dt);
+ static_call(kvm_x86_get_idt)(vcpu, &dt);
sregs->idt.limit = dt.size;
sregs->idt.base = dt.address;
- kvm_x86_ops.get_gdt(vcpu, &dt);
+ static_call(kvm_x86_get_gdt)(vcpu, &dt);
sregs->gdt.limit = dt.size;
sregs->gdt.base = dt.address;
@@ -9624,7 +9726,7 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
*/
if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
return false;
- if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits)
+ if (kvm_vcpu_is_illegal_gpa(vcpu, sregs->cr3))
return false;
} else {
/*
@@ -9659,10 +9761,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
dt.size = sregs->idt.limit;
dt.address = sregs->idt.base;
- kvm_x86_ops.set_idt(vcpu, &dt);
+ static_call(kvm_x86_set_idt)(vcpu, &dt);
dt.size = sregs->gdt.limit;
dt.address = sregs->gdt.base;
- kvm_x86_ops.set_gdt(vcpu, &dt);
+ static_call(kvm_x86_set_gdt)(vcpu, &dt);
vcpu->arch.cr2 = sregs->cr2;
mmu_reset_needed |= kvm_read_cr3(vcpu) != sregs->cr3;
@@ -9672,14 +9774,14 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_set_cr8(vcpu, sregs->cr8);
mmu_reset_needed |= vcpu->arch.efer != sregs->efer;
- kvm_x86_ops.set_efer(vcpu, sregs->efer);
+ static_call(kvm_x86_set_efer)(vcpu, sregs->efer);
mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0;
- kvm_x86_ops.set_cr0(vcpu, sregs->cr0);
+ static_call(kvm_x86_set_cr0)(vcpu, sregs->cr0);
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
- kvm_x86_ops.set_cr4(vcpu, sregs->cr4);
+ static_call(kvm_x86_set_cr4)(vcpu, sregs->cr4);
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (is_pae_paging(vcpu)) {
@@ -9787,7 +9889,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
*/
kvm_set_rflags(vcpu, rflags);
- kvm_x86_ops.update_exception_bitmap(vcpu);
+ static_call(kvm_x86_update_exception_bitmap)(vcpu);
r = 0;
@@ -9965,7 +10067,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
if (kvm_apicv_activated(vcpu->kvm))
vcpu->arch.apicv_active = true;
} else
- static_key_slow_inc(&kvm_no_apic_vcpu);
+ static_branch_inc(&kvm_has_noapic_vcpu);
r = -ENOMEM;
@@ -10003,7 +10105,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
- vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
+ vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
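The per-vCPU cr3_lm_rsvd_bits field is replaced here by a generic reserved_gpa_bits mask, consulted through kvm_vcpu_is_illegal_gpa() in the __set_sregs() change above. A rough sketch of what such helpers amount to, assuming rsvd_bits() builds a mask of the given bit range and MAXPHYADDR comes from guest CPUID; this is not the exact in-tree code:

/* Sketch, not the exact helpers. */
static inline u64 kvm_vcpu_reserved_gpa_bits_raw(struct kvm_vcpu *vcpu)
{
	return rsvd_bits(cpuid_maxphyaddr(vcpu), 63);   /* bits above MAXPHYADDR */
}

static inline bool kvm_vcpu_is_illegal_gpa(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	return !!(gpa & vcpu->arch.reserved_gpa_bits);  /* any reserved bit set? */
}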
@@ -10013,9 +10115,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.pending_external_vector = -1;
vcpu->arch.preempted_in_kernel = false;
- kvm_hv_vcpu_init(vcpu);
-
- r = kvm_x86_ops.vcpu_create(vcpu);
+ r = static_call(kvm_x86_vcpu_create)(vcpu);
if (r)
goto free_guest_fpu;
@@ -10051,8 +10151,6 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
- kvm_hv_vcpu_postcreate(vcpu);
-
if (mutex_lock_killable(&vcpu->mutex))
return;
vcpu_load(vcpu);
@@ -10078,7 +10176,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvmclock_reset(vcpu);
- kvm_x86_ops.vcpu_free(vcpu);
+ static_call(kvm_x86_vcpu_free)(vcpu);
kmem_cache_free(x86_emulator_cache, vcpu->arch.emulate_ctxt);
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
@@ -10095,7 +10193,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
free_page((unsigned long)vcpu->arch.pio_data);
kvfree(vcpu->arch.cpuid_entries);
if (!lapic_in_kernel(vcpu))
- static_key_slow_dec(&kvm_no_apic_vcpu);
+ static_branch_dec(&kvm_has_noapic_vcpu);
}
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -10114,7 +10212,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
kvm_update_dr0123(vcpu);
- vcpu->arch.dr6 = DR6_INIT;
+ vcpu->arch.dr6 = DR6_ACTIVE_LOW;
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
@@ -10167,7 +10265,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vcpu->arch.ia32_xss = 0;
- kvm_x86_ops.vcpu_reset(vcpu, init_event);
+ static_call(kvm_x86_vcpu_reset)(vcpu, init_event);
}
void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
@@ -10193,7 +10291,7 @@ int kvm_arch_hardware_enable(void)
bool stable, backwards_tsc = false;
kvm_user_return_msr_cpu_online();
- ret = kvm_x86_ops.hardware_enable();
+ ret = static_call(kvm_x86_hardware_enable)();
if (ret != 0)
return ret;
@@ -10275,7 +10373,7 @@ int kvm_arch_hardware_enable(void)
void kvm_arch_hardware_disable(void)
{
- kvm_x86_ops.hardware_disable();
+ static_call(kvm_x86_hardware_disable)();
drop_user_return_notifiers();
}
@@ -10294,6 +10392,7 @@ int kvm_arch_hardware_setup(void *opaque)
return r;
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
+ kvm_ops_static_call_update();
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
supported_xss = 0;
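kvm_ops_static_call_update() retargets every kvm_x86_* static call at the vendor implementation that was just copied into kvm_x86_ops. A sketch of how such an updater can be generated from the same kvm-x86-ops.h list, assuming one KVM_X86_OP() entry per hook:

/* Sketch: repoint each kvm_x86_<hook> static call at kvm_x86_ops.<hook>. */
static void kvm_ops_static_call_update(void)
{
#define KVM_X86_OP(func) \
	static_call_update(kvm_x86_##func, kvm_x86_ops.func)
#define KVM_X86_OP_NULL KVM_X86_OP
#include <asm/kvm-x86-ops.h>
}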
@@ -10322,7 +10421,7 @@ int kvm_arch_hardware_setup(void *opaque)
void kvm_arch_hardware_unsetup(void)
{
- kvm_x86_ops.hardware_unsetup();
+ static_call(kvm_x86_hardware_unsetup)();
}
int kvm_arch_check_processor_compat(void *opaque)
@@ -10350,8 +10449,8 @@ bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0;
}
-struct static_key kvm_no_apic_vcpu __read_mostly;
-EXPORT_SYMBOL_GPL(kvm_no_apic_vcpu);
+__read_mostly DEFINE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);
+EXPORT_SYMBOL_GPL(kvm_has_noapic_vcpu);
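The bare struct static_key is replaced with the DEFINE_STATIC_KEY_FALSE()/static_branch_*() API, which gives the key a definite initial value and type-checked inc/dec (see the static_branch_inc/static_branch_dec conversions in vcpu create/destroy above). A likely consumer pattern, shown only as an illustration; the function below is hypothetical, not the in-tree lapic helper:

DECLARE_STATIC_KEY_FALSE(kvm_has_noapic_vcpu);

/* Illustrative only: the branch is patched out while no APIC-less vCPUs exist. */
static bool example_lapic_in_kernel(struct kvm_vcpu *vcpu)
{
	if (static_branch_unlikely(&kvm_has_noapic_vcpu))
		return vcpu->arch.apic != NULL;   /* rare case: really check */
	return true;                              /* common case: constant-folded */
}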
void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
@@ -10362,12 +10461,12 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
pmu->need_cleanup = true;
kvm_make_request(KVM_REQ_PMU, vcpu);
}
- kvm_x86_ops.sched_in(vcpu, cpu);
+ static_call(kvm_x86_sched_in)(vcpu, cpu);
}
void kvm_arch_free_vm(struct kvm *kvm)
{
- kfree(kvm->arch.hyperv.hv_pa_pg);
+ kfree(to_kvm_hv(kvm)->hv_pa_pg);
vfree(kvm);
}
@@ -10406,7 +10505,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_page_track_init(kvm);
kvm_mmu_init_vm(kvm);
- return kvm_x86_ops.vm_init(kvm);
+ return static_call(kvm_x86_vm_init)(kvm);
}
int kvm_arch_post_init_vm(struct kvm *kvm)
@@ -10551,8 +10650,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
__x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
mutex_unlock(&kvm->slots_lock);
}
- if (kvm_x86_ops.vm_destroy)
- kvm_x86_ops.vm_destroy(kvm);
+ static_call_cond(kvm_x86_vm_destroy)(kvm);
for (i = 0; i < kvm->arch.msr_filter.count; i++)
kfree(kvm->arch.msr_filter.ranges[i].bitmap);
kvm_pic_destroy(kvm);
@@ -10562,6 +10660,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
+ kvm_xen_destroy_vm(kvm);
kvm_hv_destroy_vm(kvm);
}
@@ -10680,75 +10779,96 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
}
+
+static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
+{
+ struct kvm_arch *ka = &kvm->arch;
+
+ if (!kvm_x86_ops.cpu_dirty_log_size)
+ return;
+
+ if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
+ (!enable && --ka->cpu_dirty_logging_count == 0))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
+
+ WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
+}
+
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+
/*
- * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot.
- * See comments below.
+ * Update CPU dirty logging if dirty logging is being toggled. This
+ * applies to all operations.
*/
- if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
- return;
+ if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+ kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
/*
- * Dirty logging tracks sptes in 4k granularity, meaning that large
- * sptes have to be split. If live migration is successful, the guest
- * in the source machine will be destroyed and large sptes will be
- * created in the destination. However, if the guest continues to run
- * in the source machine (for example if live migration fails), small
- * sptes will remain around and cause bad performance.
+ * Nothing more to do for RO slots (which can't be dirtied and can't be
+ * made writable) or CREATE/MOVE/DELETE of a slot.
*
- * Scan sptes if dirty logging has been stopped, dropping those
- * which can be collapsed into a single large-page spte. Later
- * page faults will create the large-page sptes.
- *
- * There is no need to do this in any of the following cases:
+ * For a memslot with dirty logging disabled:
* CREATE: No dirty mappings will already exist.
* MOVE/DELETE: The old mappings will already have been cleaned up by
* kvm_arch_flush_shadow_memslot()
+ *
+ * For a memslot with dirty logging enabled:
+ * CREATE: No shadow pages exist, thus nothing to write-protect
+ * and no dirty bits to clear.
+ * MOVE/DELETE: The old mappings will already have been cleaned up by
+ * kvm_arch_flush_shadow_memslot().
*/
- if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
- !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
- kvm_mmu_zap_collapsible_sptes(kvm, new);
+ if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
+ return;
/*
- * Enable or disable dirty logging for the slot.
- *
- * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
- * slot have been zapped so no dirty logging updates are needed for
- * the old slot.
- * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible
- * any mappings that might be created in it will consume the
- * properties of the new slot and do not need to be updated here.
- *
- * When PML is enabled, the kvm_x86_ops dirty logging hooks are
- * called to enable/disable dirty logging.
- *
- * When disabling dirty logging with PML enabled, the D-bit is set
- * for sptes in the slot in order to prevent unnecessary GPA
- * logging in the PML buffer (and potential PML buffer full VMEXIT).
- * This guarantees leaving PML enabled for the guest's lifetime
- * won't have any additional overhead from PML when the guest is
- * running with dirty logging disabled.
- *
- * When enabling dirty logging, large sptes are write-protected
- * so they can be split on first write. New large sptes cannot
- * be created for this slot until the end of the logging.
- * See the comments in fast_page_fault().
- * For small sptes, nothing is done if the dirty log is in the
- * initial-all-set state. Otherwise, depending on whether pml
- * is enabled the D-bit or the W-bit will be cleared.
+ * READONLY and non-flags changes were filtered out above, and the only
+ * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
+ * logging isn't being toggled on or off.
*/
- if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
- if (kvm_x86_ops.slot_enable_log_dirty) {
- kvm_x86_ops.slot_enable_log_dirty(kvm, new);
- } else {
- int level =
- kvm_dirty_log_manual_protect_and_init_set(kvm) ?
- PG_LEVEL_2M : PG_LEVEL_4K;
+ if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+ return;
+
+ if (!log_dirty_pages) {
+ /*
+ * Dirty logging tracks sptes in 4k granularity, meaning that
+ * large sptes have to be split. If live migration succeeds,
+ * the guest in the source machine will be destroyed and large
+ * sptes will be created in the destination. However, if the
+ * guest continues to run in the source machine (for example if
+ * live migration fails), small sptes will remain around and
+ * cause bad performance.
+ *
+ * Scan sptes if dirty logging has been stopped, dropping those
+ * which can be collapsed into a single large-page spte. Later
+ * page faults will create the large-page sptes.
+ */
+ kvm_mmu_zap_collapsible_sptes(kvm, new);
+ } else {
+ /* By default, write-protect everything to log writes. */
+ int level = PG_LEVEL_4K;
+ if (kvm_x86_ops.cpu_dirty_log_size) {
+ /*
+ * Clear all dirty bits, unless pages are treated as
+ * dirty from the get-go.
+ */
+ if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
+ kvm_mmu_slot_leaf_clear_dirty(kvm, new);
+
+ /*
+ * Write-protect large pages on write so that dirty
+ * logging happens at 4k granularity. No need to
+ * write-protect small SPTEs since write accesses are
+ * logged by the CPU via dirty bits.
+ */
+ level = PG_LEVEL_2M;
+ } else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
/*
* If we're with initial-all-set, we don't need
* to write protect any small page because
@@ -10757,11 +10877,9 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* so that the page split can happen lazily on
* the first write to the huge page.
*/
- kvm_mmu_slot_remove_write_access(kvm, new, level);
+ level = PG_LEVEL_2M;
}
- } else {
- if (kvm_x86_ops.slot_disable_log_dirty)
- kvm_x86_ops.slot_disable_log_dirty(kvm, new);
+ kvm_mmu_slot_remove_write_access(kvm, new, level);
}
}
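kvm_mmu_slot_apply_flags() runs when userspace flips KVM_MEM_LOG_DIRty_PAGES on an existing memslot (a KVM_MR_FLAGS_ONLY update), which is also what drives the new KVM_REQ_UPDATE_CPU_DIRTY_LOGGING counting above. A hedged userspace sketch of that toggle, assuming a VM fd from KVM_CREATE_VM, a previously registered slot 0, and no error handling:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Sketch: re-register slot 0 with dirty logging enabled (or cleared to disable). */
static int set_dirty_logging(int vm_fd, __u64 gpa, __u64 size, void *hva, int enable)
{
	struct kvm_userspace_memory_region region = {
		.slot            = 0,
		.flags           = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0,
		.guest_phys_addr = gpa,
		.memory_size     = size,
		.userspace_addr  = (__u64)hva,
	};

	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}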
@@ -10800,7 +10918,7 @@ static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
{
return (is_guest_mode(vcpu) &&
kvm_x86_ops.guest_apic_has_interrupt &&
- kvm_x86_ops.guest_apic_has_interrupt(vcpu));
+ static_call(kvm_x86_guest_apic_has_interrupt)(vcpu));
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
@@ -10819,12 +10937,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
(vcpu->arch.nmi_pending &&
- kvm_x86_ops.nmi_allowed(vcpu, false)))
+ static_call(kvm_x86_nmi_allowed)(vcpu, false)))
return true;
if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
(vcpu->arch.smi_pending &&
- kvm_x86_ops.smi_allowed(vcpu, false)))
+ static_call(kvm_x86_smi_allowed)(vcpu, false)))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -10858,7 +10976,7 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
kvm_test_request(KVM_REQ_EVENT, vcpu))
return true;
- if (vcpu->arch.apicv_active && kvm_x86_ops.dy_apicv_has_pending_interrupt(vcpu))
+ if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
return true;
return false;
@@ -10876,7 +10994,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu)
{
- return kvm_x86_ops.interrupt_allowed(vcpu, false);
+ return static_call(kvm_x86_interrupt_allowed)(vcpu, false);
}
unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
@@ -10902,7 +11020,7 @@ unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu)
{
unsigned long rflags;
- rflags = kvm_x86_ops.get_rflags(vcpu);
+ rflags = static_call(kvm_x86_get_rflags)(vcpu);
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
rflags &= ~X86_EFLAGS_TF;
return rflags;
@@ -10914,7 +11032,7 @@ static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP &&
kvm_is_linear_rip(vcpu, vcpu->arch.singlestep_rip))
rflags |= X86_EFLAGS_TF;
- kvm_x86_ops.set_rflags(vcpu, rflags);
+ static_call(kvm_x86_set_rflags)(vcpu, rflags);
}
void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
@@ -11044,7 +11162,7 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
return false;
if (!kvm_pv_async_pf_enabled(vcpu) ||
- (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0))
+ (vcpu->arch.apf.send_user_only && static_call(kvm_x86_get_cpl)(vcpu) == 0))
return false;
return true;
@@ -11189,7 +11307,7 @@ int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
irqfd->producer = prod;
kvm_arch_start_assignment(irqfd->kvm);
- ret = kvm_x86_ops.update_pi_irte(irqfd->kvm,
+ ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm,
prod->irq, irqfd->gsi, 1);
if (ret)
@@ -11214,7 +11332,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
* when the irq is masked/disabled or the consumer side (KVM
* in this case) doesn't want to receive the interrupts.
*/
- ret = kvm_x86_ops.update_pi_irte(irqfd->kvm, prod->irq, irqfd->gsi, 0);
+ ret = static_call(kvm_x86_update_pi_irte)(irqfd->kvm, prod->irq, irqfd->gsi, 0);
if (ret)
printk(KERN_INFO "irq bypass consumer (token %p) unregistration"
" fails: %d\n", irqfd->consumer.token, ret);
@@ -11225,7 +11343,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set)
{
- return kvm_x86_ops.update_pi_irte(kvm, host_irq, guest_irq, set);
+ return static_call(kvm_x86_update_pi_irte)(kvm, host_irq, guest_irq, set);
}
bool kvm_vector_hashing_enabled(void)