summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/virt/kvm/x86/errata.rst11
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h52
-rw-r--r--arch/x86/kvm/lapic.c319
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/svm/avic.c370
-rw-r--r--arch/x86/kvm/svm/nested.c2
-rw-r--r--arch/x86/kvm/svm/svm.c8
-rw-r--r--arch/x86/kvm/svm/svm.h27
-rw-r--r--arch/x86/kvm/vmx/vmx.c58
-rw-r--r--arch/x86/kvm/x86.c29
11 files changed, 523 insertions, 356 deletions
diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst
index 410e0aa63493..49a05f24747b 100644
--- a/Documentation/virt/kvm/x86/errata.rst
+++ b/Documentation/virt/kvm/x86/errata.rst
@@ -37,3 +37,14 @@ Nested virtualization features
------------------------------
TBD
+
+x2APIC
+------
+When KVM_X2APIC_API_USE_32BIT_IDS is enabled, KVM activates a hack/quirk that
+allows sending events to a single vCPU using its x2APIC ID even if the target
+vCPU has legacy xAPIC enabled, e.g. to bring up hotplugged vCPUs via INIT-SIPI
+on VMs with > 255 vCPUs. A side effect of the quirk is that, if multiple vCPUs
+have the same physical APIC ID, KVM will deliver events targeting that APIC ID
+only to the vCPU with the lowest vCPU ID. If KVM_X2APIC_API_USE_32BIT_IDS is
+not enabled, KVM follows x86 architecture when processing interrupts (all vCPUs
+matching the target APIC ID receive the interrupt).
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index dba2909e5ae2..8dc345cc6318 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -77,7 +77,6 @@ KVM_X86_OP(set_nmi_mask)
KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
KVM_X86_OP_OPTIONAL(update_cr8_intercept)
-KVM_X86_OP(check_apicv_inhibit_reasons)
KVM_X86_OP(refresh_apicv_exec_ctrl)
KVM_X86_OP_OPTIONAL(hwapic_irr_update)
KVM_X86_OP_OPTIONAL(hwapic_isr_update)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8d0a0a7c34fc..4d2bc08794e4 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1022,19 +1022,30 @@ struct kvm_arch_memory_slot {
};
/*
- * We use as the mode the number of bits allocated in the LDR for the
- * logical processor ID. It happens that these are all powers of two.
- * This makes it is very easy to detect cases where the APICs are
- * configured for multiple modes; in that case, we cannot use the map and
- * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ * Track the mode of the optimized logical map, as the rules for decoding the
+ * destination vary per mode. Enabling the optimized logical map requires all
+ * software-enabled local APIs to be in the same mode, each addressable APIC to
+ * be mapped to only one MDA, and each MDA to map to at most one APIC.
*/
-#define KVM_APIC_MODE_XAPIC_CLUSTER 4
-#define KVM_APIC_MODE_XAPIC_FLAT 8
-#define KVM_APIC_MODE_X2APIC 16
+enum kvm_apic_logical_mode {
+ /* All local APICs are software disabled. */
+ KVM_APIC_MODE_SW_DISABLED,
+ /* All software enabled local APICs in xAPIC cluster addressing mode. */
+ KVM_APIC_MODE_XAPIC_CLUSTER,
+ /* All software enabled local APICs in xAPIC flat addressing mode. */
+ KVM_APIC_MODE_XAPIC_FLAT,
+ /* All software enabled local APICs in x2APIC mode. */
+ KVM_APIC_MODE_X2APIC,
+ /*
+ * Optimized map disabled, e.g. not all local APICs in the same logical
+ * mode, same logical ID assigned to multiple APICs, etc.
+ */
+ KVM_APIC_MODE_MAP_DISABLED,
+};
struct kvm_apic_map {
struct rcu_head rcu;
- u8 mode;
+ enum kvm_apic_logical_mode logical_mode;
u32 max_apic_id;
union {
struct kvm_lapic *xapic_flat_map[8];
@@ -1165,6 +1176,12 @@ enum kvm_apicv_inhibit {
APICV_INHIBIT_REASON_BLOCKIRQ,
/*
+ * APICv is disabled because not all vCPUs have a 1:1 mapping between
+ * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
+ */
+ APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,
+
+ /*
* For simplicity, the APIC acceleration is inhibited
* first time either APIC ID or APIC base are changed by the guest
* from their reset values.
@@ -1202,6 +1219,12 @@ enum kvm_apicv_inhibit {
* AVIC is disabled because SEV doesn't support it.
*/
APICV_INHIBIT_REASON_SEV,
+
+ /*
+ * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
+ * mapping between logical ID and vCPU.
+ */
+ APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
};
struct kvm_arch {
@@ -1250,10 +1273,11 @@ struct kvm_arch {
struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty;
- /* Protects apic_access_memslot_enabled and apicv_inhibit_reasons */
- struct rw_semaphore apicv_update_lock;
-
bool apic_access_memslot_enabled;
+ bool apic_access_memslot_inhibited;
+
+ /* Protects apicv_inhibit_reasons */
+ struct rw_semaphore apicv_update_lock;
unsigned long apicv_inhibit_reasons;
gpa_t wall_clock;
@@ -1602,6 +1626,8 @@ struct kvm_x86_ops {
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
+ const unsigned long required_apicv_inhibits;
+ bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(int isr);
@@ -1976,7 +2002,7 @@ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
bool kvm_apicv_activated(struct kvm *kvm);
bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
-void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
+void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
enum kvm_apicv_inhibit reason, bool set);
void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index cfaf1d8c64ca..7cf4eebc9bcc 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -167,9 +167,19 @@ static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
}
+static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
+{
+ return ((id >> 4) << 16) | (1 << (id & 0xf));
+}
+
static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
- switch (map->mode) {
+ switch (map->logical_mode) {
+ case KVM_APIC_MODE_SW_DISABLED:
+ /* Arbitrarily use the flat map so that @cluster isn't NULL. */
+ *cluster = map->xapic_flat_map;
+ *mask = 0;
+ return true;
case KVM_APIC_MODE_X2APIC: {
u32 offset = (dest_id >> 16) * 16;
u32 max_apic_id = map->max_apic_id;
@@ -194,8 +204,10 @@ static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
*cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
*mask = dest_id & 0xf;
return true;
+ case KVM_APIC_MODE_MAP_DISABLED:
+ return false;
default:
- /* Not optimized. */
+ WARN_ON_ONCE(1);
return false;
}
}
@@ -207,6 +219,134 @@ static void kvm_apic_map_free(struct rcu_head *rcu)
kvfree(map);
}
+static int kvm_recalculate_phys_map(struct kvm_apic_map *new,
+ struct kvm_vcpu *vcpu,
+ bool *xapic_id_mismatch)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 x2apic_id = kvm_x2apic_id(apic);
+ u32 xapic_id = kvm_xapic_id(apic);
+ u32 physical_id;
+
+ /*
+ * Deliberately truncate the vCPU ID when detecting a mismatched APIC
+ * ID to avoid false positives if the vCPU ID, i.e. x2APIC ID, is a
+ * 32-bit value. Any unwanted aliasing due to truncation results will
+ * be detected below.
+ */
+ if (!apic_x2apic_mode(apic) && xapic_id != (u8)vcpu->vcpu_id)
+ *xapic_id_mismatch = true;
+
+ /*
+ * Apply KVM's hotplug hack if userspace has enable 32-bit APIC IDs.
+ * Allow sending events to vCPUs by their x2APIC ID even if the target
+ * vCPU is in legacy xAPIC mode, and silently ignore aliased xAPIC IDs
+ * (the x2APIC ID is truncated to 8 bits, causing IDs > 0xff to wrap
+ * and collide).
+ *
+ * Honor the architectural (and KVM's non-optimized) behavior if
+ * userspace has not enabled 32-bit x2APIC IDs. Each APIC is supposed
+ * to process messages independently. If multiple vCPUs have the same
+ * effective APIC ID, e.g. due to the x2APIC wrap or because the guest
+ * manually modified its xAPIC IDs, events targeting that ID are
+ * supposed to be recognized by all vCPUs with said ID.
+ */
+ if (vcpu->kvm->arch.x2apic_format) {
+ /* See also kvm_apic_match_physical_addr(). */
+ if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
+ x2apic_id <= new->max_apic_id)
+ new->phys_map[x2apic_id] = apic;
+
+ if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
+ new->phys_map[xapic_id] = apic;
+ } else {
+ /*
+ * Disable the optimized map if the physical APIC ID is already
+ * mapped, i.e. is aliased to multiple vCPUs. The optimized
+ * map requires a strict 1:1 mapping between IDs and vCPUs.
+ */
+ if (apic_x2apic_mode(apic))
+ physical_id = x2apic_id;
+ else
+ physical_id = xapic_id;
+
+ if (new->phys_map[physical_id])
+ return -EINVAL;
+
+ new->phys_map[physical_id] = apic;
+ }
+
+ return 0;
+}
+
+static void kvm_recalculate_logical_map(struct kvm_apic_map *new,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+ enum kvm_apic_logical_mode logical_mode;
+ struct kvm_lapic **cluster;
+ u16 mask;
+ u32 ldr;
+
+ if (new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
+ return;
+
+ if (!kvm_apic_sw_enabled(apic))
+ return;
+
+ ldr = kvm_lapic_get_reg(apic, APIC_LDR);
+ if (!ldr)
+ return;
+
+ if (apic_x2apic_mode(apic)) {
+ logical_mode = KVM_APIC_MODE_X2APIC;
+ } else {
+ ldr = GET_APIC_LOGICAL_ID(ldr);
+ if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+ logical_mode = KVM_APIC_MODE_XAPIC_FLAT;
+ else
+ logical_mode = KVM_APIC_MODE_XAPIC_CLUSTER;
+ }
+
+ /*
+ * To optimize logical mode delivery, all software-enabled APICs must
+ * be configured for the same mode.
+ */
+ if (new->logical_mode == KVM_APIC_MODE_SW_DISABLED) {
+ new->logical_mode = logical_mode;
+ } else if (new->logical_mode != logical_mode) {
+ new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+ return;
+ }
+
+ /*
+ * In x2APIC mode, the LDR is read-only and derived directly from the
+ * x2APIC ID, thus is guaranteed to be addressable. KVM reuses
+ * kvm_apic_map.phys_map to optimize logical mode x2APIC interrupts by
+ * reversing the LDR calculation to get cluster of APICs, i.e. no
+ * additional work is required.
+ */
+ if (apic_x2apic_mode(apic)) {
+ WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic)));
+ return;
+ }
+
+ if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr,
+ &cluster, &mask))) {
+ new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+ return;
+ }
+
+ if (!mask)
+ return;
+
+ ldr = ffs(mask) - 1;
+ if (!is_power_of_2(mask) || cluster[ldr])
+ new->logical_mode = KVM_APIC_MODE_MAP_DISABLED;
+ else
+ cluster[ldr] = apic;
+}
+
/*
* CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
*
@@ -225,6 +365,7 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
struct kvm_vcpu *vcpu;
unsigned long i;
u32 max_id = 255; /* enough space for any xAPIC ID */
+ bool xapic_id_mismatch = false;
/* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map. */
if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
@@ -257,54 +398,41 @@ void kvm_recalculate_apic_map(struct kvm *kvm)
goto out;
new->max_apic_id = max_id;
+ new->logical_mode = KVM_APIC_MODE_SW_DISABLED;
kvm_for_each_vcpu(i, vcpu, kvm) {
- struct kvm_lapic *apic = vcpu->arch.apic;
- struct kvm_lapic **cluster;
- u16 mask;
- u32 ldr;
- u8 xapic_id;
- u32 x2apic_id;
-
if (!kvm_apic_present(vcpu))
continue;
- xapic_id = kvm_xapic_id(apic);
- x2apic_id = kvm_x2apic_id(apic);
-
- /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
- if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
- x2apic_id <= new->max_apic_id)
- new->phys_map[x2apic_id] = apic;
- /*
- * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around,
- * prevent them from masking VCPUs with APIC ID <= 0xff.
- */
- if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
- new->phys_map[xapic_id] = apic;
-
- if (!kvm_apic_sw_enabled(apic))
- continue;
-
- ldr = kvm_lapic_get_reg(apic, APIC_LDR);
-
- if (apic_x2apic_mode(apic)) {
- new->mode |= KVM_APIC_MODE_X2APIC;
- } else if (ldr) {
- ldr = GET_APIC_LOGICAL_ID(ldr);
- if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
- new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
- else
- new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+ if (kvm_recalculate_phys_map(new, vcpu, &xapic_id_mismatch)) {
+ kvfree(new);
+ new = NULL;
+ goto out;
}
- if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
- continue;
-
- if (mask)
- cluster[ffs(mask) - 1] = apic;
+ kvm_recalculate_logical_map(new, vcpu);
}
out:
+ /*
+ * The optimized map is effectively KVM's internal version of APICv,
+ * and all unwanted aliasing that results in disabling the optimized
+ * map also applies to APICv.
+ */
+ if (!new)
+ kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
+ else
+ kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED);
+
+ if (!new || new->logical_mode == KVM_APIC_MODE_MAP_DISABLED)
+ kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
+ else
+ kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED);
+
+ if (xapic_id_mismatch)
+ kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+ else
+ kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+
old = rcu_dereference_protected(kvm->arch.apic_map,
lockdep_is_held(&kvm->arch.apic_map_lock));
rcu_assign_pointer(kvm->arch.apic_map, new);
@@ -361,11 +489,6 @@ static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
}
-static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
-{
- return ((id >> 4) << 16) | (1 << (id & 0xf));
-}
-
static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
{
u32 ldr = kvm_apic_calc_x2apic_ldr(id);
@@ -951,7 +1074,7 @@ static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
{
if (kvm->arch.x2apic_broadcast_quirk_disabled) {
if ((irq->dest_id == APIC_BROADCAST &&
- map->mode != KVM_APIC_MODE_X2APIC))
+ map->logical_mode != KVM_APIC_MODE_X2APIC))
return true;
if (irq->dest_id == X2APIC_BROADCAST)
return true;
@@ -2068,19 +2191,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
}
}
-static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
-{
- struct kvm *kvm = apic->vcpu->kvm;
-
- if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
- return;
-
- if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
- return;
-
- kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
-}
-
static int get_lvt_index(u32 reg)
{
if (reg == APIC_LVTCMCI)
@@ -2101,7 +2211,6 @@ static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
case APIC_ID: /* Local APIC ID */
if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
- kvm_lapic_xapic_id_updated(apic);
} else {
ret = 1;
}
@@ -2284,23 +2393,18 @@ void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
struct kvm_lapic *apic = vcpu->arch.apic;
u64 val;
- if (apic_x2apic_mode(apic)) {
- if (KVM_BUG_ON(kvm_lapic_msr_read(apic, offset, &val), vcpu->kvm))
- return;
- } else {
- val = kvm_lapic_get_reg(apic, offset);
- }
-
/*
* ICR is a single 64-bit register when x2APIC is enabled. For legacy
* xAPIC, ICR writes need to go down the common (slightly slower) path
* to get the upper half from ICR2.
*/
if (apic_x2apic_mode(apic) && offset == APIC_ICR) {
+ val = kvm_lapic_get_reg64(apic, APIC_ICR);
kvm_apic_send_ipi(apic, (u32)val, (u32)(val >> 32));
trace_kvm_apic_write(APIC_ICR, val);
} else {
/* TODO: optimize to just emulate side effect w/o one more write */
+ val = kvm_lapic_get_reg(apic, offset);
kvm_lapic_reg_write(apic, offset, (u32)val);
}
}
@@ -2398,7 +2502,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) {
- kvm_vcpu_update_apicv(vcpu);
+ kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
}
@@ -2429,6 +2533,78 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
*/
apic->isr_count = count_vectors(apic->regs + APIC_ISR);
}
+ apic->highest_isr_cache = -1;
+}
+
+int kvm_alloc_apic_access_page(struct kvm *kvm)
+{
+ struct page *page;
+ void __user *hva;
+ int ret = 0;
+
+ mutex_lock(&kvm->slots_lock);
+ if (kvm->arch.apic_access_memslot_enabled ||
+ kvm->arch.apic_access_memslot_inhibited)
+ goto out;
+
+ hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
+ if (IS_ERR(hva)) {
+ ret = PTR_ERR(hva);
+ goto out;
+ }
+
+ page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
+ if (is_error_page(page)) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ /*
+ * Do not pin the page in memory, so that memory hot-unplug
+ * is able to migrate it.
+ */
+ put_page(page);
+ kvm->arch.apic_access_memslot_enabled = true;
+out:
+ mutex_unlock(&kvm->slots_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kvm_alloc_apic_access_page);
+
+void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+
+ if (!kvm->arch.apic_access_memslot_enabled)
+ return;
+
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
+ mutex_lock(&kvm->slots_lock);
+
+ if (kvm->arch.apic_access_memslot_enabled) {
+ __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ /*
+ * Clear "enabled" after the memslot is deleted so that a
+ * different vCPU doesn't get a false negative when checking
+ * the flag out of slots_lock. No additional memory barrier is
+ * needed as modifying memslots requires waiting other vCPUs to
+ * drop SRCU (see above), and false positives are ok as the
+ * flag is rechecked after acquiring slots_lock.
+ */
+ kvm->arch.apic_access_memslot_enabled = false;
+
+ /*
+ * Mark the memslot as inhibited to prevent reallocating the
+ * memslot during vCPU creation, e.g. if a vCPU is hotplugged.
+ */
+ kvm->arch.apic_access_memslot_inhibited = true;
+ }
+
+ mutex_unlock(&kvm->slots_lock);
+
+ kvm_vcpu_srcu_read_lock(vcpu);
}
void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2484,7 +2660,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
kvm_apic_update_apicv(vcpu);
- apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@@ -2756,9 +2931,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
}
memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
- if (!apic_x2apic_mode(apic))
- kvm_lapic_xapic_id_updated(apic);
-
atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
kvm_recalculate_apic_map(vcpu->kvm);
kvm_apic_set_version(vcpu);
@@ -2772,7 +2944,6 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
__start_apic_timer(apic, APIC_TMCCT);
kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
kvm_apic_update_apicv(vcpu);
- apic->highest_isr_cache = -1;
if (apic->apicv_active) {
static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 58c3242fcc7a..df316ede7546 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -112,6 +112,8 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu);
+int kvm_alloc_apic_access_page(struct kvm *kvm);
+void kvm_inhibit_apic_access_page(struct kvm_vcpu *vcpu);
bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map);
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index f52f5e0dd465..b3928150a37c 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -53,7 +53,7 @@ static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
static u32 next_vm_id = 0;
static bool next_vm_id_wrapped = 0;
static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
-enum avic_modes avic_mode;
+bool x2avic_enabled;
/*
* This is a wrapper of struct amd_iommu_ir_data.
@@ -72,20 +72,25 @@ static void avic_activate_vmcb(struct vcpu_svm *svm)
vmcb->control.int_ctl |= AVIC_ENABLE_MASK;
- /* Note:
- * KVM can support hybrid-AVIC mode, where KVM emulates x2APIC
- * MSR accesses, while interrupt injection to a running vCPU
- * can be achieved using AVIC doorbell. The AVIC hardware still
- * accelerate MMIO accesses, but this does not cause any harm
- * as the guest is not supposed to access xAPIC mmio when uses x2APIC.
+ /*
+ * Note: KVM supports hybrid-AVIC mode, where KVM emulates x2APIC MSR
+ * accesses, while interrupt injection to a running vCPU can be
+ * achieved using AVIC doorbell. KVM disables the APIC access page
+ * (deletes the memslot) if any vCPU has x2APIC enabled, thus enabling
+ * AVIC in hybrid mode activates only the doorbell mechanism.
*/
- if (apic_x2apic_mode(svm->vcpu.arch.apic) &&
- avic_mode == AVIC_MODE_X2) {
+ if (x2avic_enabled && apic_x2apic_mode(svm->vcpu.arch.apic)) {
vmcb->control.int_ctl |= X2APIC_MODE_MASK;
vmcb->control.avic_physical_id |= X2AVIC_MAX_PHYSICAL_ID;
/* Disabling MSR intercept for x2APIC registers */
svm_set_x2apic_msr_interception(svm, false);
} else {
+ /*
+ * Flush the TLB, the guest may have inserted a non-APIC
+ * mapping into the TLB while AVIC was disabled.
+ */
+ kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, &svm->vcpu);
+
/* For xAVIC and hybrid-xAVIC modes */
vmcb->control.avic_physical_id |= AVIC_MAX_PHYSICAL_ID;
/* Enabling MSR intercept for x2APIC registers */
@@ -241,8 +246,8 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
u64 *avic_physical_id_table;
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
- if ((avic_mode == AVIC_MODE_X1 && index > AVIC_MAX_PHYSICAL_ID) ||
- (avic_mode == AVIC_MODE_X2 && index > X2AVIC_MAX_PHYSICAL_ID))
+ if ((!x2avic_enabled && index > AVIC_MAX_PHYSICAL_ID) ||
+ (index > X2AVIC_MAX_PHYSICAL_ID))
return NULL;
avic_physical_id_table = page_address(kvm_svm->avic_physical_id_table_page);
@@ -250,47 +255,14 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
return &avic_physical_id_table[index];
}
-/*
- * Note:
- * AVIC hardware walks the nested page table to check permissions,
- * but does not use the SPA address specified in the leaf page
- * table entry since it uses address in the AVIC_BACKING_PAGE pointer
- * field of the VMCB. Therefore, we set up the
- * APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (4KB) here.
- */
-static int avic_alloc_access_page(struct kvm *kvm)
-{
- void __user *ret;
- int r = 0;
-
- mutex_lock(&kvm->slots_lock);
-
- if (kvm->arch.apic_access_memslot_enabled)
- goto out;
-
- ret = __x86_set_memory_region(kvm,
- APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
- APIC_DEFAULT_PHYS_BASE,
- PAGE_SIZE);
- if (IS_ERR(ret)) {
- r = PTR_ERR(ret);
- goto out;
- }
-
- kvm->arch.apic_access_memslot_enabled = true;
-out:
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
static int avic_init_backing_page(struct kvm_vcpu *vcpu)
{
u64 *entry, new_entry;
int id = vcpu->vcpu_id;
struct vcpu_svm *svm = to_svm(vcpu);
- if ((avic_mode == AVIC_MODE_X1 && id > AVIC_MAX_PHYSICAL_ID) ||
- (avic_mode == AVIC_MODE_X2 && id > X2AVIC_MAX_PHYSICAL_ID))
+ if ((!x2avic_enabled && id > AVIC_MAX_PHYSICAL_ID) ||
+ (id > X2AVIC_MAX_PHYSICAL_ID))
return -EINVAL;
if (!vcpu->arch.apic->regs)
@@ -299,7 +271,13 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
if (kvm_apicv_activated(vcpu->kvm)) {
int ret;
- ret = avic_alloc_access_page(vcpu->kvm);
+ /*
+ * Note, AVIC hardware walks the nested page table to check
+ * permissions, but does not use the SPA address specified in
+ * the leaf SPTE since it uses address in the AVIC_BACKING_PAGE
+ * pointer field of the VMCB.
+ */
+ ret = kvm_alloc_apic_access_page(vcpu->kvm);
if (ret)
return ret;
}
@@ -339,6 +317,60 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
put_cpu();
}
+
+static void avic_kick_vcpu(struct kvm_vcpu *vcpu, u32 icrl)
+{
+ vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+}
+
+static void avic_kick_vcpu_by_physical_id(struct kvm *kvm, u32 physical_id,
+ u32 icrl)
+{
+ /*
+ * KVM inhibits AVIC if any vCPU ID diverges from the vCPUs APIC ID,
+ * i.e. APIC ID == vCPU ID.
+ */
+ struct kvm_vcpu *target_vcpu = kvm_get_vcpu_by_id(kvm, physical_id);
+
+ /* Once again, nothing to do if the target vCPU doesn't exist. */
+ if (unlikely(!target_vcpu))
+ return;
+
+ avic_kick_vcpu(target_vcpu, icrl);
+}
+
+static void avic_kick_vcpu_by_logical_id(struct kvm *kvm, u32 *avic_logical_id_table,
+ u32 logid_index, u32 icrl)
+{
+ u32 physical_id;
+
+ if (avic_logical_id_table) {
+ u32 logid_entry = avic_logical_id_table[logid_index];
+
+ /* Nothing to do if the logical destination is invalid. */
+ if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+ return;
+
+ physical_id = logid_entry &
+ AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ } else {
+ /*
+ * For x2APIC, the logical APIC ID is a read-only value that is
+ * derived from the x2APIC ID, thus the x2APIC ID can be found
+ * by reversing the calculation (stored in logid_index). Note,
+ * bits 31:20 of the x2APIC ID aren't propagated to the logical
+ * ID, but KVM limits the x2APIC ID limited to KVM_MAX_VCPU_IDS.
+ */
+ physical_id = logid_index;
+ }
+
+ avic_kick_vcpu_by_physical_id(kvm, physical_id, icrl);
+}
+
/*
* A fast-path version of avic_kick_target_vcpus(), which attempts to match
* destination APIC ID to vCPU without looping through all vCPUs.
@@ -346,11 +378,10 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu)
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
- u32 l1_physical_id, dest;
- struct kvm_vcpu *target_vcpu;
int dest_mode = icrl & APIC_DEST_MASK;
int shorthand = icrl & APIC_SHORT_MASK;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
+ u32 dest;
if (shorthand != APIC_DEST_NOSHORT)
return -EINVAL;
@@ -367,18 +398,18 @@ static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source
if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
return -EINVAL;
- l1_physical_id = dest;
-
- if (WARN_ON_ONCE(l1_physical_id != index))
+ if (WARN_ON_ONCE(dest != index))
return -EINVAL;
+ avic_kick_vcpu_by_physical_id(kvm, dest, icrl);
} else {
- u32 bitmap, cluster;
- int logid_index;
+ u32 *avic_logical_id_table;
+ unsigned long bitmap, i;
+ u32 cluster;
if (apic_x2apic_mode(source)) {
/* 16 bit dest mask, 16 bit cluster id */
- bitmap = dest & 0xFFFF0000;
+ bitmap = dest & 0xFFFF;
cluster = (dest >> 16) << 4;
} else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
/* 8 bit dest mask*/
@@ -390,67 +421,32 @@ static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source
cluster = (dest >> 4) << 2;
}
+ /* Nothing to do if there are no destinations in the cluster. */
if (unlikely(!bitmap))
- /* guest bug: nobody to send the logical interrupt to */
return 0;
- if (!is_power_of_2(bitmap))
- /* multiple logical destinations, use slow path */
- return -EINVAL;
-
- logid_index = cluster + __ffs(bitmap);
-
- if (!apic_x2apic_mode(source)) {
- u32 *avic_logical_id_table =
- page_address(kvm_svm->avic_logical_id_table_page);
-
- u32 logid_entry = avic_logical_id_table[logid_index];
-
- if (WARN_ON_ONCE(index != logid_index))
- return -EINVAL;
-
- /* guest bug: non existing/reserved logical destination */
- if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
- return 0;
-
- l1_physical_id = logid_entry &
- AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
- } else {
- /*
- * For x2APIC logical mode, cannot leverage the index.
- * Instead, calculate physical ID from logical ID in ICRH.
- */
- int cluster = (icrh & 0xffff0000) >> 16;
- int apic = ffs(icrh & 0xffff) - 1;
-
- /*
- * If the x2APIC logical ID sub-field (i.e. icrh[15:0])
- * contains anything but a single bit, we cannot use the
- * fast path, because it is limited to a single vCPU.
- */
- if (apic < 0 || icrh != (1 << apic))
- return -EINVAL;
+ if (apic_x2apic_mode(source))
+ avic_logical_id_table = NULL;
+ else
+ avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
- l1_physical_id = (cluster << 4) + apic;
- }
+ /*
+ * AVIC is inhibited if vCPUs aren't mapped 1:1 with logical
+ * IDs, thus each bit in the destination is guaranteed to map
+ * to at most one vCPU.
+ */
+ for_each_set_bit(i, &bitmap, 16)
+ avic_kick_vcpu_by_logical_id(kvm, avic_logical_id_table,
+ cluster + i, icrl);
}
- target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
- if (unlikely(!target_vcpu))
- /* guest bug: non existing vCPU is a target of this IPI*/
- return 0;
-
- target_vcpu->arch.apic->irr_pending = true;
- svm_complete_interrupt_delivery(target_vcpu,
- icrl & APIC_MODE_MASK,
- icrl & APIC_INT_LEVELTRIG,
- icrl & APIC_VECTOR_MASK);
return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
+ u32 dest = apic_x2apic_mode(source) ? icrh : GET_XAPIC_DEST_FIELD(icrh);
unsigned long i;
struct kvm_vcpu *vcpu;
@@ -466,21 +462,9 @@ static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
* since entered the guest will have processed pending IRQs at VMRUN.
*/
kvm_for_each_vcpu(i, vcpu, kvm) {
- u32 dest;
-
- if (apic_x2apic_mode(vcpu->arch.apic))
- dest = icrh;
- else
- dest = GET_XAPIC_DEST_FIELD(icrh);
-
if (kvm_apic_match_dest(vcpu, source, icrl & APIC_SHORT_MASK,
- dest, icrl & APIC_DEST_MASK)) {
- vcpu->arch.apic->irr_pending = true;
- svm_complete_interrupt_delivery(vcpu,
- icrl & APIC_MODE_MASK,
- icrl & APIC_INT_LEVELTRIG,
- icrl & APIC_VECTOR_MASK);
- }
+ dest, icrl & APIC_DEST_MASK))
+ avic_kick_vcpu(vcpu, icrl);
}
}
@@ -496,14 +480,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
trace_kvm_avic_incomplete_ipi(vcpu->vcpu_id, icrh, icrl, id, index);
switch (id) {
+ case AVIC_IPI_FAILURE_INVALID_TARGET:
case AVIC_IPI_FAILURE_INVALID_INT_TYPE:
/*
* Emulate IPIs that are not handled by AVIC hardware, which
- * only virtualizes Fixed, Edge-Triggered INTRs. The exit is
- * a trap, e.g. ICR holds the correct value and RIP has been
- * advanced, KVM is responsible only for emulating the IPI.
- * Sadly, hardware may sometimes leave the BUSY flag set, in
- * which case KVM needs to emulate the ICR write as well in
+ * only virtualizes Fixed, Edge-Triggered INTRs, and falls over
+ * if _any_ targets are invalid, e.g. if the logical mode mask
+ * is a superset of running vCPUs.
+ *
+ * The exit is a trap, e.g. ICR holds the correct value and RIP
+ * has been advanced, KVM is responsible only for emulating the
+ * IPI. Sadly, hardware may sometimes leave the BUSY flag set,
+ * in which case KVM needs to emulate the ICR write as well in
* order to clear the BUSY flag.
*/
if (icrl & APIC_ICR_BUSY)
@@ -519,8 +507,6 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu)
*/
avic_kick_target_vcpus(vcpu->kvm, apic, icrl, icrh, index);
break;
- case AVIC_IPI_FAILURE_INVALID_TARGET:
- break;
case AVIC_IPI_FAILURE_INVALID_BACKING_PAGE:
WARN_ONCE(1, "Invalid backing page\n");
break;
@@ -541,33 +527,33 @@ unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu)
static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u32 ldr, bool flat)
{
struct kvm_svm *kvm_svm = to_kvm_svm(vcpu->kvm);
- int index;
u32 *logical_apic_id_table;
- int dlid = GET_APIC_LOGICAL_ID(ldr);
-
- if (!dlid)
- return NULL;
+ u32 cluster, index;
- if (flat) { /* flat */
- index = ffs(dlid) - 1;
- if (index > 7)
- return NULL;
- } else { /* cluster */
- int cluster = (dlid & 0xf0) >> 4;
- int apic = ffs(dlid & 0x0f) - 1;
+ ldr = GET_APIC_LOGICAL_ID(ldr);
- if ((apic < 0) || (apic > 7) ||
- (cluster >= 0xf))
+ if (flat) {
+ cluster = 0;
+ } else {
+ cluster = (ldr >> 4);
+ if (cluster >= 0xf)
return NULL;
- index = (cluster << 2) + apic;
+ ldr &= 0xf;
}
+ if (!ldr || !is_power_of_2(ldr))
+ return NULL;
+
+ index = __ffs(ldr);
+ if (WARN_ON_ONCE(index > 7))
+ return NULL;
+ index += (cluster << 2);
logical_apic_id_table = (u32 *) page_address(kvm_svm->avic_logical_id_table_page);
return &logical_apic_id_table[index];
}
-static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
+static void avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
{
bool flat;
u32 *entry, new_entry;
@@ -575,15 +561,13 @@ static int avic_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, u32 ldr)
flat = kvm_lapic_get_reg(vcpu->arch.apic, APIC_DFR) == APIC_DFR_FLAT;
entry = avic_get_logical_id_entry(vcpu, ldr, flat);
if (!entry)
- return -EINVAL;
+ return;
new_entry = READ_ONCE(*entry);
new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK);
new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK;
WRITE_ONCE(*entry, new_entry);
-
- return 0;
}
static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
@@ -601,29 +585,23 @@ static void avic_invalidate_logical_id_entry(struct kvm_vcpu *vcpu)
clear_bit(AVIC_LOGICAL_ID_ENTRY_VALID_BIT, (unsigned long *)entry);
}
-static int avic_handle_ldr_update(struct kvm_vcpu *vcpu)
+static void avic_handle_ldr_update(struct kvm_vcpu *vcpu)
{
- int ret = 0;
struct vcpu_svm *svm = to_svm(vcpu);
u32 ldr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LDR);
u32 id = kvm_xapic_id(vcpu->arch.apic);
/* AVIC does not support LDR update for x2APIC */
if (apic_x2apic_mode(vcpu->arch.apic))
- return 0;
+ return;
if (ldr == svm->ldr_reg)
- return 0;
+ return;
avic_invalidate_logical_id_entry(vcpu);
- if (ldr)
- ret = avic_ldr_write(vcpu, id, ldr);
-
- if (!ret)
- svm->ldr_reg = ldr;
-
- return ret;
+ svm->ldr_reg = ldr;
+ avic_ldr_write(vcpu, id, ldr);
}
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
@@ -645,12 +623,14 @@ static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu)
switch (offset) {
case APIC_LDR:
- if (avic_handle_ldr_update(vcpu))
- return 0;
+ avic_handle_ldr_update(vcpu);
break;
case APIC_DFR:
avic_handle_dfr_update(vcpu);
break;
+ case APIC_RRR:
+ /* Ignore writes to Read Remote Data, it's read-only. */
+ return 1;
default:
break;
}
@@ -739,18 +719,6 @@ void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
-void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
-{
- if (!lapic_in_kernel(vcpu) || avic_mode == AVIC_MODE_NONE)
- return;
-
- if (kvm_get_apic_mode(vcpu) == LAPIC_MODE_INVALID) {
- WARN_ONCE(true, "Invalid local APIC state (vcpu_id=%d)", vcpu->vcpu_id);
- return;
- }
- avic_refresh_apicv_exec_ctrl(vcpu);
-}
-
static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate)
{
int ret = 0;
@@ -995,23 +963,6 @@ out:
return ret;
}
-bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
-{
- ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
- BIT(APICV_INHIBIT_REASON_ABSENT) |
- BIT(APICV_INHIBIT_REASON_HYPERV) |
- BIT(APICV_INHIBIT_REASON_NESTED) |
- BIT(APICV_INHIBIT_REASON_IRQWIN) |
- BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
- BIT(APICV_INHIBIT_REASON_SEV) |
- BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
- BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
-
- return supported & BIT(reason);
-}
-
-
static inline int
avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
{
@@ -1064,6 +1015,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
return;
entry = READ_ONCE(*(svm->avic_physical_id_cache));
+ WARN_ON_ONCE(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK);
entry &= ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK;
entry |= (h_physical_id & AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK);
@@ -1092,17 +1044,15 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-
-void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb *vmcb = svm->vmcb01.ptr;
- bool activated = kvm_vcpu_apicv_active(vcpu);
- if (!enable_apicv)
+ if (!lapic_in_kernel(vcpu) || !enable_apicv)
return;
- if (activated) {
+ if (kvm_vcpu_apicv_active(vcpu)) {
/**
* During AVIC temporary deactivation, guest could update
* APIC ID, DFR and LDR registers, which would not be trapped
@@ -1116,6 +1066,16 @@ void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
avic_deactivate_vmcb(svm);
}
vmcb_mark_dirty(vmcb, VMCB_AVIC);
+}
+
+void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
+{
+ bool activated = kvm_vcpu_apicv_active(vcpu);
+
+ if (!enable_apicv)
+ return;
+
+ avic_refresh_virtual_apic_mode(vcpu);
if (activated)
avic_vcpu_load(vcpu, vcpu->cpu);
@@ -1165,32 +1125,32 @@ bool avic_hardware_setup(struct kvm_x86_ops *x86_ops)
if (!npt_enabled)
return false;
+ /* AVIC is a prerequisite for x2AVIC. */
+ if (!boot_cpu_has(X86_FEATURE_AVIC) && !force_avic) {
+ if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
+ pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
+ pr_warn(FW_BUG "Try enable AVIC using force_avic option");
+ }
+ return false;
+ }
+
if (boot_cpu_has(X86_FEATURE_AVIC)) {
- avic_mode = AVIC_MODE_X1;
pr_info("AVIC enabled\n");
} else if (force_avic) {
/*
* Some older systems does not advertise AVIC support.
* See Revision Guide for specific AMD processor for more detail.
*/
- avic_mode = AVIC_MODE_X1;
pr_warn("AVIC is not supported in CPUID but force enabled");
pr_warn("Your system might crash and burn");
}
/* AVIC is a prerequisite for x2AVIC. */
- if (boot_cpu_has(X86_FEATURE_X2AVIC)) {
- if (avic_mode == AVIC_MODE_X1) {
- avic_mode = AVIC_MODE_X2;
- pr_info("x2AVIC enabled\n");
- } else {
- pr_warn(FW_BUG "Cannot support x2AVIC due to AVIC is disabled");
- pr_warn(FW_BUG "Try enable AVIC using force_avic option");
- }
- }
+ x2avic_enabled = boot_cpu_has(X86_FEATURE_X2AVIC);
+ if (x2avic_enabled)
+ pr_info("x2AVIC enabled\n");
- if (avic_mode != AVIC_MODE_NONE)
- amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
+ amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier);
- return !!avic_mode;
+ return true;
}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 500da957e590..700df66d23c7 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1104,7 +1104,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
* to benefit from it right away.
*/
if (kvm_apicv_activated(vcpu->kvm))
- kvm_vcpu_update_apicv(vcpu);
+ __kvm_vcpu_update_apicv(vcpu);
return 0;
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 799b24801d31..d13cf53e7390 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -825,7 +825,7 @@ void svm_set_x2apic_msr_interception(struct vcpu_svm *svm, bool intercept)
if (intercept == svm->x2avic_msrs_intercepted)
return;
- if (avic_mode != AVIC_MODE_X2 ||
+ if (!x2avic_enabled ||
!apic_x2apic_mode(svm->vcpu.arch.apic))
return;
@@ -4769,10 +4769,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.enable_nmi_window = svm_enable_nmi_window,
.enable_irq_window = svm_enable_irq_window,
.update_cr8_intercept = svm_update_cr8_intercept,
- .set_virtual_apic_mode = avic_set_virtual_apic_mode,
+ .set_virtual_apic_mode = avic_refresh_virtual_apic_mode,
.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
- .check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
.apicv_post_state_restore = avic_apicv_post_state_restore,
+ .required_apicv_inhibits = AVIC_REQUIRED_APICV_INHIBITS,
.get_exit_info = svm_get_exit_info,
@@ -5026,6 +5026,8 @@ static __init int svm_hardware_setup(void)
svm_x86_ops.vcpu_blocking = NULL;
svm_x86_ops.vcpu_unblocking = NULL;
svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
+ } else if (!x2avic_enabled) {
+ svm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization = true;
}
if (vls) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 4826e6cc611b..41eabb098b13 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -35,14 +35,7 @@ extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
-
-enum avic_modes {
- AVIC_MODE_NONE = 0,
- AVIC_MODE_X1,
- AVIC_MODE_X2,
-};
-
-extern enum avic_modes avic_mode;
+extern bool x2avic_enabled;
/*
* Clean bits in VMCB.
@@ -628,6 +621,21 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
extern struct kvm_x86_nested_ops svm_nested_ops;
/* avic.c */
+#define AVIC_REQUIRED_APICV_INHIBITS \
+( \
+ BIT(APICV_INHIBIT_REASON_DISABLE) | \
+ BIT(APICV_INHIBIT_REASON_ABSENT) | \
+ BIT(APICV_INHIBIT_REASON_HYPERV) | \
+ BIT(APICV_INHIBIT_REASON_NESTED) | \
+ BIT(APICV_INHIBIT_REASON_IRQWIN) | \
+ BIT(APICV_INHIBIT_REASON_PIT_REINJ) | \
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
+ BIT(APICV_INHIBIT_REASON_SEV) | \
+ BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) | \
+ BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED) \
+)
bool avic_hardware_setup(struct kvm_x86_ops *ops);
int avic_ga_log_notifier(u32 ga_tag);
@@ -641,14 +649,13 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
-bool avic_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason);
int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
uint32_t guest_irq, bool set);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
-void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
+void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
/* sev.c */
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 73005d7e4e43..c788aa382611 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -3904,39 +3904,6 @@ static void seg_setup(int seg)
vmcs_write32(sf->ar_bytes, ar);
}
-static int alloc_apic_access_page(struct kvm *kvm)
-{
- struct page *page;
- void __user *hva;
- int ret = 0;
-
- mutex_lock(&kvm->slots_lock);
- if (kvm->arch.apic_access_memslot_enabled)
- goto out;
- hva = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
- APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
- if (IS_ERR(hva)) {
- ret = PTR_ERR(hva);
- goto out;
- }
-
- page = gfn_to_page(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT);
- if (is_error_page(page)) {
- ret = -EFAULT;
- goto out;
- }
-
- /*
- * Do not pin the page in memory, so that memory hot-unplug
- * is able to migrate it.
- */
- put_page(page);
- kvm->arch.apic_access_memslot_enabled = true;
-out:
- mutex_unlock(&kvm->slots_lock);
- return ret;
-}
-
int allocate_vpid(void)
{
int vpid;
@@ -7490,7 +7457,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
vmx->loaded_vmcs = &vmx->vmcs01;
if (cpu_need_virtualize_apic_accesses(vcpu)) {
- err = alloc_apic_access_page(vcpu->kvm);
+ err = kvm_alloc_apic_access_page(vcpu->kvm);
if (err)
goto free_vmcs;
}
@@ -8129,17 +8096,16 @@ static void vmx_hardware_unsetup(void)
free_kvm_area();
}
-static bool vmx_check_apicv_inhibit_reasons(enum kvm_apicv_inhibit reason)
-{
- ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
- BIT(APICV_INHIBIT_REASON_ABSENT) |
- BIT(APICV_INHIBIT_REASON_HYPERV) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
- BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
- BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
-
- return supported & BIT(reason);
-}
+#define VMX_REQUIRED_APICV_INHIBITS \
+( \
+ BIT(APICV_INHIBIT_REASON_DISABLE)| \
+ BIT(APICV_INHIBIT_REASON_ABSENT) | \
+ BIT(APICV_INHIBIT_REASON_HYPERV) | \
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) | \
+ BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) | \
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) \
+)
static void vmx_vm_destroy(struct kvm *kvm)
{
@@ -8225,7 +8191,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl,
.load_eoi_exitmap = vmx_load_eoi_exitmap,
.apicv_post_state_restore = vmx_apicv_post_state_restore,
- .check_apicv_inhibit_reasons = vmx_check_apicv_inhibit_reasons,
+ .required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
.guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c3ac88036b52..508074e47bc0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10148,7 +10148,7 @@ void kvm_make_scan_ioapic_request(struct kvm *kvm)
kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
-void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
bool activate;
@@ -10183,7 +10183,30 @@ out:
preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
-EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
+EXPORT_SYMBOL_GPL(__kvm_vcpu_update_apicv);
+
+static void kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu)
+{
+ if (!lapic_in_kernel(vcpu))
+ return;
+
+ /*
+ * Due to sharing page tables across vCPUs, the xAPIC memslot must be
+ * deleted if any vCPU has xAPIC virtualization and x2APIC enabled, but
+ * and hardware doesn't support x2APIC virtualization. E.g. some AMD
+ * CPUs support AVIC but not x2APIC. KVM still allows enabling AVIC in
+ * this case so that KVM can the AVIC doorbell to inject interrupts to
+ * running vCPUs, but KVM must not create SPTEs for the APIC base as
+ * the vCPU would incorrectly be able to access the vAPIC page via MMIO
+ * despite being in x2APIC mode. For simplicity, inhibiting the APIC
+ * access page is sticky.
+ */
+ if (apic_x2apic_mode(vcpu->arch.apic) &&
+ kvm_x86_ops.allow_apicv_in_x2apic_without_x2apic_virtualization)
+ kvm_inhibit_apic_access_page(vcpu);
+
+ __kvm_vcpu_update_apicv(vcpu);
+}
void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
enum kvm_apicv_inhibit reason, bool set)
@@ -10192,7 +10215,7 @@ void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
lockdep_assert_held_write(&kvm->arch.apicv_update_lock);
- if (!static_call(kvm_x86_check_apicv_inhibit_reasons)(reason))
+ if (!(kvm_x86_ops.required_apicv_inhibits & BIT(reason)))
return;
old = new = kvm->arch.apicv_inhibit_reasons;