summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Makefile5
-rw-r--r--arch/x86/events/intel/uncore_snbep.c9
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/x86/kvm/cpuid.c1
-rw-r--r--arch/x86/kvm/lapic.c20
-rw-r--r--arch/x86/kvm/mmu/mmu.c26
-rw-r--r--arch/x86/kvm/mmu/paging_tmpl.h14
-rw-r--r--arch/x86/kvm/svm/avic.c6
-rw-r--r--arch/x86/kvm/svm/sev.c26
-rw-r--r--arch/x86/kvm/trace.h6
-rw-r--r--arch/x86/kvm/vmx/vmx.c1
-rw-r--r--arch/x86/kvm/x86.c25
-rw-r--r--arch/x86/pci/fixup.c44
13 files changed, 150 insertions, 37 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 307529417021..cb5e8d39cac1 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -200,8 +200,9 @@ endif
KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
ifdef CONFIG_LTO_CLANG
-KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
- -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+KBUILD_LDFLAGS += -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
endif
ifdef CONFIG_X86_NEED_RELOCS
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 63f097289a84..3a75a2c601c2 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -1406,6 +1406,8 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
die_id = i;
else
die_id = topology_phys_to_logical_pkg(i);
+ if (die_id < 0)
+ die_id = -ENODEV;
map->pbus_to_dieid[bus] = die_id;
break;
}
@@ -1452,14 +1454,14 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool
i = -1;
if (reverse) {
for (bus = 255; bus >= 0; bus--) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
}
} else {
for (bus = 0; bus <= 255; bus++) {
- if (map->pbus_to_dieid[bus] >= 0)
+ if (map->pbus_to_dieid[bus] != -1)
i = map->pbus_to_dieid[bus];
else
map->pbus_to_dieid[bus] = i;
@@ -5097,9 +5099,10 @@ static struct intel_uncore_type icx_uncore_m2m = {
.perf_ctr = SNR_M2M_PCI_PMON_CTR0,
.event_ctl = SNR_M2M_PCI_PMON_CTL0,
.event_mask = SNBEP_PMON_RAW_EVENT_MASK,
+ .event_mask_ext = SNR_M2M_PCI_PMON_UMASK_EXT,
.box_ctl = SNR_M2M_PCI_PMON_BOX_CTL,
.ops = &snr_m2m_uncore_pci_ops,
- .format_group = &skx_uncore_format_group,
+ .format_group = &snr_m2m_uncore_format_group,
};
static struct attribute *icx_upi_uncore_formats_attr[] = {
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 3ef5868ac588..7aecb2fc3186 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -63,7 +63,7 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BPU_PERFCTR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_PERFCTR0;
@@ -96,7 +96,7 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
case 15:
return msr - MSR_P4_BSU_ESCR0;
}
- fallthrough;
+ break;
case X86_VENDOR_ZHAOXIN:
case X86_VENDOR_CENTAUR:
return msr - MSR_ARCH_PERFMON_EVENTSEL0;
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 9a48f138832d..b4da665bb892 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -655,6 +655,7 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
entry->ecx = F(RDPID);
++array->nent;
+ break;
default:
break;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8120e8614b92..17fa4ab1b834 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1410,6 +1410,9 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
+ if (alignment + len > 4)
+ return 1;
+
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
return 1;
@@ -1494,6 +1497,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
static void cancel_hv_timer(struct kvm_lapic *apic);
+static void cancel_apic_timer(struct kvm_lapic *apic)
+{
+ hrtimer_cancel(&apic->lapic_timer.timer);
+ preempt_disable();
+ if (apic->lapic_timer.hv_timer_in_use)
+ cancel_hv_timer(apic);
+ preempt_enable();
+}
+
static void apic_update_lvtt(struct kvm_lapic *apic)
{
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
@@ -1502,11 +1514,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
if (apic->lapic_timer.timer_mode != timer_mode) {
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
APIC_LVT_TIMER_TSCDEADLINE)) {
- hrtimer_cancel(&apic->lapic_timer.timer);
- preempt_disable();
- if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_timer(apic);
- preempt_enable();
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
apic->lapic_timer.period = 0;
apic->lapic_timer.tscdeadline = 0;
@@ -2092,7 +2100,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
if (apic_lvtt_tscdeadline(apic))
break;
- hrtimer_cancel(&apic->lapic_timer.timer);
+ cancel_apic_timer(apic);
kvm_lapic_set_reg(apic, APIC_TMICT, val);
start_apic_timer(apic);
break;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0144c40d09c7..8d5876dfc6b7 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4739,9 +4739,33 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu)
context->inject_page_fault = kvm_inject_page_fault;
}
+static union kvm_mmu_role kvm_calc_nested_mmu_role(struct kvm_vcpu *vcpu)
+{
+ union kvm_mmu_role role = kvm_calc_shadow_root_page_role_common(vcpu, false);
+
+ /*
+ * Nested MMUs are used only for walking L2's gva->gpa, they never have
+ * shadow pages of their own and so "direct" has no meaning. Set it
+ * to "true" to try to detect bogus usage of the nested MMU.
+ */
+ role.base.direct = true;
+
+ if (!is_paging(vcpu))
+ role.base.level = 0;
+ else if (is_long_mode(vcpu))
+ role.base.level = is_la57_mode(vcpu) ? PT64_ROOT_5LEVEL :
+ PT64_ROOT_4LEVEL;
+ else if (is_pae(vcpu))
+ role.base.level = PT32E_ROOT_LEVEL;
+ else
+ role.base.level = PT32_ROOT_LEVEL;
+
+ return role;
+}
+
static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
{
- union kvm_mmu_role new_role = kvm_calc_mmu_role_common(vcpu, false);
+ union kvm_mmu_role new_role = kvm_calc_nested_mmu_role(vcpu);
struct kvm_mmu *g_context = &vcpu->arch.nested_mmu;
if (new_role.as_u64 == g_context->mmu_role.as_u64)
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 70b7e44e3035..823a5919f9fa 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -90,8 +90,8 @@ struct guest_walker {
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
bool pte_writable[PT_MAX_FULL_LEVELS];
- unsigned pt_access;
- unsigned pte_access;
+ unsigned int pt_access[PT_MAX_FULL_LEVELS];
+ unsigned int pte_access;
gfn_t gfn;
struct x86_exception fault;
};
@@ -418,13 +418,15 @@ retry_walk:
}
walker->ptes[walker->level - 1] = pte;
+
+ /* Convert to ACC_*_MASK flags for struct guest_walker. */
+ walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
} while (!is_last_gpte(mmu, walker->level, pte));
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
/* Convert to ACC_*_MASK flags for struct guest_walker. */
- walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
if (unlikely(errcode))
@@ -463,7 +465,8 @@ retry_walk:
}
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
- __func__, (u64)pte, walker->pte_access, walker->pt_access);
+ __func__, (u64)pte, walker->pte_access,
+ walker->pt_access[walker->level - 1]);
return 1;
error:
@@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
- unsigned direct_access, access = gw->pt_access;
+ unsigned int direct_access, access;
int top_level, level, req_level, ret;
gfn_t base_gfn = gw->gfn;
@@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
sp = NULL;
if (!is_shadow_present_pte(*it.sptep)) {
table_gfn = gw->table_gfn[it.level - 2];
+ access = gw->pt_access[it.level - 2];
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
false, access);
}
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 0e62e6a2438c..5e7e920113f3 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -221,7 +221,7 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu,
return &avic_physical_id_table[index];
}
-/**
+/*
* Note:
* AVIC hardware walks the nested page table to check permissions,
* but does not use the SPA address specified in the leaf page
@@ -764,7 +764,7 @@ out:
return ret;
}
-/**
+/*
* Note:
* The HW cannot support posting multicast/broadcast
* interrupts to a vCPU. So, we still use legacy interrupt
@@ -1005,7 +1005,7 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu)
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-/**
+/*
* This function is called during VCPU halt/unhalt.
*/
static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 5bc887e9a986..8d36f0c73071 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -199,9 +199,19 @@ static void sev_asid_free(struct kvm_sev_info *sev)
sev->misc_cg = NULL;
}
-static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+static void sev_decommission(unsigned int handle)
{
struct sev_data_decommission decommission;
+
+ if (!handle)
+ return;
+
+ decommission.handle = handle;
+ sev_guest_decommission(&decommission, NULL);
+}
+
+static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
+{
struct sev_data_deactivate deactivate;
if (!handle)
@@ -214,9 +224,7 @@ static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
sev_guest_deactivate(&deactivate, NULL);
up_read(&sev_deactivate_lock);
- /* decommission handle */
- decommission.handle = handle;
- sev_guest_decommission(&decommission, NULL);
+ sev_decommission(handle);
}
static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
@@ -341,8 +349,10 @@ static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
/* Bind ASID to this guest */
ret = sev_bind_asid(kvm, start.handle, error);
- if (ret)
+ if (ret) {
+ sev_decommission(start.handle);
goto e_free_session;
+ }
/* return handle to userspace */
params.handle = start.handle;
@@ -1103,10 +1113,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_start data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
- if (ret < 0)
- return ret;
params->session_len = data.session_len;
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
@@ -1215,10 +1224,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
struct sev_data_send_update_data data;
int ret;
+ memset(&data, 0, sizeof(data));
data.handle = sev->handle;
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
- if (ret < 0)
- return ret;
params->hdr_len = data.hdr_len;
params->trans_len = data.trans_len;
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a61c015870e3..4f839148948b 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
TP_ARGS(msg, err),
TP_STRUCT__entry(
- __field(const char *, msg)
+ __string(msg, msg)
__field(u32, err)
),
TP_fast_assign(
- __entry->msg = msg;
+ __assign_str(msg, msg);
__entry->err = err;
),
- TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
+ TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 50b42d7a8a11..c2a779b688e6 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6247,6 +6247,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
switch (kvm_get_apic_mode(vcpu)) {
case LAPIC_MODE_INVALID:
WARN_ONCE(true, "Invalid local APIC state");
+ break;
case LAPIC_MODE_DISABLED:
break;
case LAPIC_MODE_XAPIC:
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b594275d49b5..e0f4a46649d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
{
++vcpu->stat.tlb_flush;
+
+ if (!tdp_enabled) {
+ /*
+ * A TLB flush on behalf of the guest is equivalent to
+ * INVPCID(all), toggling CR4.PGE, etc., which requires
+ * a forced sync of the shadow page tables. Unload the
+ * entire MMU here and the subsequent load will sync the
+ * shadow page tables, and also flush the TLB.
+ */
+ kvm_mmu_unload(vcpu);
+ return;
+ }
+
static_call(kvm_x86_tlb_flush_guest)(vcpu);
}
@@ -3101,9 +3114,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
* expensive IPIs.
*/
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
+ u8 st_preempted = xchg(&st->preempted, 0);
+
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
- st->preempted & KVM_VCPU_FLUSH_TLB);
- if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
+ st_preempted & KVM_VCPU_FLUSH_TLB);
+ if (st_preempted & KVM_VCPU_FLUSH_TLB)
kvm_vcpu_flush_tlb_guest(vcpu);
} else {
st->preempted = 0;
@@ -7091,7 +7106,10 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
{
- emul_to_vcpu(ctxt)->arch.hflags = emul_flags;
+ struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+
+ vcpu->arch.hflags = emul_flags;
+ kvm_mmu_reset_context(vcpu);
}
static int emulator_pre_leave_smm(struct x86_emulate_ctxt *ctxt,
@@ -8243,6 +8261,7 @@ void kvm_arch_exit(void)
kvm_x86_ops.hardware_enable = NULL;
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
+ kmem_cache_destroy(x86_emulator_cache);
kmem_cache_destroy(x86_fpu_cache);
#ifdef CONFIG_KVM_XEN
static_key_deferred_flush(&kvm_xen_enabled);
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 02dc64625e64..2edd86649468 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -779,4 +779,48 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1571, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x15b1, pci_amd_enable_64bit_bar);
DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AMD, 0x1601, pci_amd_enable_64bit_bar);
+#define RS690_LOWER_TOP_OF_DRAM2 0x30
+#define RS690_LOWER_TOP_OF_DRAM2_VALID 0x1
+#define RS690_UPPER_TOP_OF_DRAM2 0x31
+#define RS690_HTIU_NB_INDEX 0xA8
+#define RS690_HTIU_NB_INDEX_WR_ENABLE 0x100
+#define RS690_HTIU_NB_DATA 0xAC
+
+/*
+ * Some BIOS implementations support RAM above 4GB, but do not configure the
+ * PCI host to respond to bus master accesses for these addresses. These
+ * implementations set the TOP_OF_DRAM_SLOT1 register correctly, so PCI DMA
+ * works as expected for addresses below 4GB.
+ *
+ * Reference: "AMD RS690 ASIC Family Register Reference Guide" (pg. 2-57)
+ * https://www.amd.com/system/files/TechDocs/43372_rs690_rrg_3.00o.pdf
+ */
+static void rs690_fix_64bit_dma(struct pci_dev *pdev)
+{
+ u32 val = 0;
+ phys_addr_t top_of_dram = __pa(high_memory - 1) + 1;
+
+ if (top_of_dram <= (1ULL << 32))
+ return;
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2);
+ pci_read_config_dword(pdev, RS690_HTIU_NB_DATA, &val);
+
+ if (val)
+ return;
+
+ pci_info(pdev, "Adjusting top of DRAM to %pa for 64-bit DMA support\n", &top_of_dram);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_UPPER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA, top_of_dram >> 32);
+
+ pci_write_config_dword(pdev, RS690_HTIU_NB_INDEX,
+ RS690_LOWER_TOP_OF_DRAM2 | RS690_HTIU_NB_INDEX_WR_ENABLE);
+ pci_write_config_dword(pdev, RS690_HTIU_NB_DATA,
+ top_of_dram | RS690_LOWER_TOP_OF_DRAM2_VALID);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7910, rs690_fix_64bit_dma);
+
#endif