summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2021-01-07 18:06:52 -0500
committerPaolo Bonzini <pbonzini@redhat.com>2021-01-07 18:06:52 -0500
commitbc351f07260533cc1b3987339551decd00ddd52e (patch)
tree3b6f396681f08dacc8dd24ce2c586ea2195e825d /arch/x86
parentd45f89f7437d0f2c8275b4434096164db106384d (diff)
parent9aa418792f5f11ef5d6f72265e1f8ae07efd5784 (diff)
downloadlinux-stable-bc351f07260533cc1b3987339551decd00ddd52e.tar.gz
linux-stable-bc351f07260533cc1b3987339551decd00ddd52e.tar.bz2
linux-stable-bc351f07260533cc1b3987339551decd00ddd52e.zip
Merge branch 'kvm-master' into kvm-next
Fixes to get_mmio_spte, destined to 5.10 stable branch.
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Makefile3
-rw-r--r--arch/x86/boot/compressed/Makefile4
-rw-r--r--arch/x86/boot/compressed/sev-es.c5
-rw-r--r--arch/x86/events/intel/cstate.c6
-rw-r--r--arch/x86/events/intel/ds.c4
-rw-r--r--arch/x86/events/intel/uncore.c4
-rw-r--r--arch/x86/events/intel/uncore.h12
-rw-r--r--arch/x86/events/rapl.c14
-rw-r--r--arch/x86/include/asm/insn.h15
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mwait.h2
-rw-r--r--arch/x86/include/asm/pgtable_types.h1
-rw-r--r--arch/x86/include/asm/sparsemem.h10
-rw-r--r--arch/x86/include/asm/sync_core.h9
-rw-r--r--arch/x86/kernel/apic/vector.c24
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c2
-rw-r--r--arch/x86/kernel/cpu/bugs.c4
-rw-r--r--arch/x86/kernel/cpu/mce/core.c6
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c63
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h3
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c6
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c74
-rw-r--r--arch/x86/kernel/dumpstack.c23
-rw-r--r--arch/x86/kernel/kprobes/opt.c22
-rw-r--r--arch/x86/kernel/process.c12
-rw-r--r--arch/x86/kernel/tboot.c8
-rw-r--r--arch/x86/kernel/uprobes.c10
-rw-r--r--arch/x86/kvm/irq.c85
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/mmu/mmu.c53
-rw-r--r--arch/x86/kvm/mmu/spte.c4
-rw-r--r--arch/x86/kvm/mmu/spte.h25
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c13
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h4
-rw-r--r--arch/x86/kvm/svm/sev.c2
-rw-r--r--arch/x86/kvm/svm/svm.c8
-rw-r--r--arch/x86/kvm/x86.c18
-rw-r--r--arch/x86/lib/insn-eval.c10
-rw-r--r--arch/x86/mm/mem_encrypt_identity.c4
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/mm/tlb.c10
-rw-r--r--arch/x86/platform/efi/efi_64.c24
-rw-r--r--arch/x86/xen/spinlock.c12
45 files changed, 331 insertions, 297 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b81f74a..fbf26e0f7a6a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_HUGE_PMD_SHARE
+ select ARCH_WANT_LD_ORPHAN_WARN
select ARCH_WANTS_THP_SWAP if X86_64
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 154259f18b8b..1bf21746f4ce 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -209,9 +209,6 @@ ifdef CONFIG_X86_64
LDFLAGS_vmlinux += -z max-page-size=0x200000
endif
-# We never want expected sections to be placed heuristically by the
-# linker. All sections should be explicitly named in the linker script.
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
archscripts: scripts_basic
$(Q)$(MAKE) $(build)=arch/x86/tools relocs
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index ee249088cbfe..40b8fd375d52 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -61,7 +61,9 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
# Compressed kernel should be built as PIE since it may be loaded at any
# address by the bootloader.
LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
-LDFLAGS_vmlinux += $(call ld-option, --orphan-handling=warn)
+ifdef CONFIG_LD_ORPHAN_WARN
+LDFLAGS_vmlinux += --orphan-handling=warn
+endif
LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c
index 954cb2702e23..27826c265aab 100644
--- a/arch/x86/boot/compressed/sev-es.c
+++ b/arch/x86/boot/compressed/sev-es.c
@@ -32,13 +32,12 @@ struct ghcb *boot_ghcb;
*/
static bool insn_has_rep_prefix(struct insn *insn)
{
+ insn_byte_t p;
int i;
insn_get_prefixes(insn);
- for (i = 0; i < insn->prefixes.nbytes; i++) {
- insn_byte_t p = insn->prefixes.bytes[i];
-
+ for_each_insn_prefix(insn, i, p) {
if (p == 0xf2 || p == 0xf3)
return true;
}
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index 442e1ed4acd4..4eb7ee5fed72 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -107,14 +107,14 @@
MODULE_LICENSE("GPL");
#define DEFINE_CSTATE_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __cstate_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
+static ssize_t __cstate_##_var##_show(struct device *dev, \
+ struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
-static struct kobj_attribute format_attr_##_var = \
+static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __cstate_##_var##_show, NULL)
static ssize_t cstate_get_attr_cpumask(struct device *dev,
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index b47cc4226934..485c5066f8b8 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1916,7 +1916,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
* that caused the PEBS record. It's called collision.
* If collision happened, the record will be dropped.
*/
- if (p->status != (1ULL << bit)) {
+ if (pebs_status != (1ULL << bit)) {
for_each_set_bit(i, (unsigned long *)&pebs_status, size)
error[i]++;
continue;
@@ -1940,7 +1940,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
if (error[bit]) {
perf_log_lost_samples(event, error[bit]);
- if (perf_event_account_interrupt(event))
+ if (iregs && perf_event_account_interrupt(event))
x86_pmu_stop(event, 0);
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 86d012b3e0b4..80d52cbe2fde 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -94,8 +94,8 @@ end:
return map;
}
-ssize_t uncore_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+ssize_t uncore_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct uncore_event_desc *event =
container_of(attr, struct uncore_event_desc, attr);
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 83d2a7d490e0..9efea154349d 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -157,7 +157,7 @@ struct intel_uncore_box {
#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2
struct uncore_event_desc {
- struct kobj_attribute attr;
+ struct device_attribute attr;
const char *config;
};
@@ -179,8 +179,8 @@ struct pci2phy_map {
struct pci2phy_map *__find_pci2phy_map(int segment);
int uncore_pcibus_to_physid(struct pci_bus *bus);
-ssize_t uncore_event_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf);
+ssize_t uncore_event_show(struct device *dev,
+ struct device_attribute *attr, char *buf);
static inline struct intel_uncore_pmu *dev_to_uncore_pmu(struct device *dev)
{
@@ -201,14 +201,14 @@ extern int __uncore_max_dies;
}
#define DEFINE_UNCORE_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __uncore_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
+static ssize_t __uncore_##_var##_show(struct device *dev, \
+ struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
return sprintf(page, _format "\n"); \
} \
-static struct kobj_attribute format_attr_##_var = \
+static struct device_attribute format_attr_##_var = \
__ATTR(_name, 0444, __uncore_##_var##_show, NULL)
static inline bool uncore_pmc_fixed(int idx)
diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c
index 7c0120e2e957..7dbbeaacd995 100644
--- a/arch/x86/events/rapl.c
+++ b/arch/x86/events/rapl.c
@@ -93,18 +93,6 @@ static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
* any other bit is reserved
*/
#define RAPL_EVENT_MASK 0xFFULL
-
-#define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \
-static ssize_t __rapl_##_var##_show(struct kobject *kobj, \
- struct kobj_attribute *attr, \
- char *page) \
-{ \
- BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
- return sprintf(page, _format "\n"); \
-} \
-static struct kobj_attribute format_attr_##_var = \
- __ATTR(_name, 0444, __rapl_##_var##_show, NULL)
-
#define RAPL_CNTR_WIDTH 32
#define RAPL_EVENT_ATTR_STR(_name, v, str) \
@@ -441,7 +429,7 @@ static struct attribute_group rapl_pmu_events_group = {
.attrs = attrs_empty,
};
-DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7");
+PMU_FORMAT_ATTR(event, "config:0-7");
static struct attribute *rapl_formats_attr[] = {
&format_attr_event.attr,
NULL,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 5c1ae3eff9d4..a8c3d284fa46 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -201,6 +201,21 @@ static inline int insn_offset_immediate(struct insn *insn)
return insn_offset_displacement(insn) + insn->displacement.nbytes;
}
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx: Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
+ * and the index is stored in @idx (note that this @idx is just for a cursor,
+ * do not change it.)
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix) \
+ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 39707e72b062..3ab7b46087b7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1667,6 +1667,7 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index e039a933aca3..29dd27b5a339 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -88,8 +88,6 @@ static inline void __mwaitx(unsigned long eax, unsigned long ebx,
static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
{
- trace_hardirqs_on();
-
mds_idle_clear_cpu_buffers();
/* "mwait %eax, %ecx;" */
asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 816b31c68550..394757ee030a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -155,6 +155,7 @@ enum page_cache_mode {
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
+#define _PAGE_LARGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT_LARGE)
#define _PAGE_NOCACHE (cachemode2protval(_PAGE_CACHE_MODE_UC))
#define _PAGE_CACHE_WP (cachemode2protval(_PAGE_CACHE_MODE_WP))
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 6bfc878f6771..6a9ccc1b2be5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -28,4 +28,14 @@
#endif
#endif /* CONFIG_SPARSEMEM */
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_NUMA_KEEP_MEMINFO
+extern int phys_to_target_node(phys_addr_t start);
+#define phys_to_target_node phys_to_target_node
+extern int memory_add_physaddr_to_nid(u64 start);
+#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
+#endif
+#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index 0fd4a9dfb29c..ab7382f92aff 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -98,12 +98,13 @@ static inline void sync_core_before_usermode(void)
/* With PTI, we unconditionally serialize before running user code. */
if (static_cpu_has(X86_FEATURE_PTI))
return;
+
/*
- * Return from interrupt and NMI is done through iret, which is core
- * serializing.
+ * Even if we're in an interrupt, we might reschedule before returning,
+ * in which case we could switch to a different thread in the same mm
+ * and return using SYSRET or SYSEXIT. Instead of trying to keep
+ * track of our need to sync the core, just sync right away.
*/
- if (in_irq() || in_nmi())
- return;
sync_core();
}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 1eac53632786..758bbf25ef74 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -273,20 +273,24 @@ static int assign_irq_vector_any_locked(struct irq_data *irqd)
const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd);
int node = irq_data_get_node(irqd);
- if (node == NUMA_NO_NODE)
- goto all;
- /* Try the intersection of @affmsk and node mask */
- cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
- if (!assign_vector_locked(irqd, vector_searchmask))
- return 0;
- /* Try the node mask */
- if (!assign_vector_locked(irqd, cpumask_of_node(node)))
- return 0;
-all:
+ if (node != NUMA_NO_NODE) {
+ /* Try the intersection of @affmsk and node mask */
+ cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk);
+ if (!assign_vector_locked(irqd, vector_searchmask))
+ return 0;
+ }
+
/* Try the full affinity mask */
cpumask_and(vector_searchmask, affmsk, cpu_online_mask);
if (!assign_vector_locked(irqd, vector_searchmask))
return 0;
+
+ if (node != NUMA_NO_NODE) {
+ /* Try the node mask */
+ if (!assign_vector_locked(irqd, cpumask_of_node(node)))
+ return 0;
+ }
+
/* Try the full online mask */
return assign_vector_locked(irqd, cpu_online_mask);
}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 1b98f8c12b96..235f5cde06fc 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -161,7 +161,7 @@ static int __init early_set_hub_type(void)
/* UV4/4A only have a revision difference */
case UV4_HUB_PART_NUMBER:
uv_min_hub_revision_id = node_id.s.revision
- + UV4_HUB_REVISION_BASE;
+ + UV4_HUB_REVISION_BASE - 1;
uv_hub_type_set(UV4);
if (uv_min_hub_revision_id == UV4A_HUB_REVISION_BASE)
uv_hub_type_set(UV4|UV4A);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 581fb7223ad0..d41b70fe4918 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -739,11 +739,13 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
if (boot_cpu_has(X86_FEATURE_IBPB)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
+ spectre_v2_user_ibpb = mode;
switch (cmd) {
case SPECTRE_V2_USER_CMD_FORCE:
case SPECTRE_V2_USER_CMD_PRCTL_IBPB:
case SPECTRE_V2_USER_CMD_SECCOMP_IBPB:
static_branch_enable(&switch_mm_always_ibpb);
+ spectre_v2_user_ibpb = SPECTRE_V2_USER_STRICT;
break;
case SPECTRE_V2_USER_CMD_PRCTL:
case SPECTRE_V2_USER_CMD_AUTO:
@@ -757,8 +759,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
static_key_enabled(&switch_mm_always_ibpb) ?
"always-on" : "conditional");
-
- spectre_v2_user_ibpb = mode;
}
/*
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 4102b866e7c0..32b7099e3511 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1384,8 +1384,10 @@ noinstr void do_machine_check(struct pt_regs *regs)
* When there's any problem use only local no_way_out state.
*/
if (!lmce) {
- if (mce_end(order) < 0)
- no_way_out = worst >= MCE_PANIC_SEVERITY;
+ if (mce_end(order) < 0) {
+ if (!no_way_out)
+ no_way_out = worst >= MCE_PANIC_SEVERITY;
+ }
} else {
/*
* If there was a fatal machine check we should have
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 6a99535d7f37..7e8e07bddd5f 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -100,53 +100,6 @@ static int has_newer_microcode(void *mc, unsigned int csig, int cpf, int new_rev
return find_matching_signature(mc, csig, cpf);
}
-/*
- * Given CPU signature and a microcode patch, this function finds if the
- * microcode patch has matching family and model with the CPU.
- *
- * %true - if there's a match
- * %false - otherwise
- */
-static bool microcode_matches(struct microcode_header_intel *mc_header,
- unsigned long sig)
-{
- unsigned long total_size = get_totalsize(mc_header);
- unsigned long data_size = get_datasize(mc_header);
- struct extended_sigtable *ext_header;
- unsigned int fam_ucode, model_ucode;
- struct extended_signature *ext_sig;
- unsigned int fam, model;
- int ext_sigcount, i;
-
- fam = x86_family(sig);
- model = x86_model(sig);
-
- fam_ucode = x86_family(mc_header->sig);
- model_ucode = x86_model(mc_header->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return true;
-
- /* Look for ext. headers: */
- if (total_size <= data_size + MC_HEADER_SIZE)
- return false;
-
- ext_header = (void *) mc_header + data_size + MC_HEADER_SIZE;
- ext_sig = (void *)ext_header + EXT_HEADER_SIZE;
- ext_sigcount = ext_header->count;
-
- for (i = 0; i < ext_sigcount; i++) {
- fam_ucode = x86_family(ext_sig->sig);
- model_ucode = x86_model(ext_sig->sig);
-
- if (fam == fam_ucode && model == model_ucode)
- return true;
-
- ext_sig++;
- }
- return false;
-}
-
static struct ucode_patch *memdup_patch(void *data, unsigned int size)
{
struct ucode_patch *p;
@@ -164,7 +117,7 @@ static struct ucode_patch *memdup_patch(void *data, unsigned int size)
return p;
}
-static void save_microcode_patch(void *data, unsigned int size)
+static void save_microcode_patch(struct ucode_cpu_info *uci, void *data, unsigned int size)
{
struct microcode_header_intel *mc_hdr, *mc_saved_hdr;
struct ucode_patch *iter, *tmp, *p = NULL;
@@ -210,6 +163,9 @@ static void save_microcode_patch(void *data, unsigned int size)
if (!p)
return;
+ if (!find_matching_signature(p->data, uci->cpu_sig.sig, uci->cpu_sig.pf))
+ return;
+
/*
* Save for early loading. On 32-bit, that needs to be a physical
* address as the APs are running from physical addresses, before
@@ -344,13 +300,14 @@ scan_microcode(void *data, size_t size, struct ucode_cpu_info *uci, bool save)
size -= mc_size;
- if (!microcode_matches(mc_header, uci->cpu_sig.sig)) {
+ if (!find_matching_signature(data, uci->cpu_sig.sig,
+ uci->cpu_sig.pf)) {
data += mc_size;
continue;
}
if (save) {
- save_microcode_patch(data, mc_size);
+ save_microcode_patch(uci, data, mc_size);
goto next;
}
@@ -483,14 +440,14 @@ static void show_saved_mc(void)
* Save this microcode patch. It will be loaded early when a CPU is
* hot-added or resumes.
*/
-static void save_mc_for_early(u8 *mc, unsigned int size)
+static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size)
{
/* Synchronization during CPU hotplug. */
static DEFINE_MUTEX(x86_cpu_microcode_mutex);
mutex_lock(&x86_cpu_microcode_mutex);
- save_microcode_patch(mc, size);
+ save_microcode_patch(uci, mc, size);
show_saved_mc();
mutex_unlock(&x86_cpu_microcode_mutex);
@@ -935,7 +892,7 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
* permanent memory. So it will be loaded early when a CPU is hot added
* or resumes.
*/
- save_mc_for_early(new_mc, new_mc_size);
+ save_mc_for_early(uci, new_mc, new_mc_size);
pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n",
cpu, new_rev, uci->cpu_sig.rev);
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index e5f4ee8f4c3b..e8b5f1cf1ae8 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -570,6 +570,8 @@ static void domain_add_cpu(int cpu, struct rdt_resource *r)
if (d) {
cpumask_set_cpu(cpu, &d->cpu_mask);
+ if (r->cache.arch_has_per_cpu_cfg)
+ rdt_domain_reconfigure_cdp(r);
return;
}
@@ -923,6 +925,7 @@ static __init void rdt_init_res_defs_intel(void)
r->rid == RDT_RESOURCE_L2CODE) {
r->cache.arch_has_sparse_bitmaps = false;
r->cache.arch_has_empty_bitmaps = false;
+ r->cache.arch_has_per_cpu_cfg = false;
} else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_THRTL_BASE;
r->msr_update = mba_wrmsr_intel;
@@ -943,6 +946,7 @@ static __init void rdt_init_res_defs_amd(void)
r->rid == RDT_RESOURCE_L2CODE) {
r->cache.arch_has_sparse_bitmaps = true;
r->cache.arch_has_empty_bitmaps = true;
+ r->cache.arch_has_per_cpu_cfg = true;
} else if (r->rid == RDT_RESOURCE_MBA) {
r->msr_base = MSR_IA32_MBA_BW_BASE;
r->msr_update = mba_wrmsr_amd;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 80fa997fae60..f65d3c0dbc41 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -360,6 +360,8 @@ struct msr_param {
* executing entities
* @arch_has_sparse_bitmaps: True if a bitmap like f00f is valid.
* @arch_has_empty_bitmaps: True if the '0' bitmap is valid.
+ * @arch_has_per_cpu_cfg: True if QOS_CFG register for this cache
+ * level has CPU scope.
*/
struct rdt_cache {
unsigned int cbm_len;
@@ -369,6 +371,7 @@ struct rdt_cache {
unsigned int shareable_bits;
bool arch_has_sparse_bitmaps;
bool arch_has_empty_bitmaps;
+ bool arch_has_per_cpu_cfg;
};
/**
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index 54dffe574e67..a98519a3a2e6 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -279,7 +279,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
return;
chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width);
- m->chunks += chunks;
cur_bw = (chunks * r->mon_scale) >> 20;
if (m->delta_comp)
@@ -450,15 +449,14 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
+ __mon_event_count(rmid, &rr);
/*
* Call the MBA software controller only for the
* control groups and when user has enabled
* the software controller explicitly.
*/
- if (!is_mba_sc(NULL))
- __mon_event_count(rmid, &rr);
- else
+ if (is_mba_sc(NULL))
mbm_bw_count(rmid, &rr);
}
}
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index af323e2e3100..f3418428682b 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -507,6 +507,24 @@ unlock:
return ret ?: nbytes;
}
+/**
+ * rdtgroup_remove - the helper to remove resource group safely
+ * @rdtgrp: resource group to remove
+ *
+ * On resource group creation via a mkdir, an extra kernfs_node reference is
+ * taken to ensure that the rdtgroup structure remains accessible for the
+ * rdtgroup_kn_unlock() calls where it is removed.
+ *
+ * Drop the extra reference here, then free the rdtgroup structure.
+ *
+ * Return: void
+ */
+static void rdtgroup_remove(struct rdtgroup *rdtgrp)
+{
+ kernfs_put(rdtgrp->kn);
+ kfree(rdtgrp);
+}
+
struct task_move_callback {
struct callback_head work;
struct rdtgroup *rdtgrp;
@@ -529,7 +547,7 @@ static void move_myself(struct callback_head *head)
(rdtgrp->flags & RDT_DELETED)) {
current->closid = 0;
current->rmid = 0;
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
}
if (unlikely(current->flags & PF_EXITING))
@@ -1769,7 +1787,6 @@ static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
if (IS_ERR(kn_subdir))
return PTR_ERR(kn_subdir);
- kernfs_get(kn_subdir);
ret = rdtgroup_kn_set_ugid(kn_subdir);
if (ret)
return ret;
@@ -1792,7 +1809,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
if (IS_ERR(kn_info))
return PTR_ERR(kn_info);
- kernfs_get(kn_info);
ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
if (ret)
@@ -1813,12 +1829,6 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
goto out_destroy;
}
- /*
- * This extra ref will be put in kernfs_remove() and guarantees
- * that @rdtgrp->kn is always accessible.
- */
- kernfs_get(kn_info);
-
ret = rdtgroup_kn_set_ugid(kn_info);
if (ret)
goto out_destroy;
@@ -1847,12 +1857,6 @@ mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
if (dest_kn)
*dest_kn = kn;
- /*
- * This extra ref will be put in kernfs_remove() and guarantees
- * that @rdtgrp->kn is always accessible.
- */
- kernfs_get(kn);
-
ret = rdtgroup_kn_set_ugid(kn);
if (ret)
goto out_destroy;
@@ -1905,8 +1909,13 @@ static int set_cache_qos_cfg(int level, bool enable)
r_l = &rdt_resources_all[level];
list_for_each_entry(d, &r_l->domains, list) {
- /* Pick one CPU from each domain instance to update MSR */
- cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
+ if (r_l->cache.arch_has_per_cpu_cfg)
+ /* Pick all the CPUs in the domain instance */
+ for_each_cpu(cpu, &d->cpu_mask)
+ cpumask_set_cpu(cpu, cpu_mask);
+ else
+ /* Pick one CPU from each domain instance to update MSR */
+ cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
}
cpu = get_cpu();
/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
@@ -2079,8 +2088,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
rdtgroup_pseudo_lock_remove(rdtgrp);
kernfs_unbreak_active_protection(kn);
- kernfs_put(rdtgrp->kn);
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
} else {
kernfs_unbreak_active_protection(kn);
}
@@ -2139,13 +2147,11 @@ static int rdt_get_tree(struct fs_context *fc)
&kn_mongrp);
if (ret < 0)
goto out_info;
- kernfs_get(kn_mongrp);
ret = mkdir_mondata_all(rdtgroup_default.kn,
&rdtgroup_default, &kn_mondata);
if (ret < 0)
goto out_mongrp;
- kernfs_get(kn_mondata);
rdtgroup_default.mon.mon_data_kn = kn_mondata;
}
@@ -2357,7 +2363,7 @@ static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
if (atomic_read(&sentry->waitcount) != 0)
sentry->flags = RDT_DELETED;
else
- kfree(sentry);
+ rdtgroup_remove(sentry);
}
}
@@ -2399,7 +2405,7 @@ static void rmdir_all_sub(void)
if (atomic_read(&rdtgrp->waitcount) != 0)
rdtgrp->flags = RDT_DELETED;
else
- kfree(rdtgrp);
+ rdtgroup_remove(rdtgrp);
}
/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
update_closid_rmid(cpu_online_mask, &rdtgroup_default);
@@ -2499,11 +2505,6 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
if (IS_ERR(kn))
return PTR_ERR(kn);
- /*
- * This extra ref will be put in kernfs_remove() and guarantees
- * that kn is always accessible.
- */
- kernfs_get(kn);
ret = rdtgroup_kn_set_ugid(kn);
if (ret)
goto out_destroy;
@@ -2838,8 +2839,8 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
/*
* kernfs_remove() will drop the reference count on "kn" which
* will free it. But we still need it to stick around for the
- * rdtgroup_kn_unlock(kn} call below. Take one extra reference
- * here, which will be dropped inside rdtgroup_kn_unlock().
+ * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
+ * which will be dropped by kernfs_put() in rdtgroup_remove().
*/
kernfs_get(kn);
@@ -2880,6 +2881,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
out_idfree:
free_rmid(rdtgrp->mon.rmid);
out_destroy:
+ kernfs_put(rdtgrp->kn);
kernfs_remove(rdtgrp->kn);
out_free_rgrp:
kfree(rdtgrp);
@@ -2892,7 +2894,7 @@ static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
{
kernfs_remove(rgrp->kn);
free_rmid(rgrp->mon.rmid);
- kfree(rgrp);
+ rdtgroup_remove(rgrp);
}
/*
@@ -3049,11 +3051,6 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
list_del(&rdtgrp->mon.crdtgrp_list);
- /*
- * one extra hold on this, will drop when we kfree(rdtgrp)
- * in rdtgroup_kn_unlock()
- */
- kernfs_get(kn);
kernfs_remove(rdtgrp->kn);
return 0;
@@ -3065,11 +3062,6 @@ static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
rdtgrp->flags = RDT_DELETED;
list_del(&rdtgrp->rdtgroup_list);
- /*
- * one extra hold on this, will drop when we kfree(rdtgrp)
- * in rdtgroup_kn_unlock()
- */
- kernfs_get(kn);
kernfs_remove(rdtgrp->kn);
return 0;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 25c06b67e7e0..97aa900386cb 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -78,6 +78,9 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
if (!user_mode(regs))
return copy_from_kernel_nofault(buf, (u8 *)src, nbytes);
+ /* The user space code from other tasks cannot be accessed. */
+ if (regs != task_pt_regs(current))
+ return -EPERM;
/*
* Make sure userspace isn't trying to trick us into dumping kernel
* memory by pointing the userspace instruction pointer at it.
@@ -85,6 +88,12 @@ static int copy_code(struct pt_regs *regs, u8 *buf, unsigned long src,
if (__chk_range_not_ok(src, nbytes, TASK_SIZE_MAX))
return -EINVAL;
+ /*
+ * Even if named copy_from_user_nmi() this can be invoked from
+ * other contexts and will not try to resolve a pagefault, which is
+ * the correct thing to do here as this code can be called from any
+ * context.
+ */
return copy_from_user_nmi(buf, (void __user *)src, nbytes);
}
@@ -115,13 +124,19 @@ void show_opcodes(struct pt_regs *regs, const char *loglvl)
u8 opcodes[OPCODE_BUFSIZE];
unsigned long prologue = regs->ip - PROLOGUE_SIZE;
- if (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
- printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
- loglvl, prologue);
- } else {
+ switch (copy_code(regs, opcodes, prologue, sizeof(opcodes))) {
+ case 0:
printk("%sCode: %" __stringify(PROLOGUE_SIZE) "ph <%02x> %"
__stringify(EPILOGUE_SIZE) "ph\n", loglvl, opcodes,
opcodes[PROLOGUE_SIZE], opcodes + PROLOGUE_SIZE + 1);
+ break;
+ case -EPERM:
+ /* No access to the user space stack of other tasks. Ignore. */
+ break;
+ default:
+ printk("%sCode: Unable to access opcode bytes at RIP 0x%lx.\n",
+ loglvl, prologue);
+ break;
}
}
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 041f0b50bc27..08eb23074f92 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -272,6 +272,19 @@ static int insn_is_indirect_jump(struct insn *insn)
return ret;
}
+static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+{
+ unsigned char ops;
+
+ for (; addr < eaddr; addr++) {
+ if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+ ops != INT3_INSN_OPCODE)
+ return false;
+ }
+
+ return true;
+}
+
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
@@ -310,9 +323,14 @@ static int can_optimize(unsigned long paddr)
return 0;
kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
insn_get_length(&insn);
- /* Another subsystem puts a breakpoint */
+ /*
+ * In the case of detecting unknown breakpoint, this could be
+ * a padding INT3 between functions. Let's check that all the
+ * rest of the bytes are also INT3.
+ */
if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
- return 0;
+ return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+
/* Recover address */
insn.kaddr = (void *)addr;
insn.next_byte = (void *)(addr + insn.length);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ba4593a913fa..145a7ac0c19a 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -685,7 +685,7 @@ void arch_cpu_idle(void)
*/
void __cpuidle default_idle(void)
{
- safe_halt();
+ raw_safe_halt();
}
#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE)
EXPORT_SYMBOL(default_idle);
@@ -736,6 +736,8 @@ void stop_this_cpu(void *dummy)
/*
* AMD Erratum 400 aware idle routine. We handle it the same way as C3 power
* states (local apic timer and TSC stop).
+ *
+ * XXX this function is completely buggered vs RCU and tracing.
*/
static void amd_e400_idle(void)
{
@@ -757,9 +759,9 @@ static void amd_e400_idle(void)
* The switch back from broadcast mode needs to be called with
* interrupts disabled.
*/
- local_irq_disable();
+ raw_local_irq_disable();
tick_broadcast_exit();
- local_irq_enable();
+ raw_local_irq_enable();
}
/*
@@ -801,9 +803,9 @@ static __cpuidle void mwait_idle(void)
if (!need_resched())
__sti_mwait(0, 0);
else
- local_irq_enable();
+ raw_local_irq_enable();
} else {
- local_irq_enable();
+ raw_local_irq_enable();
}
__current_clr_polling();
}
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 992fb1415c0f..ae64f98ec2ab 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -514,16 +514,10 @@ int tboot_force_iommu(void)
if (!tboot_enabled())
return 0;
- if (intel_iommu_tboot_noforce)
- return 1;
-
- if (no_iommu || swiotlb || dmar_disabled)
+ if (no_iommu || dmar_disabled)
pr_warn("Forcing Intel-IOMMU to enabled\n");
dmar_disabled = 0;
-#ifdef CONFIG_SWIOTLB
- swiotlb = 0;
-#endif
no_iommu = 0;
return 1;
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3fdaa042823d..138bdb1fd136 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -255,12 +255,13 @@ static volatile u32 good_2byte_insns[256 / 32] = {
static bool is_prefix_bad(struct insn *insn)
{
+ insn_byte_t p;
int i;
- for (i = 0; i < insn->prefixes.nbytes; i++) {
+ for_each_insn_prefix(insn, i, p) {
insn_attr_t attr;
- attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+ attr = inat_get_opcode_attribute(p);
switch (attr) {
case INAT_MAKE_PREFIX(INAT_PFX_ES):
case INAT_MAKE_PREFIX(INAT_PFX_CS):
@@ -715,6 +716,7 @@ static const struct uprobe_xol_ops push_xol_ops = {
static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
u8 opc1 = OPCODE1(insn);
+ insn_byte_t p;
int i;
switch (opc1) {
@@ -746,8 +748,8 @@ static int branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
* Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix.
* No one uses these insns, reject any branch insns with such prefix.
*/
- for (i = 0; i < insn->prefixes.nbytes; i++) {
- if (insn->prefixes.bytes[i] == 0x66)
+ for_each_insn_prefix(insn, i, p) {
+ if (p == 0x66)
return -ENOTSUPP;
}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 99d118ffc67d..814698e5b152 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -40,29 +40,10 @@ static int pending_userspace_extint(struct kvm_vcpu *v)
* check if there is pending interrupt from
* non-APIC source without intack.
*/
-static int kvm_cpu_has_extint(struct kvm_vcpu *v)
-{
- u8 accept = kvm_apic_accept_pic_intr(v);
-
- if (accept) {
- if (irqchip_split(v->kvm))
- return pending_userspace_extint(v);
- else
- return v->kvm->arch.vpic->output;
- } else
- return 0;
-}
-
-/*
- * check if there is injectable interrupt:
- * when virtual interrupt delivery enabled,
- * interrupt from apic will handled by hardware,
- * we don't need to check it here.
- */
-int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+int kvm_cpu_has_extint(struct kvm_vcpu *v)
{
/*
- * FIXME: interrupt.injected represents an interrupt that it's
+ * FIXME: interrupt.injected represents an interrupt whose
* side-effects have already been applied (e.g. bit from IRR
* already moved to ISR). Therefore, it is incorrect to rely
* on interrupt.injected to know if there is a pending
@@ -75,6 +56,23 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
if (!lapic_in_kernel(v))
return v->arch.interrupt.injected;
+ if (!kvm_apic_accept_pic_intr(v))
+ return 0;
+
+ if (irqchip_split(v->kvm))
+ return pending_userspace_extint(v);
+ else
+ return v->kvm->arch.vpic->output;
+}
+
+/*
+ * check if there is injectable interrupt:
+ * when virtual interrupt delivery enabled,
+ * interrupt from apic will handled by hardware,
+ * we don't need to check it here.
+ */
+int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v)
+{
if (kvm_cpu_has_extint(v))
return 1;
@@ -91,20 +89,6 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr);
*/
int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
{
- /*
- * FIXME: interrupt.injected represents an interrupt that it's
- * side-effects have already been applied (e.g. bit from IRR
- * already moved to ISR). Therefore, it is incorrect to rely
- * on interrupt.injected to know if there is a pending
- * interrupt in the user-mode LAPIC.
- * This leads to nVMX/nSVM not be able to distinguish
- * if it should exit from L2 to L1 on EXTERNAL_INTERRUPT on
- * pending interrupt or should re-inject an injected
- * interrupt.
- */
- if (!lapic_in_kernel(v))
- return v->arch.interrupt.injected;
-
if (kvm_cpu_has_extint(v))
return 1;
@@ -118,16 +102,21 @@ EXPORT_SYMBOL_GPL(kvm_cpu_has_interrupt);
*/
static int kvm_cpu_get_extint(struct kvm_vcpu *v)
{
- if (kvm_cpu_has_extint(v)) {
- if (irqchip_split(v->kvm)) {
- int vector = v->arch.pending_external_vector;
-
- v->arch.pending_external_vector = -1;
- return vector;
- } else
- return kvm_pic_read_irq(v->kvm); /* PIC */
- } else
+ if (!kvm_cpu_has_extint(v)) {
+ WARN_ON(!lapic_in_kernel(v));
return -1;
+ }
+
+ if (!lapic_in_kernel(v))
+ return v->arch.interrupt.nr;
+
+ if (irqchip_split(v->kvm)) {
+ int vector = v->arch.pending_external_vector;
+
+ v->arch.pending_external_vector = -1;
+ return vector;
+ } else
+ return kvm_pic_read_irq(v->kvm); /* PIC */
}
/*
@@ -135,13 +124,7 @@ static int kvm_cpu_get_extint(struct kvm_vcpu *v)
*/
int kvm_cpu_get_interrupt(struct kvm_vcpu *v)
{
- int vector;
-
- if (!lapic_in_kernel(v))
- return v->arch.interrupt.nr;
-
- vector = kvm_cpu_get_extint(v);
-
+ int vector = kvm_cpu_get_extint(v);
if (vector != -1)
return vector; /* PIC */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6a87623aa578..3136e05831cf 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2465,7 +2465,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
struct kvm_lapic *apic = vcpu->arch.apic;
u32 ppr;
- if (!kvm_apic_hw_enabled(apic))
+ if (!kvm_apic_present(vcpu))
return -1;
__apic_update_ppr(apic, &ppr);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5dfe0ede0e81..6d16481aa29d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3493,26 +3493,25 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
{
struct kvm_shadow_walk_iterator iterator;
- int leaf = vcpu->arch.mmu->root_level;
+ int leaf = -1;
u64 spte;
-
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr);
+ for (shadow_walk_init(&iterator, vcpu, addr),
+ *root_level = iterator.level;
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
- sptes[leaf - 1] = spte;
+ sptes[leaf] = spte;
if (!is_shadow_present_pte(spte))
break;
-
}
walk_shadow_page_lockless_end(vcpu);
@@ -3520,14 +3519,12 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
return leaf;
}
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
- u64 sptes[PT64_ROOT_MAX_LEVEL];
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
struct rsvd_bits_validate *rsvd_check;
- int root = vcpu->arch.mmu->root_level;
- int leaf;
- int level;
+ int root, leaf, level;
bool reserved = false;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3536,35 +3533,45 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
}
if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
- leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
else
- leaf = get_walk(vcpu, addr, sptes);
+ leaf = get_walk(vcpu, addr, sptes, &root);
+
+ if (unlikely(leaf < 0)) {
+ *sptep = 0ull;
+ return reserved;
+ }
+
+ *sptep = sptes[leaf];
+
+ /*
+ * Skip reserved bits checks on the terminal leaf if it's not a valid
+ * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by
+ * design, always have reserved bits set. The purpose of the checks is
+ * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
+ */
+ if (!is_shadow_present_pte(sptes[leaf]))
+ leaf++;
rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
- for (level = root; level >= leaf; level--) {
- if (!is_shadow_present_pte(sptes[level - 1]))
- break;
+ for (level = root; level >= leaf; level--)
/*
* Use a bitwise-OR instead of a logical-OR to aggregate the
* reserved bit and EPT's invalid memtype/XWR checks to avoid
* adding a Jcc in the loop.
*/
- reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
- __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
- level);
- }
+ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+ __is_rsvd_bits_set(rsvd_check, sptes[level], level);
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
for (level = root; level >= leaf; level--)
pr_err("------ spte 0x%llx level %d.\n",
- sptes[level - 1], level);
+ sptes[level], level);
}
- *sptep = sptes[leaf - 1];
-
return reserved;
}
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index fcac2cac78fe..c51ad544f25b 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen)
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
- mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
- mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
+ mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
+ mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
return mask;
}
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 5c75a451c000..2b3a30bd38b0 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -56,11 +56,11 @@
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
/*
- * Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
+ * Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
* the memslots generation and is derived as follows:
*
* Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
- * Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
+ * Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
*
* The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
* the MMIO generation number, as doing so would require stealing a bit from
@@ -69,18 +69,29 @@
* requires a full MMU zap). The flag is instead explicitly queried when
* checking for MMIO spte cache hits.
*/
-#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
#define MMIO_SPTE_GEN_LOW_START 3
#define MMIO_SPTE_GEN_LOW_END 11
-#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
- MMIO_SPTE_GEN_LOW_START)
#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
#define MMIO_SPTE_GEN_HIGH_END 62
+
+#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
+ MMIO_SPTE_GEN_LOW_START)
#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
MMIO_SPTE_GEN_HIGH_START)
+#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
+#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
+
+/* remember to adjust the comment above as well if you change these */
+static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
+
+#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
+#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
+
+#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
+
extern u64 __read_mostly shadow_nx_mask;
extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
extern u64 __read_mostly shadow_user_mask;
@@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
{
u64 gen;
- gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
- gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
+ gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
+ gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
return gen;
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 75db27fda8f3..6574af2d0994 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -68,7 +68,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
{
- gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+ gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
lockdep_assert_held(&kvm->mmu_lock);
@@ -464,7 +464,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
- gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
+ gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
bool flush;
flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);
@@ -1160,16 +1160,19 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
{
struct tdp_iter iter;
struct kvm_mmu *mmu = vcpu->arch.mmu;
- int leaf = vcpu->arch.mmu->shadow_root_level;
gfn_t gfn = addr >> PAGE_SHIFT;
+ int leaf = -1;
+
+ *root_level = vcpu->arch.mmu->shadow_root_level;
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
leaf = iter.level;
- sptes[leaf - 1] = iter.old_spte;
+ sptes[leaf] = iter.old_spte;
}
return leaf;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level);
+
#endif /* __KVM_X86_MMU_TDP_MMU_H */
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index e57847ff8bd2..9858d5ae9ddd 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -754,8 +754,8 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
* Its safe to read more than we are asked, caller should ensure that
* destination has enough space.
*/
- src_paddr = round_down(src_paddr, 16);
offset = src_paddr & 15;
+ src_paddr = round_down(src_paddr, 16);
sz = round_up(sz + offset, 16);
return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 941e5251e13f..cce0143a6f80 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -546,12 +546,12 @@ static int svm_hardware_enable(void)
static void svm_cpu_uninit(int cpu)
{
- struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
+ struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
if (!sd)
return;
- per_cpu(svm_data, raw_smp_processor_id()) = NULL;
+ per_cpu(svm_data, cpu) = NULL;
kfree(sd->sev_vmcbs);
__free_page(sd->save_area);
kfree(sd);
@@ -1347,8 +1347,10 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
svm->avic_is_running = true;
svm->msrpm = svm_vcpu_alloc_msrpm();
- if (!svm->msrpm)
+ if (!svm->msrpm) {
+ err = -ENOMEM;
goto error_free_vmsa_page;
+ }
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 648c677b12e9..e28ab76b80dc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4082,21 +4082,23 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
static int kvm_cpu_accept_dm_intr(struct kvm_vcpu *vcpu)
{
+ /*
+ * We can accept userspace's request for interrupt injection
+ * as long as we have a place to store the interrupt number.
+ * The actual injection will happen when the CPU is able to
+ * deliver the interrupt.
+ */
+ if (kvm_cpu_has_extint(vcpu))
+ return false;
+
+ /* Acknowledging ExtINT does not happen if LINT0 is masked. */
return (!lapic_in_kernel(vcpu) ||
kvm_apic_accept_pic_intr(vcpu));
}
-/*
- * if userspace requested an interrupt window, check that the
- * interrupt window is open.
- *
- * No need to exit to userspace if we already have an interrupt queued.
- */
static int kvm_vcpu_ready_for_interrupt_injection(struct kvm_vcpu *vcpu)
{
return kvm_arch_interrupt_allowed(vcpu) &&
- !kvm_cpu_has_interrupt(vcpu) &&
- !kvm_event_needs_reinjection(vcpu) &&
kvm_cpu_accept_dm_intr(vcpu);
}
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 58f7fb95c7f4..4229950a5d78 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -63,13 +63,12 @@ static bool is_string_insn(struct insn *insn)
*/
bool insn_has_rep_prefix(struct insn *insn)
{
+ insn_byte_t p;
int i;
insn_get_prefixes(insn);
- for (i = 0; i < insn->prefixes.nbytes; i++) {
- insn_byte_t p = insn->prefixes.bytes[i];
-
+ for_each_insn_prefix(insn, i, p) {
if (p == 0xf2 || p == 0xf3)
return true;
}
@@ -95,14 +94,15 @@ static int get_seg_reg_override_idx(struct insn *insn)
{
int idx = INAT_SEG_REG_DEFAULT;
int num_overrides = 0, i;
+ insn_byte_t p;
insn_get_prefixes(insn);
/* Look for any segment override prefixes. */
- for (i = 0; i < insn->prefixes.nbytes; i++) {
+ for_each_insn_prefix(insn, i, p) {
insn_attr_t attr;
- attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+ attr = inat_get_opcode_attribute(p);
switch (attr) {
case INAT_MAKE_PREFIX(INAT_PFX_CS):
idx = INAT_SEG_REG_CS;
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 733b983f3a89..6c5eb6f3f14f 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -45,8 +45,8 @@
#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL)
#define PMD_FLAGS_DEC PMD_FLAGS_LARGE
-#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \
- (_PAGE_PAT | _PAGE_PWT))
+#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_LARGE_CACHE_MASK) | \
+ (_PAGE_PAT_LARGE | _PAGE_PWT))
#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 44148691d78b..5eb4dc2b97da 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -938,6 +938,7 @@ int phys_to_target_node(phys_addr_t start)
return meminfo_to_nid(&numa_reserved_meminfo, start);
}
+EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start)
{
@@ -947,4 +948,5 @@ int memory_add_physaddr_to_nid(u64 start)
nid = numa_meminfo.blk[0].nid;
return nid;
}
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 11666ba19b62..569ac1d57f55 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -474,8 +474,14 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
/*
* The membarrier system call requires a full memory barrier and
* core serialization before returning to user-space, after
- * storing to rq->curr. Writing to CR3 provides that full
- * memory barrier and core serializing instruction.
+ * storing to rq->curr, when changing mm. This is because
+ * membarrier() sends IPIs to all CPUs that are in the target mm
+ * to make them issue memory barriers. However, if another CPU
+ * switches to/from the target mm concurrently with
+ * membarrier(), it can cause that CPU not to receive an IPI
+ * when it really should issue a memory barrier. Writing to CR3
+ * provides that full memory barrier and core serializing
+ * instruction.
*/
if (real_prev == next) {
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 8f5759df7776..e1e8d4e3a213 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -78,28 +78,30 @@ int __init efi_alloc_page_tables(void)
gfp_mask = GFP_KERNEL | __GFP_ZERO;
efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
if (!efi_pgd)
- return -ENOMEM;
+ goto fail;
pgd = efi_pgd + pgd_index(EFI_VA_END);
p4d = p4d_alloc(&init_mm, pgd, EFI_VA_END);
- if (!p4d) {
- free_page((unsigned long)efi_pgd);
- return -ENOMEM;
- }
+ if (!p4d)
+ goto free_pgd;
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
- if (!pud) {
- if (pgtable_l5_enabled())
- free_page((unsigned long) pgd_page_vaddr(*pgd));
- free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
- return -ENOMEM;
- }
+ if (!pud)
+ goto free_p4d;
efi_mm.pgd = efi_pgd;
mm_init_cpumask(&efi_mm);
init_new_context(NULL, &efi_mm);
return 0;
+
+free_p4d:
+ if (pgtable_l5_enabled())
+ free_page((unsigned long)pgd_page_vaddr(*pgd));
+free_pgd:
+ free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
+fail:
+ return -ENOMEM;
}
/*
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 799f4eba0a62..043c73dfd2c9 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -93,10 +93,20 @@ void xen_init_lock_cpu(int cpu)
void xen_uninit_lock_cpu(int cpu)
{
+ int irq;
+
if (!xen_pvspin)
return;
- unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+ /*
+ * When booting the kernel with 'mitigations=auto,nosmt', the secondary
+ * CPUs are not activated, and lock_kicker_irq is not initialized.
+ */
+ irq = per_cpu(lock_kicker_irq, cpu);
+ if (irq == -1)
+ return;
+
+ unbind_from_irqhandler(irq, NULL);
per_cpu(lock_kicker_irq, cpu) = -1;
kfree(per_cpu(irq_name, cpu));
per_cpu(irq_name, cpu) = NULL;