| author    | Tony Lindgren <tony@atomide.com> | 2017-11-28 08:12:32 -0800 |
|-----------|----------------------------------|---------------------------|
| committer | Tony Lindgren <tony@atomide.com> | 2017-11-28 08:12:32 -0800 |
| commit    | bc686442f8a601bccac1f22506ecdb4b0d62cadd (patch) | |
| tree      | b224ab4aa2350b233da640f5850f48bc6bfeb2d0 /arch/x86/kernel | |
| parent    | 60636a5d0fa2f8bc6d0c23c4027100ba20866f9b (diff) | |
| parent    | ca41e244517d6d3f1600c229ff7ca615049c1e9c (diff) | |
Merge branch 'dts-fixes' into omap-for-v4.15/fixes-dt
Diffstat (limited to 'arch/x86/kernel')
171 files changed, 3274 insertions, 2146 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index fd0a7895b63f..81bb565f4497 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for the linux kernel.
 #
@@ -24,9 +25,9 @@ endif
 KASAN_SANITIZE_head$(BITS).o := n
 KASAN_SANITIZE_dumpstack.o := n
 KASAN_SANITIZE_dumpstack_$(BITS).o := n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n
 
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_test_nx.o := y
@@ -126,10 +127,11 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
 obj-$(CONFIG_TRACING) += tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
+obj-$(CONFIG_X86_INTEL_UMIP) += umip.o
 
-obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
index 85a9e17e0dbc..f1bb57b0e41e 100644
--- a/arch/x86/kernel/acpi/Makefile
+++ b/arch/x86/kernel/acpi/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
 
 obj-$(CONFIG_ACPI) += boot.o
diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c
index ea3046e0b0cf..bb8d300fecbd 100644
--- a/arch/x86/kernel/acpi/apei.c
+++ b/arch/x86/kernel/acpi/apei.c
@@ -52,8 +52,3 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
 	apei_mce_report_mem_error(sev, mem_err);
 #endif
 }
-
-void arch_apei_flush_tlb_one(unsigned long addr)
-{
-	__flush_tlb_one(addr);
-}
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 079535e53e2a..f4c463df8b08 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
 #ifdef CONFIG_X86_IO_APIC
 #define MP_ISA_BUS 0
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+					 u8 trigger, u32 gsi);
+
 static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 					  u32 gsi)
 {
-	int ioapic;
-	int pin;
-	struct mpc_intsrc mp_irq;
-
 	/*
 	 * Check bus_irq boundary.
 	 */
@@ -358,14 +357,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	}
 
 	/*
-	 * Convert 'gsi' to 'ioapic.pin'.
-	 */
-	ioapic = mp_find_ioapic(gsi);
-	if (ioapic < 0)
-		return;
-	pin = mp_find_ioapic_pin(ioapic, gsi);
-
-	/*
 	 * TBD: This check is for faulty timer entries, where the override
 	 *      erroneously sets the trigger to level, resulting in a HUGE
 	 *      increase of timer interrupts!
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
 	if ((bus_irq == 0) && (trigger == 3))
 		trigger = 1;
 
-	mp_irq.type = MP_INTSRC;
-	mp_irq.irqtype = mp_INT;
-	mp_irq.irqflag = (trigger << 2) | polarity;
-	mp_irq.srcbus = MP_ISA_BUS;
-	mp_irq.srcbusirq = bus_irq;	/* IRQ */
-	mp_irq.dstapic = mpc_ioapic_id(ioapic);	/* APIC ID */
-	mp_irq.dstirq = pin;	/* INTIN# */
-
-	mp_save_irq(&mp_irq);
-
+	if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0)
+		return;
 	/*
 	 * Reset default identity mapping if gsi is also an legacy IRQ,
 	 * otherwise there will be more than one entry with the same GSI
@@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
 	return 0;
 }
 
+static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
+					 u8 trigger, u32 gsi)
+{
+	struct mpc_intsrc mp_irq;
+	int ioapic, pin;
+
+	/* Convert 'gsi' to 'ioapic.pin'(INTIN#) */
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0) {
+		pr_warn("Failed to find ioapic for gsi : %u\n", gsi);
+		return ioapic;
+	}
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+
+	mp_irq.type = MP_INTSRC;
+	mp_irq.irqtype = mp_INT;
+	mp_irq.irqflag = (trigger << 2) | polarity;
+	mp_irq.srcbus = MP_ISA_BUS;
+	mp_irq.srcbusirq = bus_irq;
+	mp_irq.dstapic = mpc_ioapic_id(ioapic);
+	mp_irq.dstirq = pin;
+
+	mp_save_irq(&mp_irq);
+
+	return 0;
+}
+
 static int __init
 acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
 {
@@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
 	if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK)
 		polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK;
 
-	mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	if (bus_irq < NR_IRQS_LEGACY)
+		mp_override_legacy_irq(bus_irq, polarity, trigger, gsi);
+	else
+		mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi);
+
 	acpi_penalize_sci_irq(bus_irq, trigger, polarity);
 
 	/*
@@ -961,6 +976,11 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
 		x86_platform.legacy.rtc = 0;
 	}
 
+	if (acpi_gbl_FADT.boot_flags & ACPI_FADT_NO_VGA) {
+		pr_debug("ACPI: probing for VGA not safe\n");
+		x86_platform.legacy.no_vga = 1;
+	}
+
 #ifdef CONFIG_X86_PM_TIMER
 	/* detect the location of the ACPI PM Timer */
 	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) {
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index ed014814ea35..7188aea91549 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * sleep.c - x86-specific ACPI sleep support.
  *
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 65c7b606b606..fbb60ca4255c 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  *	Variables and functions used by the code in sleep.c
  */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 3344d3382e91..dbaf14d69ebd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -442,7 +442,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
 {
 	const s32 *poff;
 
-	mutex_lock(&text_mutex);
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
 
@@ -452,7 +451,6 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
 		if (*ptr == 0x3e)
 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
 	}
-	mutex_unlock(&text_mutex);
 }
 
 static void alternatives_smp_unlock(const s32 *start, const s32 *end,
@@ -460,7 +458,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
 	const s32 *poff;
 
-	mutex_lock(&text_mutex);
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
 
@@ -470,7 +467,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 		if (*ptr == 0xf0)
 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
 	}
-	mutex_unlock(&text_mutex);
 }
 
 struct smp_alt_module {
@@ -489,8 +485,7 @@ struct smp_alt_module {
 	struct list_head next;
 };
 static LIST_HEAD(smp_alt_modules);
-static DEFINE_MUTEX(smp_alt);
-static bool uniproc_patched = false;	/* protected by smp_alt */
+static bool uniproc_patched = false;	/* protected by text_mutex */
 
 void __init_or_module alternatives_smp_module_add(struct module *mod,
 						  char *name,
@@ -499,7 +494,7 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
 	struct smp_alt_module *smp;
 
-	mutex_lock(&smp_alt);
+	mutex_lock(&text_mutex);
 	if (!uniproc_patched)
 		goto unlock;
 
@@ -526,14 +521,14 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 smp_unlock:
 	alternatives_smp_unlock(locks, locks_end, text, text_end);
 unlock:
-	mutex_unlock(&smp_alt);
+	mutex_unlock(&text_mutex);
 }
 
 void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
 
-	mutex_lock(&smp_alt);
+	mutex_lock(&text_mutex);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
@@ -541,7 +536,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 		kfree(item);
 		break;
 	}
-	mutex_unlock(&smp_alt);
+	mutex_unlock(&text_mutex);
 }
 
 void alternatives_enable_smp(void)
@@ -551,7 +546,7 @@ void alternatives_enable_smp(void)
 	/* Why bother if there are no other CPUs? */
 	BUG_ON(num_possible_cpus() == 1);
 
-	mutex_lock(&smp_alt);
+	mutex_lock(&text_mutex);
 
 	if (uniproc_patched) {
 		pr_info("switching to SMP code\n");
@@ -563,10 +558,13 @@ void alternatives_enable_smp(void)
 					      mod->text, mod->text_end);
 		uniproc_patched = false;
 	}
-	mutex_unlock(&smp_alt);
+	mutex_unlock(&text_mutex);
 }
 
-/* Return 1 if the address range is reserved for smp-alternatives */
+/*
+ * Return 1 if the address range is reserved for SMP-alternatives.
+ * Must hold text_mutex.
+ */
 int alternatives_text_reserved(void *start, void *end)
 {
 	struct smp_alt_module *mod;
@@ -574,6 +572,8 @@ int alternatives_text_reserved(void *start, void *end)
 	u8 *text_start = start;
 	u8 *text_end = end;
 
+	lockdep_assert_held(&text_mutex);
+
 	list_for_each_entry(mod, &smp_alt_modules, next) {
 		if (mod->text > text_end || mod->text_end < text_start)
 			continue;
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 458da8509b75..6db28f17ff28 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -27,6 +27,8 @@ static const struct pci_device_id amd_root_ids[] = {
 	{}
 };
 
+#define PCI_DEVICE_ID_AMD_CNB17H_F4 0x1704
+
 const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
@@ -37,6 +39,7 @@ const struct pci_device_id amd_nb_misc_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) },
 	{}
 };
 EXPORT_SYMBOL_GPL(amd_nb_misc_ids);
@@ -48,6 +51,7 @@ static const struct pci_device_id amd_nb_link_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) },
 	{}
 };
 
@@ -402,11 +406,48 @@ void amd_flush_garts(void)
 }
 EXPORT_SYMBOL_GPL(amd_flush_garts);
 
+static void __fix_erratum_688(void *info)
+{
+#define MSR_AMD64_IC_CFG 0xC0011021
+
+	msr_set_bit(MSR_AMD64_IC_CFG, 3);
+	msr_set_bit(MSR_AMD64_IC_CFG, 14);
+}
+
+/* Apply erratum 688 fix so machines without a BIOS fix work. */
+static __init void fix_erratum_688(void)
+{
+	struct pci_dev *F4;
+	u32 val;
+
+	if (boot_cpu_data.x86 != 0x14)
+		return;
+
+	if (!amd_northbridges.num)
+		return;
+
+	F4 = node_to_amd_nb(0)->link;
+	if (!F4)
+		return;
+
+	if (pci_read_config_dword(F4, 0x164, &val))
+		return;
+
+	if (val & BIT(2))
+		return;
+
+	on_each_cpu(__fix_erratum_688, NULL, 0);
+
+	pr_info("x86/cpu/AMD: CPU erratum 688 worked around\n");
+}
+
 static __init int init_amd_nbs(void)
 {
 	amd_cache_northbridges();
 	amd_cache_gart();
 
+	fix_erratum_688();
+
 	return 0;
 }
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index ef2859f9fcce..f5d92bc3b884 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Firmware replacement code.
  *
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 8e63ebdcbd0b..a6fcaf16cdbf 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
 #
 # Makefile for local APIC drivers and for the IO-APIC code
 #
@@ -6,12 +7,11 @@
 # In particualr, smp_apic_timer_interrupt() is called in random places.
 KCOV_INSTRUMENT := n
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_common.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
 obj-$(CONFIG_PCI_MSI)		+= msi.o
-obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d705c769f77d..6e272f3ea984 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -211,11 +211,7 @@ static inline int lapic_get_version(void)
  */
 static inline int lapic_is_integrated(void)
 {
-#ifdef CONFIG_X86_64
-	return 1;
-#else
 	return APIC_INTEGRATED(lapic_get_version());
-#endif
 }
 
 /*
@@ -298,14 +294,11 @@ int get_physical_broadcast(void)
  */
 int lapic_get_maxlvt(void)
 {
-	unsigned int v;
-
-	v = apic_read(APIC_LVR);
 	/*
 	 * - we always have APIC integrated on 64bit mode
 	 * - 82489DXs do not report # of LVT entries
 	 */
-	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+	return lapic_is_integrated() ? GET_APIC_MAXLVT(apic_read(APIC_LVR)) : 2;
 }
 
 /*
@@ -573,11 +566,21 @@ static u32 bdx_deadline_rev(void)
 	return ~0U;
 }
 
+static u32 skx_deadline_rev(void)
+{
+	switch (boot_cpu_data.x86_mask) {
+	case 0x03: return 0x01000136;
+	case 0x04: return 0x02000014;
+	}
+
+	return ~0U;
+}
+
 static const struct x86_cpu_id deadline_match[] = {
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_HASWELL_X,	hsx_deadline_rev),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_BROADWELL_X,	0x0b000020),
 	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_BROADWELL_XEON_D,	bdx_deadline_rev),
-	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_SKYLAKE_X,	0x02000014),
+	DEADLINE_MODEL_MATCH_FUNC( INTEL_FAM6_SKYLAKE_X,	skx_deadline_rev),
 
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_CORE,	0x22),
 	DEADLINE_MODEL_MATCH_REV ( INTEL_FAM6_HASWELL_ULT,	0x20),
@@ -600,7 +603,8 @@ static void apic_check_deadline_errata(void)
 	const struct x86_cpu_id *m;
 	u32 rev;
 
-	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+	if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
+	    boot_cpu_has(X86_FEATURE_HYPERVISOR))
 		return;
 
 	m = x86_match_cpu(deadline_match);
@@ -1218,53 +1222,100 @@ void __init sync_Arb_IDs(void)
 			APIC_INT_LEVELTRIG | APIC_DM_INIT);
 }
 
-/*
- * An initial setup of the virtual wire mode.
- */
-void __init init_bsp_APIC(void)
+enum apic_intr_mode_id apic_intr_mode;
+
+static int __init apic_intr_mode_select(void)
 {
-	unsigned int value;
+	/* Check kernel option */
+	if (disable_apic) {
+		pr_info("APIC disabled via kernel command line\n");
+		return APIC_PIC;
+	}
 
-	/*
-	 * Don't do the setup now if we have a SMP BIOS as the
-	 * through-I/O-APIC virtual wire mode might be active.
-	 */
-	if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
-		return;
+	/* Check BIOS */
+#ifdef CONFIG_X86_64
+	/* On 64-bit, the APIC must be integrated, Check local APIC only */
+	if (!boot_cpu_has(X86_FEATURE_APIC)) {
+		disable_apic = 1;
+		pr_info("APIC disabled by BIOS\n");
+		return APIC_PIC;
+	}
+#else
+	/* On 32-bit, the APIC may be integrated APIC or 82489DX */
 
-	/*
-	 * Do not trust the local APIC being empty at bootup.
-	 */
-	clear_local_APIC();
+	/* Neither 82489DX nor integrated APIC ? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) && !smp_found_config) {
+		disable_apic = 1;
+		return APIC_PIC;
+	}
 
-	/*
-	 * Enable APIC.
-	 */
-	value = apic_read(APIC_SPIV);
-	value &= ~APIC_VECTOR_MASK;
-	value |= APIC_SPIV_APIC_ENABLED;
+	/* If the BIOS pretends there is an integrated APIC ? */
+	if (!boot_cpu_has(X86_FEATURE_APIC) &&
+	    APIC_INTEGRATED(boot_cpu_apic_version)) {
+		disable_apic = 1;
+		pr_err(FW_BUG "Local APIC %d not detected, force emulation\n",
+		       boot_cpu_physical_apicid);
+		return APIC_PIC;
+	}
+#endif
 
-#ifdef CONFIG_X86_32
-	/* This bit is reserved on P4/Xeon and should be cleared */
-	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
-	    (boot_cpu_data.x86 == 15))
-		value &= ~APIC_SPIV_FOCUS_DISABLED;
-	else
+	/* Check MP table or ACPI MADT configuration */
+	if (!smp_found_config) {
+		disable_ioapic_support();
+		if (!acpi_lapic) {
+			pr_info("APIC: ACPI MADT or MP tables are not detected\n");
+			return APIC_VIRTUAL_WIRE_NO_CONFIG;
+		}
+		return APIC_VIRTUAL_WIRE;
+	}
+
+#ifdef CONFIG_SMP
+	/* If SMP should be disabled, then really disable it! */
+	if (!setup_max_cpus) {
+		pr_info("APIC: SMP mode deactivated\n");
+		return APIC_SYMMETRIC_IO_NO_ROUTING;
+	}
+
+	if (read_apic_id() != boot_cpu_physical_apicid) {
+		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
+		      read_apic_id(), boot_cpu_physical_apicid);
+		/* Or can we switch back to PIC here? */
+	}
 #endif
-		value |= APIC_SPIV_FOCUS_DISABLED;
-	value |= SPURIOUS_APIC_VECTOR;
-	apic_write(APIC_SPIV, value);
 
-	/*
-	 * Set up the virtual wire mode.
-	 */
-	apic_write(APIC_LVT0, APIC_DM_EXTINT);
-	value = APIC_DM_NMI;
-	if (!lapic_is_integrated())		/* 82489DX */
-		value |= APIC_LVT_LEVEL_TRIGGER;
-	if (apic_extnmi == APIC_EXTNMI_NONE)
-		value |= APIC_LVT_MASKED;
-	apic_write(APIC_LVT1, value);
+	return APIC_SYMMETRIC_IO;
+}
+
+/* Init the interrupt delivery mode for the BSP */
+void __init apic_intr_mode_init(void)
+{
+	bool upmode = IS_ENABLED(CONFIG_UP_LATE_INIT);
+
+	apic_intr_mode = apic_intr_mode_select();
+
+	switch (apic_intr_mode) {
+	case APIC_PIC:
+		pr_info("APIC: Keep in PIC mode(8259)\n");
+		return;
+	case APIC_VIRTUAL_WIRE:
+		pr_info("APIC: Switch to virtual wire mode setup\n");
+		default_setup_apic_routing();
+		break;
+	case APIC_VIRTUAL_WIRE_NO_CONFIG:
+		pr_info("APIC: Switch to virtual wire mode setup with no configuration\n");
+		upmode = true;
+		default_setup_apic_routing();
+		break;
+	case APIC_SYMMETRIC_IO:
+		pr_info("APIC: Switch to symmetric I/O mode setup\n");
+		default_setup_apic_routing();
+		break;
+	case APIC_SYMMETRIC_IO_NO_ROUTING:
+		pr_info("APIC: Switch to symmetric I/O mode setup in no SMP routine\n");
+		break;
+	}
+
+	apic_bsp_setup(upmode);
 }
 
 static void lapic_setup_esr(void)
@@ -1462,7 +1513,7 @@ void setup_local_APIC(void)
 	/*
 	 * Set up LVT0, LVT1:
 	 *
-	 * set up through-local-APIC on the BP's LINT0. This is not
+	 * set up through-local-APIC on the boot CPU's LINT0. This is not
 	 * strictly necessary in pure symmetric-IO mode, but sometimes
 	 * we delegate interrupts to the 8259A.
 	 */
@@ -1488,7 +1539,9 @@ void setup_local_APIC(void)
 		value = APIC_DM_NMI;
 	else
 		value = APIC_DM_NMI | APIC_LVT_MASKED;
-	if (!lapic_is_integrated())		/* 82489DX */
+
+	/* Is 82489DX ? */
+	if (!lapic_is_integrated())
 		value |= APIC_LVT_LEVEL_TRIGGER;
 	apic_write(APIC_LVT1, value);
 
@@ -1634,7 +1687,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
 		 * under KVM
 		 */
 		if (max_physical_apicid > 255 ||
-		    !hypervisor_x2apic_available()) {
+		    !x86_init.hyper.x2apic_available()) {
 			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
 			x2apic_disable();
 			return;
@@ -1874,8 +1927,8 @@ void __init init_apic_mappings(void)
 		 * yeah -- we lie about apic_version
 		 * in case if apic was disabled via boot option
 		 * but it's not a problem for SMP compiled kernel
-		 * since smp_sanity_check is prepared for such a case
-		 * and disable smp mode
+		 * since apic_intr_mode_select is prepared for such
+		 * a case and disable smp mode
 		 */
 		boot_cpu_apic_version = GET_APIC_VERSION(apic_read(APIC_LVR));
 	}
@@ -2231,44 +2284,6 @@ int hard_smp_processor_id(void)
 	return read_apic_id();
 }
 
-void default_init_apic_ldr(void)
-{
-	unsigned long val;
-
-	apic_write(APIC_DFR, APIC_DFR_VALUE);
-	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
-	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
-	apic_write(APIC_LDR, val);
-}
-
-int default_cpu_mask_to_apicid(const struct cpumask *mask,
-			       struct irq_data *irqdata,
-			       unsigned int *apicid)
-{
-	unsigned int cpu = cpumask_first(mask);
-
-	if (cpu >= nr_cpu_ids)
-		return -EINVAL;
-	*apicid = per_cpu(x86_cpu_to_apicid, cpu);
-	irq_data_update_effective_affinity(irqdata, cpumask_of(cpu));
-	return 0;
-}
-
-int flat_cpu_mask_to_apicid(const struct cpumask *mask,
-			    struct irq_data *irqdata,
-			    unsigned int *apicid)
-
-{
-	struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata);
-	unsigned long cpu_mask = cpumask_bits(mask)[0] & APIC_ALL_CPUS;
-
-	if (!cpu_mask)
-		return -EINVAL;
-	*apicid = (unsigned int)cpu_mask;
-	cpumask_bits(effmsk)[0] = cpu_mask;
-	return 0;
-}
-
 /*
  * Override the generic EOI implementation with an optimized version.
  * Only called during early boot when only one CPU is active and with
@@ -2311,72 +2326,27 @@ static void __init apic_bsp_up_setup(void)
  * Returns:
  * apic_id of BSP APIC
  */
-int __init apic_bsp_setup(bool upmode)
+void __init apic_bsp_setup(bool upmode)
 {
-	int id;
-
 	connect_bsp_APIC();
 	if (upmode)
 		apic_bsp_up_setup();
 	setup_local_APIC();
 
-	if (x2apic_mode)
-		id = apic_read(APIC_LDR);
-	else
-		id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
-
 	enable_IO_APIC();
 	end_local_APIC_setup();
 	irq_remap_enable_fault_handling();
 	setup_IO_APIC();
-	/* Setup local timer */
-	x86_init.timers.setup_percpu_clockev();
-	return id;
-}
-
-/*
- * This initializes the IO-APIC and APIC hardware if this is
- * a UP kernel.
- */
-int __init APIC_init_uniprocessor(void)
-{
-	if (disable_apic) {
-		pr_info("Apic disabled\n");
-		return -1;
-	}
-#ifdef CONFIG_X86_64
-	if (!boot_cpu_has(X86_FEATURE_APIC)) {
-		disable_apic = 1;
-		pr_info("Apic disabled by BIOS\n");
-		return -1;
-	}
-#else
-	if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
-		return -1;
-
-	/*
-	 * Complain if the BIOS pretends there is one.
-	 */
-	if (!boot_cpu_has(X86_FEATURE_APIC) &&
-	    APIC_INTEGRATED(boot_cpu_apic_version)) {
-		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
-		       boot_cpu_physical_apicid);
-		return -1;
-	}
-#endif
-
-	if (!smp_found_config)
-		disable_ioapic_support();
-
-	default_setup_apic_routing();
-	apic_bsp_setup(true);
-	return 0;
 }
 
 #ifdef CONFIG_UP_LATE_INIT
 void __init up_late_init(void)
 {
-	APIC_init_uniprocessor();
+	if (apic_intr_mode == APIC_PIC)
+		return;
+
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
 }
 #endif
diff --git a/arch/x86/kernel/apic/apic_common.c b/arch/x86/kernel/apic/apic_common.c
new file mode 100644
index 000000000000..a360801779ae
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_common.c
@@ -0,0 +1,46 @@
+/*
+ * Common functions shared between the various APIC flavours
+ *
+ * SPDX-License-Identifier: GPL-2.0
+ */
+#include <linux/irq.h>
+#include <asm/apic.h>
+
+u32 apic_default_calc_apicid(unsigned int cpu)
+{
+	return per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+u32 apic_flat_calc_apicid(unsigned int cpu)
+{
+	return 1U << cpu;
+}
+
+bool default_check_apicid_used(physid_mask_t *map, int apicid)
+{
+	return physid_isset(apicid, *map);
+}
+
+void default_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+	*retmap = *phys_map;
+}
+
+int default_cpu_present_to_apicid(int mps_cpu)
+{
+	if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu))
+		return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
+	else
+		return BAD_APICID;
+}
+EXPORT_SYMBOL_GPL(default_cpu_present_to_apicid);
+
+int default_check_phys_apicid_present(int phys_apicid)
+{
+	return physid_isset(phys_apicid, phys_cpu_present_map);
+}
+
+int default_apic_id_valid(int apicid)
+{
+	return (apicid < 255);
+}
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index dedd5a41ba48..aa85690e9b64 100644
--- a/arch/x86/kernel/apic/apic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -119,7 +119,7 @@ static unsigned int flat_get_apic_id(unsigned long x)
 	return (x >> 24) & 0xFF;
 }
 
-static unsigned long set_apic_id(unsigned int id)
+static u32 set_apic_id(unsigned int id)
 {
 	return (id & 0xFF) << 24;
 }
@@ -154,12 +154,10 @@ static struct apic apic_flat __ro_after_init = {
 	.irq_delivery_mode		= dest_LowestPrio,
 	.irq_dest_mode			= 1, /* logical */
 
-	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= NULL,
 
-	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -172,7 +170,7 @@ static struct apic apic_flat __ro_after_init = {
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_flat_calc_apicid,
 
 	.send_IPI			= default_send_IPI_single,
 	.send_IPI_mask			= flat_send_IPI_mask,
@@ -249,12 +247,10 @@ static struct apic apic_physflat __ro_after_init = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
 	/* not needed, but shouldn't hurt: */
 	.init_apic_ldr			= flat_init_apic_ldr,
 
@@ -268,7 +264,7 @@ static struct apic apic_physflat __ro_after_init = {
 	.get_apic_id			= flat_get_apic_id,
 	.set_apic_id			= set_apic_id,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_default_calc_apicid,
 
 	.send_IPI			= default_send_IPI_single_phys,
 	.send_IPI_mask			= default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 6599f437b4ab..7b659c4480c9 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * NOOP APIC driver.
  *
@@ -83,20 +84,6 @@ static int noop_apic_id_registered(void)
 	return physid_isset(0, phys_cpu_present_map);
 }
 
-static const struct cpumask *noop_target_cpus(void)
-{
-	/* only BSP here */
-	return cpumask_of(0);
-}
-
-static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
-					  const struct cpumask *mask)
-{
-	if (cpu != 0)
-		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
-	cpumask_copy(retmask, cpumask_of(cpu));
-}
-
 static u32 noop_apic_read(u32 reg)
 {
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
@@ -108,6 +95,13 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
+#ifdef CONFIG_X86_32
+static int noop_x86_32_early_logical_apicid(int cpu)
+{
+	return BAD_APICID;
+}
+#endif
+
 struct apic apic_noop __ro_after_init = {
 	.name				= "noop",
 	.probe				= noop_probe,
@@ -120,12 +114,10 @@ struct apic apic_noop __ro_after_init = {
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
-	.target_cpus			= noop_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= default_check_apicid_used,
 
-	.vector_allocation_domain	= noop_vector_allocation_domain,
 	.init_apic_ldr			= noop_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
@@ -141,7 +133,7 @@ struct apic apic_noop __ro_after_init = {
 	.get_apic_id			= noop_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_flat_calc_apicid,
 
 	.send_IPI			= noop_send_IPI,
 	.send_IPI_mask			= noop_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 2fda912219a6..134e04506ab4 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -38,7 +38,7 @@ static unsigned int numachip1_get_apic_id(unsigned long x)
 	return id;
 }
 
-static unsigned long numachip1_set_apic_id(unsigned int id)
+static u32 numachip1_set_apic_id(unsigned int id)
 {
 	return (id & 0xff) << 24;
 }
@@ -51,7 +51,7 @@ static unsigned int numachip2_get_apic_id(unsigned long x)
 	return ((mcfg >> (28 - 8)) & 0xfff00) | (x >> 24);
 }
 
-static unsigned long numachip2_set_apic_id(unsigned int id)
+static u32 numachip2_set_apic_id(unsigned int id)
 {
 	return id << 24;
 }
@@ -249,12 +249,10 @@ static const struct apic apic_numachip1 __refconst = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -267,7 +265,7 @@ static const struct apic apic_numachip1 __refconst = {
 	.get_apic_id			= numachip1_get_apic_id,
 	.set_apic_id			= numachip1_set_apic_id,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_default_calc_apicid,
 
 	.send_IPI			= numachip_send_IPI_one,
 	.send_IPI_mask			= numachip_send_IPI_mask,
@@ -300,12 +298,10 @@ static const struct apic apic_numachip2 __refconst = {
 	.irq_delivery_mode		= dest_Fixed,
 	.irq_dest_mode			= 0, /* physical */
 
-	.target_cpus			= online_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= 0,
 	.check_apicid_used		= NULL,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= flat_init_apic_ldr,
 
 	.ioapic_phys_id_map		= NULL,
@@ -318,7 +314,7 @@ static const struct apic apic_numachip2 __refconst = {
 	.get_apic_id			= numachip2_get_apic_id,
 	.set_apic_id			= numachip2_set_apic_id,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_default_calc_apicid,
 
 	.send_IPI			= numachip_send_IPI_one,
 	.send_IPI_mask			= numachip_send_IPI_mask,
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
index 456e45e8bf84..afee386ff711 100644
--- a/arch/x86/kernel/apic/bigsmp_32.c
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
  *
@@ -26,9 +27,9 @@ static int bigsmp_apic_id_registered(void)
 	return 1;
 }
 
-static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
+static bool bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
 {
-	return 0;
+	return false;
 }
 
 static int bigsmp_early_logical_apicid(int cpu)
@@ -154,12 +155,10 @@ static struct apic apic_bigsmp __ro_after_init = {
 	/* phys delivery to target CPU: */
 	.irq_dest_mode			= 0,
 
-	.target_cpus			= default_target_cpus,
 	.disable_esr			= 1,
 	.dest_logical			= 0,
 	.check_apicid_used		= bigsmp_check_apicid_used,
 
-	.vector_allocation_domain	= default_vector_allocation_domain,
 	.init_apic_ldr			= bigsmp_init_apic_ldr,
 
 	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
@@ -172,7 +171,7 @@ static struct apic apic_bigsmp __ro_after_init = {
 	.get_apic_id			= bigsmp_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid		= default_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_default_calc_apicid,
 
 	.send_IPI			= default_send_IPI_single_phys,
 	.send_IPI_mask			= default_send_IPI_mask_sequence_phys,
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
deleted file mode 100644
index 56ccf9346b08..000000000000
--- a/arch/x86/kernel/apic/htirq.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Support Hypertransport IRQ
- *
- * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
- *	Moved from arch/x86/kernel/apic/io_apic.c.
- * Jiang Liu <jiang.liu@linux.intel.com>
- *	Add support of hierarchical irqdomain
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/pci.h>
-#include <linux/htirq.h>
-#include <asm/irqdomain.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/hypertransport.h>
-
-static struct irq_domain *htirq_domain;
-
-/*
- * Hypertransport interrupt support
- */
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_data *parent = data->parent_data;
-	int ret;
-
-	ret = parent->chip->irq_set_affinity(parent, mask, force);
-	if (ret >= 0) {
-		struct ht_irq_msg msg;
-		struct irq_cfg *cfg = irqd_cfg(data);
-
-		fetch_ht_irq_msg(data->irq, &msg);
-		msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK |
-				    HT_IRQ_LOW_DEST_ID_MASK);
-		msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) |
-				  HT_IRQ_LOW_DEST_ID(cfg->dest_apicid);
-		msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-		msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-		write_ht_irq_msg(data->irq, &msg);
-	}
-
-	return ret;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= irq_chip_ack_parent,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= irq_chip_retrigger_hierarchy,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs, void *arg)
-{
-	struct ht_irq_cfg *ht_cfg;
-	struct irq_alloc_info *info = arg;
-	struct pci_dev *dev;
-	irq_hw_number_t hwirq;
-	int ret;
-
-	if (nr_irqs > 1 || !info)
-		return -EINVAL;
-
-	dev = info->ht_dev;
-	hwirq = (info->ht_idx & 0xFF) |
-		PCI_DEVID(dev->bus->number, dev->devfn) << 8 |
-		(pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24;
-	if (irq_find_mapping(domain, hwirq) > 0)
-		return -EEXIST;
-
-	ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL);
-	if (!ht_cfg)
-		return -ENOMEM;
-
-	ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info);
-	if (ret < 0) {
-		kfree(ht_cfg);
-		return ret;
-	}
-
-	/* Initialize msg to a value that will never match the first write. */
-	ht_cfg->msg.address_lo = 0xffffffff;
-	ht_cfg->msg.address_hi = 0xffffffff;
-	ht_cfg->dev = info->ht_dev;
-	ht_cfg->update = info->ht_update;
-	ht_cfg->pos = info->ht_pos;
-	ht_cfg->idx = 0x10 + (info->ht_idx * 2);
-	irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg,
-			    handle_edge_irq, ht_cfg, "edge");
-
-	return 0;
-}
-
-static void htirq_domain_free(struct irq_domain *domain, unsigned int virq,
-			      unsigned int nr_irqs)
-{
-	struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq);
-
-	BUG_ON(nr_irqs != 1);
-	kfree(irq_data->chip_data);
-	irq_domain_free_irqs_top(domain, virq, nr_irqs);
-}
-
-static void htirq_domain_activate(struct irq_domain *domain,
-				  struct irq_data *irq_data)
-{
-	struct ht_irq_msg msg;
-	struct irq_cfg *cfg = irqd_cfg(irq_data);
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid);
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-	write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static void htirq_domain_deactivate(struct irq_domain *domain,
-				    struct irq_data *irq_data)
-{
-	struct ht_irq_msg msg;
-
-	memset(&msg, 0, sizeof(msg));
-	write_ht_irq_msg(irq_data->irq, &msg);
-}
-
-static const struct irq_domain_ops htirq_domain_ops = {
-	.alloc		= htirq_domain_alloc,
-	.free		= htirq_domain_free,
-	.activate	= htirq_domain_activate,
-	.deactivate	= htirq_domain_deactivate,
-};
-
-void __init arch_init_htirq_domain(struct irq_domain *parent)
-{
-	struct fwnode_handle *fn;
-
-	if (disable_apic)
-		return;
-
-	fn = irq_domain_alloc_named_fwnode("PCI-HT");
-	if (!fn)
-		goto warn;
-
-	htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL);
-	irq_domain_free_fwnode(fn);
-	if (!htirq_domain)
-		goto warn;
-
-	htirq_domain->parent = parent;
-	return;
-
-warn:
-	pr_warn("Failed to initialize irqdomain for HTIRQ.\n");
-}
-
-int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev,
-		      ht_irq_update_t *update)
-{
-	struct irq_alloc_info info;
-
-	if (!htirq_domain)
-		return -ENOSYS;
-
-	init_irq_alloc_info(&info, NULL);
-	info.ht_idx = idx;
-	info.ht_pos = pos;
-	info.ht_dev = dev;
-	info.ht_update = update;
-
-	return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev),
-				     &info);
-}
-
-void arch_teardown_ht_irq(unsigned int irq)
-{
-	irq_domain_free_irqs(irq, 1);
-}
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index d6f387780849..d1fc62a67320 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * HW NMI watchdog support
 *
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 70e48aa6af98..201579dc5242 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Intel IO-APIC support for multi-Pentium hosts.
 *
@@ -1013,6 +1014,7 @@ static int alloc_isa_irq_from_domain(struct irq_domain *domain,
 					  info->ioapic_pin))
 			return -ENOMEM;
 	} else {
+		info->flags |= X86_IRQ_ALLOC_LEGACY;
 		irq = __irq_domain_alloc_irqs(domain, irq, 1, node, info, true,
 					      NULL);
 		if (irq >= 0) {
@@ -1585,6 +1587,43 @@ static int __init notimercheck(char *s)
 }
 __setup("no_timer_check", notimercheck);
 
+static void __init delay_with_tsc(void)
+{
+	unsigned long long start, now;
+	unsigned long end = jiffies + 4;
+
+	start = rdtsc();
+
+	/*
+	 * We don't know the TSC frequency yet, but waiting for
+	 * 40000000000/HZ TSC cycles is safe:
+	 * 4 GHz == 10 jiffies
+	 * 1 GHz == 40 jiffies
+	 */
+	do {
+		rep_nop();
+		now = rdtsc();
+	} while ((now - start) < 40000000000UL / HZ &&
+		time_before_eq(jiffies, end));
+}
+
+static void __init delay_without_tsc(void)
+{
+	unsigned long end = jiffies + 4;
+	int band = 1;
+
+	/*
+	 * We don't know any frequency yet, but waiting for
+	 * 40940000000/HZ cycles is safe:
+	 * 4 GHz == 10 jiffies
+	 * 1 GHz == 40 jiffies
+	 * 1 << 1 + 1 << 2 +...+ 1 << 11 = 4094
+	 */
+	do {
+		__delay(((1U << band++) * 10000000UL) / HZ);
+	} while (band < 12 && time_before_eq(jiffies, end));
+}
+
 /*
  * There is a nasty bug in some older SMP boards, their mptable lies
  * about the timer IRQ. We do the following to work around the situation:
@@ -1603,8 +1642,12 @@ static int __init timer_irq_works(void)
 
 	local_save_flags(flags);
 	local_irq_enable();
-	/* Let ten ticks pass... */
-	mdelay((10 * 1000) / HZ);
+
+	if (boot_cpu_has(X86_FEATURE_TSC))
+		delay_with_tsc();
+	else
+		delay_without_tsc();
+
 	local_irq_restore(flags);
 
 	/*
@@ -1820,26 +1863,36 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data)
 	eoi_ioapic_pin(data->entry.vector, data);
 }
 
+static void ioapic_configure_entry(struct irq_data *irqd)
+{
+	struct mp_chip_data *mpd = irqd->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(irqd);
+	struct irq_pin_list *entry;
+
+	/*
+	 * Only update when the parent is the vector domain, don't touch it
+	 * if the parent is the remapping domain. Check the installed
+	 * ioapic chip to verify that.
+	 */
+	if (irqd->chip == &ioapic_chip) {
+		mpd->entry.dest = cfg->dest_apicid;
+		mpd->entry.vector = cfg->vector;
+	}
+	for_each_irq_pin(entry, mpd->irq_2_pin)
+		__ioapic_write_entry(entry->apic, entry->pin, mpd->entry);
+}
+
 static int ioapic_set_affinity(struct irq_data *irq_data,
 			       const struct cpumask *mask, bool force)
 {
 	struct irq_data *parent = irq_data->parent_data;
-	struct mp_chip_data *data = irq_data->chip_data;
-	struct irq_pin_list *entry;
-	struct irq_cfg *cfg;
 	unsigned long flags;
 	int ret;
 
 	ret = parent->chip->irq_set_affinity(parent, mask, force);
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE) {
-		cfg = irqd_cfg(irq_data);
-		data->entry.dest = cfg->dest_apicid;
-		data->entry.vector = cfg->vector;
-		for_each_irq_pin(entry, data->irq_2_pin)
-			__ioapic_write_entry(entry->apic, entry->pin,
-					     data->entry);
-	}
+	if (ret >= 0 && ret != IRQ_SET_MASK_OK_DONE)
+		ioapic_configure_entry(irq_data);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return ret;
@@ -2096,7 +2149,7 @@ static inline void __init check_timer(void)
 				unmask_ioapic_irq(irq_get_irq_data(0));
 		}
 		irq_domain_deactivate_irq(irq_data);
-		irq_domain_activate_irq(irq_data);
+		irq_domain_activate_irq(irq_data, false);
 		if (timer_irq_works()) {
 			if (disable_timer_pin_1 > 0)
 				clear_IO_APIC_pin(0, pin1);
@@ -2118,7 +2171,7 @@ static inline void __init check_timer(void)
 		 */
 		replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2);
 		irq_domain_deactivate_irq(irq_data);
-		irq_domain_activate_irq(irq_data);
+		irq_domain_activate_irq(irq_data, false);
 		legacy_pic->unmask(0);
 		if (timer_irq_works()) {
 			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2512,52 +2565,9 @@ int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
 }
 
 /*
- * This function currently is only a helper for the i386 smp boot process where
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
- * so mask in all cases should simply be apic->target_cpus()
+ * This function updates target affinity of IOAPIC interrupts to include
+ * the CPUs which came online during SMP bringup.
 */
-#ifdef CONFIG_SMP
-void __init setup_ioapic_dest(void)
-{
-	int pin, ioapic, irq, irq_entry;
-	const struct cpumask *mask;
-	struct irq_desc *desc;
-	struct irq_data *idata;
-	struct irq_chip *chip;
-
-	if (skip_ioapic_setup == 1)
-		return;
-
-	for_each_ioapic_pin(ioapic, pin) {
-		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
-		if (irq_entry == -1)
-			continue;
-
-		irq = pin_2_irq(irq_entry, ioapic, pin, 0);
-		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
-			continue;
-
-		desc = irq_to_desc(irq);
-		raw_spin_lock_irq(&desc->lock);
-		idata = irq_desc_get_irq_data(desc);
-
-		/*
-		 * Honour affinities which have been set in early boot
-		 */
-		if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
-			mask = irq_data_get_affinity_mask(idata);
-		else
-			mask = apic->target_cpus();
-
-		chip = irq_data_get_irq_chip(idata);
-		/* Might be lapic_chip for irq 0 */
-		if (chip->irq_set_affinity)
-			chip->irq_set_affinity(idata, mask, false);
-		raw_spin_unlock_irq(&desc->lock);
-	}
-}
-#endif
-
 #define IOAPIC_RESOURCE_NAME_SIZE 11
 
 static struct resource *ioapic_resources;
@@ -2977,17 +2987,15 @@ void mp_irqdomain_free(struct irq_domain *domain, unsigned int virq,
 	irq_domain_free_irqs_top(domain, virq, nr_irqs);
 }
 
-void mp_irqdomain_activate(struct irq_domain *domain,
-			   struct irq_data *irq_data)
+int mp_irqdomain_activate(struct irq_domain *domain,
+			  struct irq_data *irq_data, bool early)
 {
 	unsigned long flags;
-	struct irq_pin_list *entry;
-	struct mp_chip_data *data = irq_data->chip_data;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	for_each_irq_pin(entry, data->irq_2_pin)
-		__ioapic_write_entry(entry->apic, entry->pin, data->entry);
+	ioapic_configure_entry(irq_data);
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	return 0;
 }
 
 void mp_irqdomain_deactivate(struct irq_domain *domain,
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 3a205d4a12d0..82f9244fe61f 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 #include <linux/cpumask.h>
 #include <linux/interrupt.h>
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
index 63287659adb6..fa22017de806 100644
--- a/arch/x86/kernel/apic/probe_32.c
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -66,6 +66,31 @@ static void setup_apic_flat_routing(void)
 #endif
 }
 
+static int default_apic_id_registered(void)
+{
+	return physid_isset(read_apic_id(), phys_cpu_present_map);
+}
+
+/*
+ * Set up the logical destination ID. Intel recommends to set DFR, LDR and
+ * TPR before enabling an APIC. See e.g. "AP-388 82489DX User's Manual"
+ * (Intel document number 292116).
+ */
+static void default_init_apic_ldr(void)
+{
+	unsigned long val;
+
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
+	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+	apic_write(APIC_LDR, val);
+}
+
+static int default_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return cpuid_apic >> index_msb;
+}
+
 /* should be called last. */
 static int probe_default(void)
 {
@@ -84,12 +109,10 @@ static struct apic apic_default __ro_after_init = {
 	/* logical delivery broadcast to all CPUs: */
 	.irq_dest_mode			= 1,
 
-	.target_cpus			= default_target_cpus,
 	.disable_esr			= 0,
 	.dest_logical			= APIC_DEST_LOGICAL,
 	.check_apicid_used		= default_check_apicid_used,
 
-	.vector_allocation_domain	= flat_vector_allocation_domain,
 	.init_apic_ldr			= default_init_apic_ldr,
 
 	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
@@ -102,7 +125,7 @@ static struct apic apic_default __ro_after_init = {
 	.get_apic_id			= default_get_apic_id,
 	.set_apic_id			= NULL,
 
-	.cpu_mask_to_apicid		= flat_cpu_mask_to_apicid,
+	.calc_dest_apicid		= apic_flat_calc_apicid,
 
 	.send_IPI			= default_send_IPI_single,
 	.send_IPI_mask			= default_send_IPI_mask_logical,
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 88c214e75a6b..6a823a25eaff 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -1,5 +1,5 @@
 /*
- * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ * Local APIC related interfaces to support IOAPIC, MSI, etc.
 *
 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
 *	Moved from arch/x86/kernel/apic/io_apic.c.
@@ -11,6 +11,7 @@
 * published by the Free Software Foundation.
 */
 #include <linux/interrupt.h>
+#include <linux/seq_file.h>
 #include <linux/init.h>
 #include <linux/compiler.h>
 #include <linux/slab.h>
@@ -21,20 +22,30 @@
 #include <asm/desc.h>
 #include <asm/irq_remapping.h>
 
+#include <asm/trace/irq_vectors.h>
+
 struct apic_chip_data {
-	struct irq_cfg		cfg;
-	cpumask_var_t		domain;
-	cpumask_var_t		old_domain;
-	u8			move_in_progress : 1;
+	struct irq_cfg		hw_irq_cfg;
+	unsigned int		vector;
+	unsigned int		prev_vector;
+	unsigned int		cpu;
+	unsigned int		prev_cpu;
+	unsigned int		irq;
+	struct hlist_node	clist;
+	unsigned int		move_in_progress	: 1,
+				is_managed		: 1,
+				can_reserve		: 1,
+				has_reserved		: 1;
 };
 
 struct irq_domain *x86_vector_domain;
 EXPORT_SYMBOL_GPL(x86_vector_domain);
 static DEFINE_RAW_SPINLOCK(vector_lock);
-static cpumask_var_t vector_cpumask, vector_searchmask, searched_cpumask;
+static cpumask_var_t vector_searchmask;
 static struct irq_chip lapic_controller;
-#ifdef CONFIG_X86_IO_APIC
-static struct apic_chip_data *legacy_irq_data[NR_IRQS_LEGACY];
+static struct irq_matrix *vector_matrix;
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct hlist_head, cleanup_list);
 #endif
 
 void lock_vector_lock(void)
@@ -50,22 +61,37 @@ void unlock_vector_lock(void)
 	raw_spin_unlock(&vector_lock);
 }
 
-static struct apic_chip_data *apic_chip_data(struct irq_data *irq_data)
+void init_irq_alloc_info(struct irq_alloc_info *info,
+			 const struct cpumask *mask)
+{
+	memset(info, 0, sizeof(*info));
+	info->mask = mask;
+}
+
+void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src)
 {
-	if (!irq_data)
+	if (src)
+		*dst = *src;
+	else
+		memset(dst, 0, sizeof(*dst));
+}
+
+static struct apic_chip_data *apic_chip_data(struct irq_data *irqd)
+{
+	if (!irqd)
 		return NULL;
 
-	while (irq_data->parent_data)
-		irq_data = irq_data->parent_data;
+	while (irqd->parent_data)
+		irqd = irqd->parent_data;
 
-	return irq_data->chip_data;
+	return irqd->chip_data;
 }
 
-struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+struct irq_cfg *irqd_cfg(struct irq_data *irqd)
 {
-	struct apic_chip_data *data = apic_chip_data(irq_data);
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
 
-	return data ? &data->cfg : NULL;
+	return apicd ? &apicd->hw_irq_cfg : NULL;
 }
 EXPORT_SYMBOL_GPL(irqd_cfg);
 
@@ -76,270 +102,395 @@ struct irq_cfg *irq_cfg(unsigned int irq)
 
 static struct apic_chip_data *alloc_apic_chip_data(int node)
 {
-	struct apic_chip_data *data;
+	struct apic_chip_data *apicd;
 
-	data = kzalloc_node(sizeof(*data), GFP_KERNEL, node);
-	if (!data)
-		return NULL;
-	if (!zalloc_cpumask_var_node(&data->domain, GFP_KERNEL, node))
-		goto out_data;
-	if (!zalloc_cpumask_var_node(&data->old_domain, GFP_KERNEL, node))
-		goto out_domain;
-	return data;
-out_domain:
-	free_cpumask_var(data->domain);
-out_data:
-	kfree(data);
-	return NULL;
-}
-
-static void free_apic_chip_data(struct apic_chip_data *data)
-{
-	if (data) {
-		free_cpumask_var(data->domain);
-		free_cpumask_var(data->old_domain);
-		kfree(data);
+	apicd = kzalloc_node(sizeof(*apicd), GFP_KERNEL, node);
+	if (apicd)
+		INIT_HLIST_NODE(&apicd->clist);
+	return apicd;
+}
+
+static void free_apic_chip_data(struct apic_chip_data *apicd)
+{
+	kfree(apicd);
+}
+
+static void apic_update_irq_cfg(struct irq_data *irqd, unsigned int vector,
+				unsigned int cpu)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+
+	lockdep_assert_held(&vector_lock);
+
+	apicd->hw_irq_cfg.vector = vector;
+	apicd->hw_irq_cfg.dest_apicid = apic->calc_dest_apicid(cpu);
+	irq_data_update_effective_affinity(irqd, cpumask_of(cpu));
+	trace_vector_config(irqd->irq, vector, cpu,
+			    apicd->hw_irq_cfg.dest_apicid);
+}
+
+static void apic_update_vector(struct irq_data *irqd, unsigned int newvec,
+			       unsigned int newcpu)
+{
+	struct apic_chip_data *apicd = apic_chip_data(irqd);
+	struct irq_desc *desc = irq_data_to_desc(irqd);
+
+	lockdep_assert_held(&vector_lock);
+
+	trace_vector_update(irqd->irq, newvec, newcpu, apicd->vector,
+			    apicd->cpu);
+
+	/* Setup the vector move, if required  */
+	if (apicd->vector && cpu_online(apicd->cpu)) {
+		apicd->move_in_progress = true;
+		apicd->prev_vector = apicd->vector;
+		apicd->prev_cpu = apicd->cpu;
+	} else {
+		apicd->prev_vector = 0;
 	}
+
+	apicd->vector = newvec;
+	apicd->cpu = newcpu;
+	BUG_ON(!IS_ERR_OR_NULL(per_cpu(vector_irq, newcpu)[newvec]));
+	per_cpu(vector_irq, newcpu)[newvec] = desc;
 }
 
-static int __assign_irq_vector(int irq, struct apic_chip_data *d,
-			       const struct cpumask *mask,
-			       struct irq_data *irqdata)
+static void vector_assign_managed_shutdown(struct irq_data *irqd)
 {
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, vector;
+	unsigned int cpu = cpumask_first(cpu_online_mask);
 
-	/*
-	 * If there is still a move in progress or the previous move has not
-	 * been cleaned up completely, tell the caller to come back later.
- */ - if (d->move_in_progress || - cpumask_intersects(d->old_domain, cpu_online_mask)) - return -EBUSY; + apic_update_irq_cfg(irqd, MANAGED_IRQ_SHUTDOWN_VECTOR, cpu); +} - /* Only try and allocate irqs on cpus that are present */ - cpumask_clear(d->old_domain); - cpumask_clear(searched_cpumask); - cpu = cpumask_first_and(mask, cpu_online_mask); - while (cpu < nr_cpu_ids) { - int new_cpu, offset; +static int reserve_managed_vector(struct irq_data *irqd) +{ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret; - /* Get the possible target cpus for @mask/@cpu from the apic */ - apic->vector_allocation_domain(cpu, vector_cpumask, mask); + raw_spin_lock_irqsave(&vector_lock, flags); + apicd->is_managed = true; + ret = irq_matrix_reserve_managed(vector_matrix, affmsk); + raw_spin_unlock_irqrestore(&vector_lock, flags); + trace_vector_reserve_managed(irqd->irq, ret); + return ret; +} - /* - * Clear the offline cpus from @vector_cpumask for searching - * and verify whether the result overlaps with @mask. If true, - * then the call to apic->cpu_mask_to_apicid() will - * succeed as well. If not, no point in trying to find a - * vector in this mask. - */ - cpumask_and(vector_searchmask, vector_cpumask, cpu_online_mask); - if (!cpumask_intersects(vector_searchmask, mask)) - goto next_cpu; - - if (cpumask_subset(vector_cpumask, d->domain)) { - if (cpumask_equal(vector_cpumask, d->domain)) - goto success; - /* - * Mark the cpus which are not longer in the mask for - * cleanup. - */ - cpumask_andnot(d->old_domain, d->domain, vector_cpumask); - vector = d->cfg.vector; - goto update; - } +static void reserve_irq_vector_locked(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); - vector = current_vector; - offset = current_offset; -next: - vector += 16; - if (vector >= FIRST_SYSTEM_VECTOR) { - offset = (offset + 1) % 16; - vector = FIRST_EXTERNAL_VECTOR + offset; - } + irq_matrix_reserve(vector_matrix); + apicd->can_reserve = true; + apicd->has_reserved = true; + trace_vector_reserve(irqd->irq, 0); + vector_assign_managed_shutdown(irqd); +} - /* If the search wrapped around, try the next cpu */ - if (unlikely(current_vector == vector)) - goto next_cpu; +static int reserve_irq_vector(struct irq_data *irqd) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vector_lock, flags); + reserve_irq_vector_locked(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return 0; +} - if (test_bit(vector, used_vectors)) - goto next; +static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool resvd = apicd->has_reserved; + unsigned int cpu = apicd->cpu; + int vector = apicd->vector; - for_each_cpu(new_cpu, vector_searchmask) { - if (!IS_ERR_OR_NULL(per_cpu(vector_irq, new_cpu)[vector])) - goto next; - } - /* Found one! */ - current_vector = vector; - current_offset = offset; - /* Schedule the old vector for cleanup on all cpus */ - if (d->cfg.vector) - cpumask_copy(d->old_domain, d->domain); - for_each_cpu(new_cpu, vector_searchmask) - per_cpu(vector_irq, new_cpu)[vector] = irq_to_desc(irq); - goto update; - -next_cpu: - /* - * We exclude the current @vector_cpumask from the requested - * @mask and try again with the next online cpu in the - * result. 
We cannot modify @mask, so we use @vector_cpumask - * as a temporary buffer here as it will be reassigned when - * calling apic->vector_allocation_domain() above. - */ - cpumask_or(searched_cpumask, searched_cpumask, vector_cpumask); - cpumask_andnot(vector_cpumask, mask, searched_cpumask); - cpu = cpumask_first_and(vector_cpumask, cpu_online_mask); - continue; - } - return -ENOSPC; + lockdep_assert_held(&vector_lock); -update: /* - * Exclude offline cpus from the cleanup mask and set the - * move_in_progress flag when the result is not empty. + * If the current target CPU is online and in the new requested + * affinity mask, there is no point in moving the interrupt from + * one CPU to another. */ - cpumask_and(d->old_domain, d->old_domain, cpu_online_mask); - d->move_in_progress = !cpumask_empty(d->old_domain); - d->cfg.old_vector = d->move_in_progress ? d->cfg.vector : 0; - d->cfg.vector = vector; - cpumask_copy(d->domain, vector_cpumask); -success: - /* - * Cache destination APIC IDs into cfg->dest_apicid. This cannot fail - * as we already established, that mask & d->domain & cpu_online_mask - * is not empty. - * - * vector_searchmask is a subset of d->domain and has the offline - * cpus masked out. - */ - cpumask_and(vector_searchmask, vector_searchmask, mask); - BUG_ON(apic->cpu_mask_to_apicid(vector_searchmask, irqdata, - &d->cfg.dest_apicid)); + if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) + return 0; + + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); + if (vector > 0) + apic_update_vector(irqd, vector, cpu); + trace_vector_alloc(irqd->irq, vector, resvd, vector); + return vector; +} + +static int assign_vector_locked(struct irq_data *irqd, + const struct cpumask *dest) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector = allocate_vector(irqd, dest); + + if (vector < 0) + return vector; + + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); return 0; } -static int assign_irq_vector(int irq, struct apic_chip_data *data, - const struct cpumask *mask, - struct irq_data *irqdata) +static int assign_irq_vector(struct irq_data *irqd, const struct cpumask *dest) { - int err; unsigned long flags; + int ret; raw_spin_lock_irqsave(&vector_lock, flags); - err = __assign_irq_vector(irq, data, mask, irqdata); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + ret = assign_vector_locked(irqd, vector_searchmask); raw_spin_unlock_irqrestore(&vector_lock, flags); - return err; + return ret; } -static int assign_irq_vector_policy(int irq, int node, - struct apic_chip_data *data, - struct irq_alloc_info *info, - struct irq_data *irqdata) +static int assign_irq_vector_any_locked(struct irq_data *irqd) { - if (info && info->mask) - return assign_irq_vector(irq, data, info->mask, irqdata); - if (node != NUMA_NO_NODE && - assign_irq_vector(irq, data, cpumask_of_node(node), irqdata) == 0) + /* Get the affinity mask - either irq_default_affinity or (user) set */ + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + int node = irq_data_get_node(irqd); + + if (node == NUMA_NO_NODE) + goto all; + /* Try the intersection of @affmsk and node mask */ + cpumask_and(vector_searchmask, cpumask_of_node(node), affmsk); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the node mask */ + if (!assign_vector_locked(irqd, cpumask_of_node(node))) return 0; - return assign_irq_vector(irq, data, apic->target_cpus(), irqdata); +all: + /* Try the full affinity mask */ + cpumask_and(vector_searchmask, affmsk, 
cpu_online_mask); + if (!assign_vector_locked(irqd, vector_searchmask)) + return 0; + /* Try the full online mask */ + return assign_vector_locked(irqd, cpu_online_mask); +} + +static int +assign_irq_vector_policy(struct irq_data *irqd, struct irq_alloc_info *info) +{ + if (irqd_affinity_is_managed(irqd)) + return reserve_managed_vector(irqd); + if (info->mask) + return assign_irq_vector(irqd, info->mask); + /* + * Make only a global reservation with no guarantee. A real vector + * is associated at activation time. + */ + return reserve_irq_vector(irqd); } -static void clear_irq_vector(int irq, struct apic_chip_data *data) +static int +assign_managed_vector(struct irq_data *irqd, const struct cpumask *dest) { - struct irq_desc *desc; - int cpu, vector; + const struct cpumask *affmsk = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + int vector, cpu; - if (!data->cfg.vector) + cpumask_and(vector_searchmask, vector_searchmask, affmsk); + cpu = cpumask_first(vector_searchmask); + if (cpu >= nr_cpu_ids) + return -EINVAL; + /* set_affinity might call here for nothing */ + if (apicd->vector && cpumask_test_cpu(apicd->cpu, vector_searchmask)) + return 0; + vector = irq_matrix_alloc_managed(vector_matrix, cpu); + trace_vector_alloc_managed(irqd->irq, vector, vector); + if (vector < 0) + return vector; + apic_update_vector(irqd, vector, cpu); + apic_update_irq_cfg(irqd, vector, cpu); + return 0; +} + +static void clear_irq_vector(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + bool managed = irqd_affinity_is_managed(irqd); + unsigned int vector = apicd->vector; + + lockdep_assert_held(&vector_lock); + + if (!vector) return; - vector = data->cfg.vector; - for_each_cpu_and(cpu, data->domain, cpu_online_mask) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + trace_vector_clear(irqd->irq, vector, apicd->cpu, apicd->prev_vector, + apicd->prev_cpu); - data->cfg.vector = 0; - cpumask_clear(data->domain); + per_cpu(vector_irq, apicd->cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->cpu, vector, managed); + apicd->vector = 0; - /* - * If move is in progress or the old_domain mask is not empty, - * i.e. the cleanup IPI has not been processed yet, we need to remove - * the old references to desc from all cpus vector tables. 
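assign_irq_vector_any_locked() above tries progressively wider target sets until one yields a vector. A compilable toy restatement of that widening order, with CPU masks shrunk to 8-bit integers and all values hypothetical:

#include <stdio.h>

/* "Allocate" on the lowest CPU of a candidate mask; -1 if the mask is
 * empty. Stands in for assign_vector_locked(). */
static int try_alloc(unsigned int mask)
{
        return mask ? __builtin_ctz(mask) : -1;
}

int main(void)
{
        unsigned int affinity = 0x0c;   /* CPUs 2,3: user-set affinity */
        unsigned int node = 0x03;       /* CPUs 0,1: local NUMA node */
        unsigned int online = 0x0f;     /* CPUs 0-3 online */
        unsigned int order[] = { affinity & node, node,
                                 affinity & online, online };

        for (unsigned int i = 0; i < 4; i++) {
                int cpu = try_alloc(order[i]);
                if (cpu >= 0) {
                        printf("step %u succeeded on CPU %d\n", i, cpu);
                        return 0;
                }
        }
        return 1;       /* all four candidate sets exhausted */
}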
- */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) + /* Clean up move in progress */ + vector = apicd->prev_vector; + if (!vector) return; - desc = irq_to_desc(irq); - for_each_cpu_and(cpu, data->old_domain, cpu_online_mask) { - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; - vector++) { - if (per_cpu(vector_irq, cpu)[vector] != desc) - continue; - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - break; - } + per_cpu(vector_irq, apicd->prev_cpu)[vector] = VECTOR_UNUSED; + irq_matrix_free(vector_matrix, apicd->prev_cpu, vector, managed); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; + hlist_del_init(&apicd->clist); +} + +static void x86_vector_deactivate(struct irq_domain *dom, struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + + trace_vector_deactivate(irqd->irq, apicd->is_managed, + apicd->can_reserve, false); + + /* Regular fixed assigned interrupt */ + if (!apicd->is_managed && !apicd->can_reserve) + return; + /* If the interrupt has a global reservation, nothing to do */ + if (apicd->has_reserved) + return; + + raw_spin_lock_irqsave(&vector_lock, flags); + clear_irq_vector(irqd); + if (apicd->can_reserve) + reserve_irq_vector_locked(irqd); + else + vector_assign_managed_shutdown(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); +} + +static int activate_reserved(struct irq_data *irqd) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int ret; + + ret = assign_irq_vector_any_locked(irqd); + if (!ret) + apicd->has_reserved = false; + return ret; +} + +static int activate_managed(struct irq_data *irqd) +{ + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + int ret; + + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) { + /* Something in the core code broke! Survive gracefully */ + pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq); + return -EINVAL; + } + + ret = assign_managed_vector(irqd, vector_searchmask); + /* + * This should not happen. The vector reservation got buggered. Handle + * it gracefully. 
+ */ + if (WARN_ON_ONCE(ret < 0)) { + pr_err("Managed startup irq %u, no vector available\n", + irqd->irq); } - data->move_in_progress = 0; + return ret; } -void init_irq_alloc_info(struct irq_alloc_info *info, - const struct cpumask *mask) +static int x86_vector_activate(struct irq_domain *dom, struct irq_data *irqd, + bool early) { - memset(info, 0, sizeof(*info)); - info->mask = mask; + struct apic_chip_data *apicd = apic_chip_data(irqd); + unsigned long flags; + int ret = 0; + + trace_vector_activate(irqd->irq, apicd->is_managed, + apicd->can_reserve, early); + + /* Nothing to do for fixed assigned vectors */ + if (!apicd->can_reserve && !apicd->is_managed) + return 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + if (early || irqd_is_managed_and_shutdown(irqd)) + vector_assign_managed_shutdown(irqd); + else if (apicd->is_managed) + ret = activate_managed(irqd); + else if (apicd->has_reserved) + ret = activate_reserved(irqd); + raw_spin_unlock_irqrestore(&vector_lock, flags); + return ret; } -void copy_irq_alloc_info(struct irq_alloc_info *dst, struct irq_alloc_info *src) +static void vector_free_reserved_and_managed(struct irq_data *irqd) { - if (src) - *dst = *src; - else - memset(dst, 0, sizeof(*dst)); + const struct cpumask *dest = irq_data_get_affinity_mask(irqd); + struct apic_chip_data *apicd = apic_chip_data(irqd); + + trace_vector_teardown(irqd->irq, apicd->is_managed, + apicd->has_reserved); + + if (apicd->has_reserved) + irq_matrix_remove_reserved(vector_matrix); + if (apicd->is_managed) + irq_matrix_remove_managed(vector_matrix, dest); } static void x86_vector_free_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs) { - struct apic_chip_data *apic_data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; unsigned long flags; int i; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(x86_vector_domain, virq + i); - if (irq_data && irq_data->chip_data) { + irqd = irq_domain_get_irq_data(x86_vector_domain, virq + i); + if (irqd && irqd->chip_data) { raw_spin_lock_irqsave(&vector_lock, flags); - clear_irq_vector(virq + i, irq_data->chip_data); - apic_data = irq_data->chip_data; - irq_domain_reset_irq_data(irq_data); + clear_irq_vector(irqd); + vector_free_reserved_and_managed(irqd); + apicd = irqd->chip_data; + irq_domain_reset_irq_data(irqd); raw_spin_unlock_irqrestore(&vector_lock, flags); - free_apic_chip_data(apic_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs()) - legacy_irq_data[virq + i] = NULL; -#endif + free_apic_chip_data(apicd); } } } +static bool vector_configure_legacy(unsigned int virq, struct irq_data *irqd, + struct apic_chip_data *apicd) +{ + unsigned long flags; + bool realloc = false; + + apicd->vector = ISA_IRQ_VECTOR(virq); + apicd->cpu = 0; + + raw_spin_lock_irqsave(&vector_lock, flags); + /* + * If the interrupt is activated, then it must stay at this vector + * position. That's usually the timer interrupt (0). 
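The activate/deactivate pair above is driven by three per-interrupt flags. A condensed restatement of how they classify an interrupt (illustrative enum and helper, not from the patch):

#include <stdbool.h>

enum toy_vector_mode {
        TOY_FIXED,      /* legacy/system vector: activation is a no-op */
        TOY_RESERVED,   /* global reservation: real vector assigned on activate */
        TOY_MANAGED,    /* per-CPU managed reservation, guaranteed on activate */
};

static enum toy_vector_mode toy_classify(bool is_managed, bool can_reserve)
{
        if (is_managed)
                return TOY_MANAGED;
        return can_reserve ? TOY_RESERVED : TOY_FIXED;
}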
+ */ + if (irqd_is_activated(irqd)) { + trace_vector_setup(virq, true, 0); + apic_update_irq_cfg(irqd, apicd->vector, apicd->cpu); + } else { + /* Release the vector */ + apicd->can_reserve = true; + clear_irq_vector(irqd); + realloc = true; + } + raw_spin_unlock_irqrestore(&vector_lock, flags); + return realloc; +} + static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { struct irq_alloc_info *info = arg; - struct apic_chip_data *data; - struct irq_data *irq_data; + struct apic_chip_data *apicd; + struct irq_data *irqd; int i, err, node; if (disable_apic) @@ -350,34 +501,37 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, return -ENOSYS; for (i = 0; i < nr_irqs; i++) { - irq_data = irq_domain_get_irq_data(domain, virq + i); - BUG_ON(!irq_data); - node = irq_data_get_node(irq_data); -#ifdef CONFIG_X86_IO_APIC - if (virq + i < nr_legacy_irqs() && legacy_irq_data[virq + i]) - data = legacy_irq_data[virq + i]; - else -#endif - data = alloc_apic_chip_data(node); - if (!data) { + irqd = irq_domain_get_irq_data(domain, virq + i); + BUG_ON(!irqd); + node = irq_data_get_node(irqd); + WARN_ON_ONCE(irqd->chip_data); + apicd = alloc_apic_chip_data(node); + if (!apicd) { err = -ENOMEM; goto error; } - irq_data->chip = &lapic_controller; - irq_data->chip_data = data; - irq_data->hwirq = virq + i; - err = assign_irq_vector_policy(virq + i, node, data, info, - irq_data); - if (err) - goto error; + apicd->irq = virq + i; + irqd->chip = &lapic_controller; + irqd->chip_data = apicd; + irqd->hwirq = virq + i; + irqd_set_single_target(irqd); /* - * If the apic destination mode is physical, then the - * effective affinity is restricted to a single target - * CPU. Mark the interrupt accordingly. + * Legacy vectors are already assigned when the IOAPIC + * takes them over. They stay on the same vector. This is + * required for check_timer() to work correctly as it might + * switch back to legacy mode. Only update the hardware + * config. 
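vector_configure_legacy() pins legacy IRQs to ISA_IRQ_VECTOR(irq). Assuming the usual irq_vectors.h definitions (FIRST_EXTERNAL_VECTOR at 0x20, legacy block aligned up to the next multiple of 16), the mapping works out as below; IRQ0, the timer, lands on vector 0x30:

#include <stdio.h>

#define FIRST_EXTERNAL_VECTOR   0x20
/* Mirrors arch/x86/include/asm/irq_vectors.h (assumed definition) */
#define ISA_IRQ_VECTOR(irq)     (((FIRST_EXTERNAL_VECTOR + 16) & ~15) + (irq))

int main(void)
{
        for (int irq = 0; irq < 16; irq++)
                printf("ISA IRQ %2d -> vector 0x%02x\n", irq, ISA_IRQ_VECTOR(irq));
        return 0;
}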
*/ - if (!apic->irq_dest_mode) - irqd_set_single_target(irq_data); + if (info->flags & X86_IRQ_ALLOC_LEGACY) { + if (!vector_configure_legacy(virq + i, irqd, apicd)) + continue; + } + + err = assign_irq_vector_policy(irqd, info); + trace_vector_setup(virq + i, false, err); + if (err) + goto error; } return 0; @@ -387,9 +541,56 @@ error: return err; } +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS +void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind) +{ + unsigned int cpu, vector, prev_cpu, prev_vector; + struct apic_chip_data *apicd; + unsigned long flags; + int irq; + + if (!irqd) { + irq_matrix_debug_show(m, vector_matrix, ind); + return; + } + + irq = irqd->irq; + if (irq < nr_legacy_irqs() && !test_bit(irq, &io_apic_irqs)) { + seq_printf(m, "%*sVector: %5d\n", ind, "", ISA_IRQ_VECTOR(irq)); + seq_printf(m, "%*sTarget: Legacy PIC all CPUs\n", ind, ""); + return; + } + + apicd = irqd->chip_data; + if (!apicd) { + seq_printf(m, "%*sVector: Not assigned\n", ind, ""); + return; + } + + raw_spin_lock_irqsave(&vector_lock, flags); + cpu = apicd->cpu; + vector = apicd->vector; + prev_cpu = apicd->prev_cpu; + prev_vector = apicd->prev_vector; + raw_spin_unlock_irqrestore(&vector_lock, flags); + seq_printf(m, "%*sVector: %5u\n", ind, "", vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); + if (prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + } +} +#endif + static const struct irq_domain_ops x86_vector_domain_ops = { - .alloc = x86_vector_alloc_irqs, - .free = x86_vector_free_irqs, + .alloc = x86_vector_alloc_irqs, + .free = x86_vector_free_irqs, + .activate = x86_vector_activate, + .deactivate = x86_vector_deactivate, +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + .debug_show = x86_vector_debug_show, +#endif }; int __init arch_probe_nr_irqs(void) @@ -400,7 +601,7 @@ int __init arch_probe_nr_irqs(void) nr_irqs = NR_VECTORS * nr_cpu_ids; nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) +#if defined(CONFIG_PCI_MSI) /* * for MSI and HT dyn irq */ @@ -419,35 +620,40 @@ int __init arch_probe_nr_irqs(void) return legacy_pic->probe(); } -#ifdef CONFIG_X86_IO_APIC -static void __init init_legacy_irqs(void) +void lapic_assign_legacy_vector(unsigned int irq, bool replace) { - int i, node = cpu_to_node(0); - struct apic_chip_data *data; - /* - * For legacy IRQ's, start with assigning irq0 to irq15 to - * ISA_IRQ_VECTOR(i) for all cpu's. + * Use assign system here so it won't get accounted as allocated + * and movable in the cpu hotplug check and it prevents managed + * irq reservation from touching it. 
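With CONFIG_GENERIC_IRQ_DEBUGFS enabled, the debug_show callback added above makes the per-interrupt state inspectable under debugfs. For an interrupt with a migration still pending, the output derived from the seq_printf format strings would look like the following (all values hypothetical):

Vector:    33
Target:     4
Previous vector:    34
Previous target:     2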
*/ - for (i = 0; i < nr_legacy_irqs(); i++) { - data = legacy_irq_data[i] = alloc_apic_chip_data(node); - BUG_ON(!data); + irq_matrix_assign_system(vector_matrix, ISA_IRQ_VECTOR(irq), replace); +} + +void __init lapic_assign_system_vectors(void) +{ + unsigned int i, vector = 0; - data->cfg.vector = ISA_IRQ_VECTOR(i); - cpumask_setall(data->domain); - irq_set_chip_data(i, data); + for_each_set_bit_from(vector, system_vectors, NR_VECTORS) + irq_matrix_assign_system(vector_matrix, vector, false); + + if (nr_legacy_irqs() > 1) + lapic_assign_legacy_vector(PIC_CASCADE_IR, false); + + /* System vectors are reserved, online it */ + irq_matrix_online(vector_matrix); + + /* Mark the preallocated legacy interrupts */ + for (i = 0; i < nr_legacy_irqs(); i++) { + if (i != PIC_CASCADE_IR) + irq_matrix_assign(vector_matrix, ISA_IRQ_VECTOR(i)); } } -#else -static inline void init_legacy_irqs(void) { } -#endif int __init arch_early_irq_init(void) { struct fwnode_handle *fn; - init_legacy_irqs(); - fn = irq_domain_alloc_named_fwnode("VECTOR"); BUG_ON(!fn); x86_vector_domain = irq_domain_create_tree(fn, &x86_vector_domain_ops, @@ -457,102 +663,116 @@ int __init arch_early_irq_init(void) irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); - arch_init_htirq_domain(x86_vector_domain); - BUG_ON(!alloc_cpumask_var(&vector_cpumask, GFP_KERNEL)); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); - BUG_ON(!alloc_cpumask_var(&searched_cpumask, GFP_KERNEL)); + + /* + * Allocate the vector matrix allocator data structure and limit the + * search area. + */ + vector_matrix = irq_alloc_matrix(NR_VECTORS, FIRST_EXTERNAL_VECTOR, + FIRST_SYSTEM_VECTOR); + BUG_ON(!vector_matrix); return arch_early_ioapic_init(); } -/* Initialize vector_irq on a new cpu */ -static void __setup_vector_irq(int cpu) +#ifdef CONFIG_SMP + +static struct irq_desc *__setup_vector_irq(int vector) { - struct apic_chip_data *data; - struct irq_desc *desc; - int irq, vector; + int isairq = vector - ISA_IRQ_VECTOR(0); + + /* Check whether the irq is in the legacy space */ + if (isairq < 0 || isairq >= nr_legacy_irqs()) + return VECTOR_UNUSED; + /* Check whether the irq is handled by the IOAPIC */ + if (test_bit(isairq, &io_apic_irqs)) + return VECTOR_UNUSED; + return irq_to_desc(isairq); +} - /* Mark the inuse vectors */ - for_each_irq_desc(irq, desc) { - struct irq_data *idata = irq_desc_get_irq_data(desc); +/* Online the local APIC infrastructure and initialize the vectors */ +void lapic_online(void) +{ + unsigned int vector; - data = apic_chip_data(idata); - if (!data || !cpumask_test_cpu(cpu, data->domain)) - continue; - vector = data->cfg.vector; - per_cpu(vector_irq, cpu)[vector] = desc; - } - /* Mark the free vectors */ - for (vector = 0; vector < NR_VECTORS; ++vector) { - desc = per_cpu(vector_irq, cpu)[vector]; - if (IS_ERR_OR_NULL(desc)) - continue; + lockdep_assert_held(&vector_lock); - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!cpumask_test_cpu(cpu, data->domain)) - per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; - } + /* Online the vector matrix array for this CPU */ + irq_matrix_online(vector_matrix); + + /* + * The interrupt affinity logic never targets interrupts to offline + * CPUs. The exception are the legacy PIC interrupts. In general + * they are only targeted to CPU0, but depending on the platform + * they can be distributed to any online CPU in hardware. The + * kernel has no influence on that. So all active legacy vectors + * must be installed on all CPUs. 
All non legacy interrupts can be + * cleared. + */ + for (vector = 0; vector < NR_VECTORS; vector++) + this_cpu_write(vector_irq[vector], __setup_vector_irq(vector)); } -/* - * Setup the vector to irq mappings. Must be called with vector_lock held. - */ -void setup_vector_irq(int cpu) +void lapic_offline(void) { - int irq; + lock_vector_lock(); + irq_matrix_offline(vector_matrix); + unlock_vector_lock(); +} + +static int apic_set_affinity(struct irq_data *irqd, + const struct cpumask *dest, bool force) +{ + struct apic_chip_data *apicd = apic_chip_data(irqd); + int err; - lockdep_assert_held(&vector_lock); /* - * On most of the platforms, legacy PIC delivers the interrupts on the - * boot cpu. But there are certain platforms where PIC interrupts are - * delivered to multiple cpu's. If the legacy IRQ is handled by the - * legacy PIC, for the new cpu that is coming online, setup the static - * legacy vector to irq mapping: + * Core code can call here for inactive interrupts. For inactive + * interrupts which use managed or reservation mode there is no + * point in going through the vector assignment right now as the + * activation will assign a vector which fits the destination + * cpumask. Let the core code store the destination mask and be + * done with it. */ - for (irq = 0; irq < nr_legacy_irqs(); irq++) - per_cpu(vector_irq, cpu)[ISA_IRQ_VECTOR(irq)] = irq_to_desc(irq); + if (!irqd_is_activated(irqd) && + (apicd->is_managed || apicd->can_reserve)) + return IRQ_SET_MASK_OK; - __setup_vector_irq(cpu); + raw_spin_lock(&vector_lock); + cpumask_and(vector_searchmask, dest, cpu_online_mask); + if (irqd_affinity_is_managed(irqd)) + err = assign_managed_vector(irqd, vector_searchmask); + else + err = assign_vector_locked(irqd, vector_searchmask); + raw_spin_unlock(&vector_lock); + return err ? err : IRQ_SET_MASK_OK; } -static int apic_retrigger_irq(struct irq_data *irq_data) +#else +# define apic_set_affinity NULL +#endif + +static int apic_retrigger_irq(struct irq_data *irqd) { - struct apic_chip_data *data = apic_chip_data(irq_data); + struct apic_chip_data *apicd = apic_chip_data(irqd); unsigned long flags; - int cpu; raw_spin_lock_irqsave(&vector_lock, flags); - cpu = cpumask_first_and(data->domain, cpu_online_mask); - apic->send_IPI_mask(cpumask_of(cpu), data->cfg.vector); + apic->send_IPI(apicd->cpu, apicd->vector); raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } -void apic_ack_edge(struct irq_data *data) +void apic_ack_edge(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(data)); - irq_move_irq(data); + irq_complete_move(irqd_cfg(irqd)); + irq_move_irq(irqd); ack_APIC_irq(); } -static int apic_set_affinity(struct irq_data *irq_data, - const struct cpumask *dest, bool force) -{ - struct apic_chip_data *data = irq_data->chip_data; - int err, irq = irq_data->irq; - - if (!IS_ENABLED(CONFIG_SMP)) - return -EPERM; - - if (!cpumask_intersects(dest, cpu_online_mask)) - return -EINVAL; - - err = assign_irq_vector(irq, data, dest, irq_data); - return err ? 
err : IRQ_SET_MASK_OK; -} - static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, @@ -561,115 +781,98 @@ static struct irq_chip lapic_controller = { }; #ifdef CONFIG_SMP -static void __send_cleanup_vector(struct apic_chip_data *data) -{ - raw_spin_lock(&vector_lock); - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - data->move_in_progress = 0; - if (!cpumask_empty(data->old_domain)) - apic->send_IPI_mask(data->old_domain, IRQ_MOVE_CLEANUP_VECTOR); - raw_spin_unlock(&vector_lock); -} -void send_cleanup_vector(struct irq_cfg *cfg) +static void free_moved_vector(struct apic_chip_data *apicd) { - struct apic_chip_data *data; + unsigned int vector = apicd->prev_vector; + unsigned int cpu = apicd->prev_cpu; + bool managed = apicd->is_managed; - data = container_of(cfg, struct apic_chip_data, cfg); - if (data->move_in_progress) - __send_cleanup_vector(data); + /* + * This should never happen. Managed interrupts are not + * migrated except on CPU down, which does not involve the + * cleanup vector. But try to keep the accounting correct + * nevertheless. + */ + WARN_ON_ONCE(managed); + + trace_vector_free_moved(apicd->irq, cpu, vector, managed); + irq_matrix_free(vector_matrix, cpu, vector, managed); + per_cpu(vector_irq, cpu)[vector] = VECTOR_UNUSED; + hlist_del_init(&apicd->clist); + apicd->prev_vector = 0; + apicd->move_in_progress = 0; } asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { - unsigned vector, me; + struct hlist_head *clhead = this_cpu_ptr(&cleanup_list); + struct apic_chip_data *apicd; + struct hlist_node *tmp; entering_ack_irq(); - /* Prevent vectors vanishing under us */ raw_spin_lock(&vector_lock); - me = smp_processor_id(); - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - struct apic_chip_data *data; - struct irq_desc *desc; - unsigned int irr; - - retry: - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - - if (!raw_spin_trylock(&desc->lock)) { - raw_spin_unlock(&vector_lock); - cpu_relax(); - raw_spin_lock(&vector_lock); - goto retry; - } - - data = apic_chip_data(irq_desc_get_irq_data(desc)); - if (!data) - goto unlock; + hlist_for_each_entry_safe(apicd, tmp, clhead, clist) { + unsigned int irr, vector = apicd->prev_vector; /* - * Nothing to cleanup if irq migration is in progress - * or this cpu is not set in the cleanup mask. - */ - if (data->move_in_progress || - !cpumask_test_cpu(me, data->old_domain)) - goto unlock; - - /* - * We have two cases to handle here: - * 1) vector is unchanged but the target mask got reduced - * 2) vector and the target mask has changed - * - * #1 is obvious, but in #2 we have two vectors with the same - * irq descriptor: the old and the new vector. So we need to - * make sure that we only cleanup the old vector. The new - * vector has the current @vector number in the config and - * this cpu is part of the target mask. We better leave that - * one alone. - */ - if (vector == data->cfg.vector && - cpumask_test_cpu(me, data->domain)) - goto unlock; - - irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); - /* - * Check if the vector that needs to be cleanedup is - * registered at the cpu's IRR. If so, then this is not - * the best time to clean it up. Lets clean it up in the + * Paranoia: Check if the vector that needs to be cleaned + * up is registered at the APICs IRR. If so, then this is + * not the best time to clean it up. 
Clean it up in the * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR - * to myself. + * to this CPU. IRQ_MOVE_CLEANUP_VECTOR is the lowest + * priority external vector, so on return from this + * interrupt the device interrupt will happen first. */ - if (irr & (1 << (vector % 32))) { + irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); + if (irr & (1U << (vector % 32))) { apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); - goto unlock; + continue; } - __this_cpu_write(vector_irq[vector], VECTOR_UNUSED); - cpumask_clear_cpu(me, data->old_domain); -unlock: - raw_spin_unlock(&desc->lock); + free_moved_vector(apicd); } raw_spin_unlock(&vector_lock); - exiting_irq(); } +static void __send_cleanup_vector(struct apic_chip_data *apicd) +{ + unsigned int cpu; + + raw_spin_lock(&vector_lock); + apicd->move_in_progress = 0; + cpu = apicd->prev_cpu; + if (cpu_online(cpu)) { + hlist_add_head(&apicd->clist, per_cpu_ptr(&cleanup_list, cpu)); + apic->send_IPI(cpu, IRQ_MOVE_CLEANUP_VECTOR); + } else { + apicd->prev_vector = 0; + } + raw_spin_unlock(&vector_lock); +} + +void send_cleanup_vector(struct irq_cfg *cfg) +{ + struct apic_chip_data *apicd; + + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (apicd->move_in_progress) + __send_cleanup_vector(apicd); +} + static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector) { - unsigned me; - struct apic_chip_data *data; + struct apic_chip_data *apicd; - data = container_of(cfg, struct apic_chip_data, cfg); - if (likely(!data->move_in_progress)) + apicd = container_of(cfg, struct apic_chip_data, hw_irq_cfg); + if (likely(!apicd->move_in_progress)) return; - me = smp_processor_id(); - if (vector == data->cfg.vector && cpumask_test_cpu(me, data->domain)) - __send_cleanup_vector(data); + if (vector == apicd->vector && apicd->cpu == smp_processor_id()) + __send_cleanup_vector(apicd); } void irq_complete_move(struct irq_cfg *cfg) @@ -682,10 +885,9 @@ void irq_complete_move(struct irq_cfg *cfg) */ void irq_force_complete_move(struct irq_desc *desc) { - struct irq_data *irqdata; - struct apic_chip_data *data; - struct irq_cfg *cfg; - unsigned int cpu; + struct apic_chip_data *apicd; + struct irq_data *irqd; + unsigned int vector; /* * The function is called for all descriptors regardless of which @@ -696,43 +898,31 @@ void irq_force_complete_move(struct irq_desc *desc) * Check first that the chip_data is what we expect * (apic_chip_data) before touching it any further. */ - irqdata = irq_domain_get_irq_data(x86_vector_domain, - irq_desc_get_irq(desc)); - if (!irqdata) + irqd = irq_domain_get_irq_data(x86_vector_domain, + irq_desc_get_irq(desc)); + if (!irqd) return; - data = apic_chip_data(irqdata); - cfg = data ? &data->cfg : NULL; + raw_spin_lock(&vector_lock); + apicd = apic_chip_data(irqd); + if (!apicd) + goto unlock; - if (!cfg) - return; + /* + * If prev_vector is empty, no action required. + */ + vector = apicd->prev_vector; + if (!vector) + goto unlock; /* - * This is tricky. If the cleanup of @data->old_domain has not been + * This is tricky. If the cleanup of the old vector has not been * done yet, then the following setaffinity call will fail with * -EBUSY. This can leave the interrupt in a stale state. * * All CPUs are stuck in stop machine with interrupts disabled so * calling __irq_complete_move() would be completely pointless. - */ - raw_spin_lock(&vector_lock); - /* - * Clean out all offline cpus (including the outgoing one) from the - * old_domain mask. 
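The IRR probe in the cleanup handler indexes one of the eight 32-bit IRR registers, which sit 0x10 bytes apart in the local APIC register space. A worked example of that index math (vector value hypothetical):

#include <stdio.h>

int main(void)
{
        unsigned int vector = 0x22;              /* hypothetical vector 34 */
        unsigned int reg = (vector / 32) * 0x10; /* byte offset from APIC_IRR */
        unsigned int bit = vector % 32;          /* bit within that register */

        printf("vector 0x%02x -> APIC_IRR + 0x%02x, bit %u\n",
               vector, reg, bit);
        return 0;
}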
- */ - cpumask_and(data->old_domain, data->old_domain, cpu_online_mask); - - /* - * If move_in_progress is cleared and the old_domain mask is empty, - * then there is nothing to cleanup. fixup_irqs() will take care of - * the stale vectors on the outgoing cpu. - */ - if (!data->move_in_progress && cpumask_empty(data->old_domain)) { - raw_spin_unlock(&vector_lock); - return; - } - - /* + * * 1) The interrupt is in move_in_progress state. That means that we * have not seen an interrupt since the io_apic was reprogrammed to * the new vector. @@ -740,7 +930,7 @@ void irq_force_complete_move(struct irq_desc *desc) * 2) The interrupt has fired on the new vector, but the cleanup IPIs * have not been processed yet. */ - if (data->move_in_progress) { + if (apicd->move_in_progress) { /* * In theory there is a race: * @@ -774,21 +964,43 @@ void irq_force_complete_move(struct irq_desc *desc) * area arises. */ pr_warn("IRQ fixup: irq %d move in progress, old vector %d\n", - irqdata->irq, cfg->old_vector); + irqd->irq, vector); } - /* - * If old_domain is not empty, then other cpus still have the irq - * descriptor set in their vector array. Clean it up. - */ - for_each_cpu(cpu, data->old_domain) - per_cpu(vector_irq, cpu)[cfg->old_vector] = VECTOR_UNUSED; + free_moved_vector(apicd); +unlock: + raw_spin_unlock(&vector_lock); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * Note, this is not accurate accounting, but at least good enough to + * prevent that the actual interrupt move will run out of vectors. + */ +int lapic_can_unplug_cpu(void) +{ + unsigned int rsvd, avl, tomove, cpu = smp_processor_id(); + int ret = 0; - /* Cleanup the left overs of the (half finished) move */ - cpumask_clear(data->old_domain); - data->move_in_progress = 0; + raw_spin_lock(&vector_lock); + tomove = irq_matrix_allocated(vector_matrix); + avl = irq_matrix_available(vector_matrix, true); + if (avl < tomove) { + pr_warn("CPU %u has %u vectors, %u available. Cannot disable CPU\n", + cpu, tomove, avl); + ret = -ENOSPC; + goto out; + } + rsvd = irq_matrix_reserved(vector_matrix); + if (avl < rsvd) { + pr_warn("Reserved vectors %u > available %u. IRQ request may fail\n", + rsvd, avl); + } +out: raw_spin_unlock(&vector_lock); + return ret; } -#endif +#endif /* HOTPLUG_CPU */ +#endif /* SMP */ static void __init print_APIC_field(int base) { diff --git a/arch/x86/kernel/apic/x2apic.h b/arch/x86/kernel/apic/x2apic.h new file mode 100644 index 000000000000..b107de381cb5 --- /dev/null +++ b/arch/x86/kernel/apic/x2apic.h @@ -0,0 +1,9 @@ +/* Common bits for X2APIC cluster/physical modes. 
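lapic_can_unplug_cpu() above is a plain capacity comparison over the matrix counters. With hypothetical numbers: if 180 allocated vectors would have to move off the dying CPU but only 150 slots remain available on the others, the unplug is refused:

#include <stdio.h>

int main(void)
{
        unsigned int tomove = 180; /* allocated vectors to migrate (hypothetical) */
        unsigned int avl = 150;    /* free slots on remaining CPUs (hypothetical) */

        if (avl < tomove)
                printf("refuse unplug: %u needed, %u available (-ENOSPC)\n",
                       tomove, avl);
        return 0;
}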
*/ + +int x2apic_apic_id_valid(int apicid); +int x2apic_apic_id_registered(void); +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest); +unsigned int x2apic_get_apic_id(unsigned long id); +u32 x2apic_set_apic_id(unsigned int id); +int x2apic_phys_pkg_id(int initial_apicid, int index_msb); +void x2apic_send_IPI_self(int vector); diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 481237cb1544..622f13ca8a94 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/string.h> @@ -8,22 +9,24 @@ #include <linux/cpu.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include "x2apic.h" + +struct cluster_mask { + unsigned int clusterid; + int node; + struct cpumask mask; +}; static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); -static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster); static DEFINE_PER_CPU(cpumask_var_t, ipi_mask); +static DEFINE_PER_CPU(struct cluster_mask *, cluster_masks); +static struct cluster_mask *cluster_hotplug_mask; static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return x2apic_enabled(); } -static inline u32 x2apic_cluster(int cpu) -{ - return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16; -} - static void x2apic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_logical_apicid, cpu); @@ -35,49 +38,34 @@ static void x2apic_send_IPI(int cpu, int vector) static void __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) { - struct cpumask *cpus_in_cluster_ptr; - struct cpumask *ipi_mask_ptr; - unsigned int cpu, this_cpu; + unsigned int cpu, clustercpu; + struct cpumask *tmpmsk; unsigned long flags; u32 dest; x2apic_wrmsr_fence(); - local_irq_save(flags); - this_cpu = smp_processor_id(); + tmpmsk = this_cpu_cpumask_var_ptr(ipi_mask); + cpumask_copy(tmpmsk, mask); + /* If IPI should not be sent to self, clear current CPU */ + if (apic_dest != APIC_DEST_ALLINC) + cpumask_clear_cpu(smp_processor_id(), tmpmsk); - /* - * We are to modify mask, so we need an own copy - * and be sure it's manipulated with irq off. - */ - ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); - - /* - * The idea is to send one IPI per cluster. - */ - for_each_cpu(cpu, ipi_mask_ptr) { - unsigned long i; + /* Collapse cpus in a cluster so a single IPI per cluster is sent */ + for_each_cpu(cpu, tmpmsk) { + struct cluster_mask *cmsk = per_cpu(cluster_masks, cpu); - cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu); dest = 0; - - /* Collect cpus in cluster. */ - for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) { - if (apic_dest == APIC_DEST_ALLINC || i != this_cpu) - dest |= per_cpu(x86_cpu_to_logical_apicid, i); - } + for_each_cpu_and(clustercpu, tmpmsk, &cmsk->mask) + dest |= per_cpu(x86_cpu_to_logical_apicid, clustercpu); if (!dest) continue; __x2apic_send_IPI_dest(dest, vector, apic->dest_logical); - /* - * Cluster sibling cpus should be discared now so - * we would not send IPI them second time. 
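The rewritten __x2apic_send_IPI_mask() sends one IPI per cluster by OR-ing the logical IDs of all targets inside that cluster. A toy model with clusters shrunk to 4 CPUs (real x2apic clusters hold 16; all values hypothetical):

#include <stdio.h>

int main(void)
{
        unsigned int targets = 0x36;    /* CPUs 1,2,4,5 (hypothetical) */

        for (unsigned int c = 0; c < 2; c++) {  /* two toy clusters of 4 */
                unsigned int dest = 0;

                for (unsigned int i = 0; i < 4; i++)
                        if (targets & (1u << (c * 4 + i)))
                                dest |= 1u << i; /* OR per-CPU logical IDs */
                if (dest)
                        printf("one IPI to cluster %u, dest bits 0x%x\n",
                               c, dest);
        }
        return 0;       /* two IPIs cover four target CPUs */
}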
- */ - cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr); + /* Remove cluster CPUs from tmpmask */ + cpumask_andnot(tmpmsk, tmpmsk, &cmsk->mask); } local_irq_restore(flags); @@ -104,125 +92,90 @@ static void x2apic_send_IPI_all(int vector) __x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC); } -static int -x2apic_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 x2apic_calc_apicid(unsigned int cpu) { - struct cpumask *effmsk = irq_data_get_effective_affinity_mask(irqdata); - unsigned int cpu; - u32 dest = 0; - u16 cluster; - - cpu = cpumask_first(mask); - if (cpu >= nr_cpu_ids) - return -EINVAL; - - dest = per_cpu(x86_cpu_to_logical_apicid, cpu); - cluster = x2apic_cluster(cpu); - - cpumask_clear(effmsk); - for_each_cpu(cpu, mask) { - if (cluster != x2apic_cluster(cpu)) - continue; - dest |= per_cpu(x86_cpu_to_logical_apicid, cpu); - cpumask_set_cpu(cpu, effmsk); - } - - *apicid = dest; - return 0; + return per_cpu(x86_cpu_to_logical_apicid, cpu); } static void init_x2apic_ldr(void) { - unsigned int this_cpu = smp_processor_id(); + struct cluster_mask *cmsk = this_cpu_read(cluster_masks); + u32 cluster, apicid = apic_read(APIC_LDR); unsigned int cpu; - per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR); + this_cpu_write(x86_cpu_to_logical_apicid, apicid); + + if (cmsk) + goto update; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu)); + cluster = apicid >> 16; for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); + cmsk = per_cpu(cluster_masks, cpu); + /* Matching cluster found. Link and update it. */ + if (cmsk && cmsk->clusterid == cluster) + goto update; } + cmsk = cluster_hotplug_mask; + cluster_hotplug_mask = NULL; +update: + this_cpu_write(cluster_masks, cmsk); + cpumask_set_cpu(smp_processor_id(), &cmsk->mask); } -/* - * At CPU state changes, update the x2apic cluster sibling info. - */ -static int x2apic_prepare_cpu(unsigned int cpu) +static int alloc_clustermask(unsigned int cpu, int node) { - if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL)) - return -ENOMEM; + if (per_cpu(cluster_masks, cpu)) + return 0; + /* + * If a hotplug spare mask exists, check whether it's on the right + * node. If not, free it and allocate a new one. 
+ */ + if (cluster_hotplug_mask) { + if (cluster_hotplug_mask->node == node) + return 0; + kfree(cluster_hotplug_mask); + } - if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) { - free_cpumask_var(per_cpu(cpus_in_cluster, cpu)); + cluster_hotplug_mask = kzalloc_node(sizeof(*cluster_hotplug_mask), + GFP_KERNEL, node); + if (!cluster_hotplug_mask) return -ENOMEM; - } + cluster_hotplug_mask->node = node; + return 0; +} +static int x2apic_prepare_cpu(unsigned int cpu) +{ + if (alloc_clustermask(cpu, cpu_to_node(cpu)) < 0) + return -ENOMEM; + if (!zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL)) + return -ENOMEM; return 0; } -static int x2apic_dead_cpu(unsigned int this_cpu) +static int x2apic_dead_cpu(unsigned int dead_cpu) { - int cpu; + struct cluster_mask *cmsk = per_cpu(cluster_masks, dead_cpu); - for_each_online_cpu(cpu) { - if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu)) - continue; - cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu)); - cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu)); - } - free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu)); - free_cpumask_var(per_cpu(ipi_mask, this_cpu)); + cpumask_clear_cpu(dead_cpu, &cmsk->mask); + free_cpumask_var(per_cpu(ipi_mask, dead_cpu)); return 0; } static int x2apic_cluster_probe(void) { - int cpu = smp_processor_id(); - int ret; - if (!x2apic_mode) return 0; - ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", - x2apic_prepare_cpu, x2apic_dead_cpu); - if (ret < 0) { + if (cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "x86/x2apic:prepare", + x2apic_prepare_cpu, x2apic_dead_cpu) < 0) { pr_err("Failed to register X2APIC_PREPARE\n"); return 0; } - cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu)); + init_x2apic_ldr(); return 1; } -static const struct cpumask *x2apic_cluster_target_cpus(void) -{ - return cpu_all_mask; -} - -/* - * Each x2apic cluster is an allocation domain. - */ -static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask, - const struct cpumask *mask) -{ - /* - * To minimize vector pressure, default case of boot, device bringup - * etc will use a single cpu for the interrupt destination. - * - * On explicit migration requests coming from irqbalance etc, - * interrupts will be routed to the x2apic cluster (cluster-id - * derived from the first cpu in the mask) members specified - * in the mask. 
- */ - if (mask == x2apic_cluster_target_cpus()) - cpumask_copy(retmask, cpumask_of(cpu)); - else - cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu)); -} - static struct apic apic_x2apic_cluster __ro_after_init = { .name = "cluster x2apic", @@ -234,12 +187,10 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .irq_delivery_mode = dest_LowestPrio, .irq_dest_mode = 1, /* logical */ - .target_cpus = x2apic_cluster_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = cluster_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -252,7 +203,7 @@ static struct apic apic_x2apic_cluster __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, + .calc_dest_apicid = x2apic_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 3baf0c3dc875..f8d9d69994e6 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/threads.h> #include <linux/cpumask.h> #include <linux/string.h> @@ -6,7 +7,8 @@ #include <linux/dmar.h> #include <asm/smp.h> -#include <asm/x2apic.h> +#include <asm/ipi.h> +#include "x2apic.h" int x2apic_phys; @@ -98,6 +100,43 @@ static int x2apic_phys_probe(void) return apic == &apic_x2apic_phys; } +/* Common x2apic functions, also used by x2apic_cluster */ +int x2apic_apic_id_valid(int apicid) +{ + return 1; +} + +int x2apic_apic_id_registered(void) +{ + return 1; +} + +void __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) +{ + unsigned long cfg = __prepare_ICR(0, vector, dest); + native_x2apic_icr_write(cfg, apicid); +} + +unsigned int x2apic_get_apic_id(unsigned long id) +{ + return id; +} + +u32 x2apic_set_apic_id(unsigned int id) +{ + return id; +} + +int x2apic_phys_pkg_id(int initial_apicid, int index_msb) +{ + return initial_apicid >> index_msb; +} + +void x2apic_send_IPI_self(int vector) +{ + apic_write(APIC_SELF_IPI, vector); +} + static struct apic apic_x2apic_phys __ro_after_init = { .name = "physical x2apic", @@ -109,12 +148,10 @@ static struct apic apic_x2apic_phys __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = 0, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = init_x2apic_ldr, .ioapic_phys_id_map = NULL, @@ -127,7 +164,7 @@ static struct apic apic_x2apic_phys __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = x2apic_set_apic_id, - .cpu_mask_to_apicid = default_cpu_mask_to_apicid, + .calc_dest_apicid = apic_default_calc_apicid, .send_IPI = x2apic_send_IPI, .send_IPI_mask = x2apic_send_IPI_mask, diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 0d57bb9079c9..e1b8e8bf6b3c 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -154,6 +154,48 @@ static int __init early_get_pnodeid(void) return pnode; } +static void __init uv_tsc_check_sync(void) +{ + u64 mmr; + int sync_state; + int mmr_shift; + char *state; + bool valid; + + /* Accommodate different UV arch BIOSes */ + mmr = uv_early_read_mmr(UVH_TSC_SYNC_MMR); + mmr_shift = + is_uv1_hub() ? 0 : + is_uv2_hub() ? 
UVH_TSC_SYNC_SHIFT_UV2K : UVH_TSC_SYNC_SHIFT; + if (mmr_shift) + sync_state = (mmr >> mmr_shift) & UVH_TSC_SYNC_MASK; + else + sync_state = 0; + + switch (sync_state) { + case UVH_TSC_SYNC_VALID: + state = "in sync"; + valid = true; + break; + + case UVH_TSC_SYNC_INVALID: + state = "unstable"; + valid = false; + break; + default: + state = "unknown: assuming valid"; + valid = true; + break; + } + pr_info("UV: TSC sync state from BIOS:0%d(%s)\n", sync_state, state); + + /* Mark flag that says TSC != 0 is valid for socket 0 */ + if (valid) + mark_tsc_async_resets("UV BIOS"); + else + mark_tsc_unstable("UV BIOS"); +} + /* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ #define SMT_LEVEL 0 /* Leaf 0xb SMT level */ @@ -288,6 +330,7 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) } pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic); + uv_tsc_check_sync(); return uv_apic; @@ -525,16 +568,9 @@ static void uv_init_apic_ldr(void) { } -static int -uv_cpu_mask_to_apicid(const struct cpumask *mask, struct irq_data *irqdata, - unsigned int *apicid) +static u32 apic_uv_calc_apicid(unsigned int cpu) { - int ret = default_cpu_mask_to_apicid(mask, irqdata, apicid); - - if (!ret) - *apicid |= uv_apicid_hibits; - - return ret; + return apic_default_calc_apicid(cpu) | uv_apicid_hibits; } static unsigned int x2apic_get_apic_id(unsigned long x) @@ -547,7 +583,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x) return id; } -static unsigned long set_apic_id(unsigned int id) +static u32 set_apic_id(unsigned int id) { /* CHECKME: Do we need to mask out the xapic extra bits? */ return id; @@ -584,12 +620,10 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .irq_delivery_mode = dest_Fixed, .irq_dest_mode = 0, /* Physical */ - .target_cpus = online_target_cpus, .disable_esr = 0, .dest_logical = APIC_DEST_LOGICAL, .check_apicid_used = NULL, - .vector_allocation_domain = default_vector_allocation_domain, .init_apic_ldr = uv_init_apic_ldr, .ioapic_phys_id_map = NULL, @@ -602,7 +636,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .get_apic_id = x2apic_get_apic_id, .set_apic_id = set_apic_id, - .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, + .calc_dest_apicid = apic_uv_calc_apicid, .send_IPI = uv_send_IPI_one, .send_IPI_mask = uv_send_IPI_mask, @@ -920,9 +954,8 @@ static __init void uv_rtc_init(void) /* * percpu heartbeat timer */ -static void uv_heartbeat(unsigned long ignored) +static void uv_heartbeat(struct timer_list *timer) { - struct timer_list *timer = &uv_scir_info->timer; unsigned char bits = uv_scir_info->state; /* Flip heartbeat bit: */ @@ -947,7 +980,7 @@ static int uv_heartbeat_enable(unsigned int cpu) struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer; uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); - setup_pinned_timer(timer, uv_heartbeat, cpu); + timer_setup(timer, uv_heartbeat, TIMER_PINNED); timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index de827d6ac8c2..8ea78275480d 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Generate definitions needed by assembly language modules. 
* This code generates raw asm output which is post-processed to extract diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 710edab9e644..dedf428b20b6 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifndef __LINUX_KBUILD_H # error "Please do not build this file directly, build asm-offsets.c instead" #endif diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index cf42206926af..630212fa9b9d 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifndef __LINUX_KBUILD_H # error "Please do not build this file directly, build asm-offsets.c instead" #endif diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c index f3672508b249..e1efe44ebefc 100644 --- a/arch/x86/kernel/audit_64.c +++ b/arch/x86/kernel/audit_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/types.h> #include <linux/audit.h> diff --git a/arch/x86/kernel/bootflag.c b/arch/x86/kernel/bootflag.c index 52c8e3c7789d..3fed7ae58b60 100644 --- a/arch/x86/kernel/bootflag.c +++ b/arch/x86/kernel/bootflag.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implement 'Simple Boot Flag Specification 2.0' */ diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index 145863d4d343..33399426793e 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/sched.h> #include <linux/kthread.h> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index e17942c131c8..570e8bb1f386 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for x86-compatible CPU details, features and quirks # @@ -21,7 +22,8 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o -obj-$(CONFIG_CPU_FREQ) += aperfmperf.o +obj-y += aperfmperf.o +obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 9862e2cd6d93..d58184b7cd44 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -763,6 +763,16 @@ static void init_amd_bd(struct cpuinfo_x86 *c) } } +static void init_amd_zn(struct cpuinfo_x86 *c) +{ + /* + * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects + * all up to and including B1. 
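The erratum 1076 fix described in the comment above reduces to a model/stepping gate, shown in the hunk below. Restated as a predicate (hypothetical helper; c->x86_mask holds the stepping):

#include <stdbool.h>

/* On family 0x17, models up to 1 at steppings up to 1, the CPB CPUID
 * bit is missing and gets forced on. */
static bool toy_needs_cpb_fixup(unsigned int model, unsigned int stepping)
{
        return model <= 1 && stepping <= 1;
}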
+ */ + if (c->x86_model <= 1 && c->x86_mask <= 1) + set_cpu_cap(c, X86_FEATURE_CPB); +} + static void init_amd(struct cpuinfo_x86 *c) { early_init_amd(c); @@ -791,6 +801,7 @@ static void init_amd(struct cpuinfo_x86 *c) case 0x10: init_amd_gh(c); break; case 0x12: init_amd_ln(c); break; case 0x15: init_amd_bd(c); break; + case 0x17: init_amd_zn(c); break; } /* Enable workaround for FXSAVE leak */ diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 0ee83321a313..7eba34df54c3 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -14,6 +14,8 @@ #include <linux/percpu.h> #include <linux/smp.h> +#include "cpu.h" + struct aperfmperf_sample { unsigned int khz; ktime_t time; @@ -24,7 +26,7 @@ struct aperfmperf_sample { static DEFINE_PER_CPU(struct aperfmperf_sample, samples); #define APERFMPERF_CACHE_THRESHOLD_MS 10 -#define APERFMPERF_REFRESH_DELAY_MS 20 +#define APERFMPERF_REFRESH_DELAY_MS 10 #define APERFMPERF_STALE_THRESHOLD_MS 1000 /* @@ -38,14 +40,8 @@ static void aperfmperf_snapshot_khz(void *dummy) u64 aperf, aperf_delta; u64 mperf, mperf_delta; struct aperfmperf_sample *s = this_cpu_ptr(&samples); - ktime_t now = ktime_get(); - s64 time_delta = ktime_ms_delta(now, s->time); unsigned long flags; - /* Don't bother re-computing within the cache threshold time. */ - if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) - return; - local_irq_save(flags); rdmsrl(MSR_IA32_APERF, aperf); rdmsrl(MSR_IA32_MPERF, mperf); @@ -61,31 +57,68 @@ static void aperfmperf_snapshot_khz(void *dummy) if (mperf_delta == 0) return; - s->time = now; + s->time = ktime_get(); s->aperf = aperf; s->mperf = mperf; + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); +} - /* If the previous iteration was too long ago, discard it. */ - if (time_delta > APERFMPERF_STALE_THRESHOLD_MS) - s->khz = 0; - else - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); +static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait) +{ + s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu)); + + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) + return true; + + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait); + + /* Return false if the previous iteration was too long ago. 
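The snapshot math divides the APERF delta by the MPERF delta and scales by the base frequency. A worked example with hypothetical counter deltas, for a CPU boosting 50% above its 2.4 GHz base:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t cpu_khz = 2400000;  /* 2.4 GHz base (hypothetical) */
        uint64_t aperf_delta = 150;  /* actual cycles since last sample */
        uint64_t mperf_delta = 100;  /* reference cycles since last sample */

        /* khz = cpu_khz * aperf_delta / mperf_delta, as in the patch */
        printf("%llu kHz\n",
               (unsigned long long)(cpu_khz * aperf_delta / mperf_delta));
        return 0;       /* prints 3600000 kHz */
}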
*/ + return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; } -unsigned int arch_freq_get_on_cpu(int cpu) +unsigned int aperfmperf_get_khz(int cpu) { - unsigned int khz; + if (!cpu_khz) + return 0; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return 0; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); + return per_cpu(samples.khz, cpu); +} + +void arch_freq_prepare_all(void) +{ + ktime_t now = ktime_get(); + bool wait = false; + int cpu; + + if (!cpu_khz) + return; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return; + + for_each_online_cpu(cpu) + if (!aperfmperf_snapshot_cpu(cpu, now, false)) + wait = true; + + if (wait) + msleep(APERFMPERF_REFRESH_DELAY_MS); +} + +unsigned int arch_freq_get_on_cpu(int cpu) +{ if (!cpu_khz) return 0; if (!static_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); - khz = per_cpu(samples.khz, cpu); - if (khz) - return khz; + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) + return per_cpu(samples.khz, cpu); msleep(APERFMPERF_REFRESH_DELAY_MS); smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index db684880d74a..ba0b2424c9b0 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1994 Linus Torvalds * @@ -21,14 +22,6 @@ void __init check_bugs(void) { -#ifdef CONFIG_X86_32 - /* - * Regardless of whether PCID is enumerated, the SDM says - * that it can't be enabled in 32-bit mode. - */ - setup_clear_cpu_cap(X86_FEATURE_PCID); -#endif - identify_boot_cpu(); if (!IS_ENABLED(CONFIG_SMP)) { diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 44207b71fee1..68bc6d9b3132 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/sched.h> #include <linux/sched/clock.h> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 775f10100d7f..fa998ca8aa5a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -329,6 +329,30 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) } } +static __always_inline void setup_umip(struct cpuinfo_x86 *c) +{ + /* Check the boot processor, plus build option for UMIP. */ + if (!cpu_feature_enabled(X86_FEATURE_UMIP)) + goto out; + + /* Check the current processor's cpuid bits. */ + if (!cpu_has(c, X86_FEATURE_UMIP)) + goto out; + + cr4_set_bits(X86_CR4_UMIP); + + pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); + + return; + +out: + /* + * Make sure UMIP is disabled in case it was enabled in a + * previous boot (e.g., via kexec). + */ + cr4_clear_bits(X86_CR4_UMIP); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -863,8 +887,8 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) * cache alignment. * The others are not touched to avoid unwanted side effects. * - * WARNING: this function is only called on the BP. Don't add code here - * that is supposed to run on all CPUs. + * WARNING: this function is only called on the boot CPU. Don't add code + * here that is supposed to run on all CPUs. 
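UMIP (CR4 bit 11) makes SGDT, SIDT, SLDT, SMSW and STR raise #GP outside ring 0. A user-space probe (x86-64) that exercises it: on a UMIP-enforcing kernel this either dies with SIGSEGV or, where the kernel emulates the instruction, prints dummy values; on pre-UMIP setups it prints the real GDT base:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        struct {
                uint16_t limit;
                uint64_t base;
        } __attribute__((packed)) gdtr;

        /* Descriptor-table read that UMIP turns into #GP at CPL 3 */
        asm volatile("sgdt %0" : "=m"(gdtr));
        printf("GDT: base 0x%llx limit 0x%x\n",
               (unsigned long long)gdtr.base, gdtr.limit);
        return 0;
}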
*/ static void __init early_identify_cpu(struct cpuinfo_x86 *c) { @@ -904,6 +928,14 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); fpu__init_system(c); + +#ifdef CONFIG_X86_32 + /* + * Regardless of whether PCID is enumerated, the SDM says + * that it can't be enabled in 32-bit mode. + */ + setup_clear_cpu_cap(X86_FEATURE_PCID); +#endif } void __init early_cpu_init(void) @@ -1139,9 +1171,10 @@ static void identify_cpu(struct cpuinfo_x86 *c) /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); - /* Set up SMEP/SMAP */ + /* Set up SMEP/SMAP/UMIP */ setup_smep(c); setup_smap(c); + setup_umip(c); /* * The vendor-specific functions might have changed features. @@ -1293,18 +1326,16 @@ void print_cpu_info(struct cpuinfo_x86 *c) pr_cont(")\n"); } -static __init int setup_disablecpuid(char *arg) +/* + * clearcpuid= was already parsed in fpu__init_parse_early_param. + * But we need to keep a dummy __setup around otherwise it would + * show up as an environment variable for init. + */ +static __init int setup_clearcpuid(char *arg) { - int bit; - - if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32) - setup_clear_cpu_cap(bit); - else - return 0; - return 1; } -__setup("clearcpuid=", setup_disablecpuid); +__setup("clearcpuid=", setup_clearcpuid); #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(union irq_stack_union, @@ -1564,9 +1595,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, me); - load_sp0(t, ¤t->thread); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); clear_all_debug_regs(); @@ -1588,7 +1623,6 @@ void cpu_init(void) int cpu = smp_processor_id(); struct task_struct *curr = current; struct tss_struct *t = &per_cpu(cpu_tss, cpu); - struct thread_struct *thread = &curr->thread; wait_for_master_cpu(cpu); @@ -1619,9 +1653,13 @@ void cpu_init(void) initialize_tlbstate_and_flush(); enter_lazy_tlb(&init_mm, curr); - load_sp0(t, thread); + /* + * Initialize the TSS. Don't bother initializing sp0, as the initial + * task never enters user mode. + */ set_tss_desc(cpu, t); load_TR_desc(); + load_mm_ldt(&init_mm); t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 2584265d4745..e806b11a99af 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef ARCH_X86_CPU_H #define ARCH_X86_CPU_H @@ -46,4 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); + +unsigned int aperfmperf_get_khz(int cpu); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c new file mode 100644 index 000000000000..904b0a3c4e53 --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -0,0 +1,121 @@ +/* Declare dependencies between CPUIDs */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <asm/cpufeature.h> + +struct cpuid_dep { + unsigned int feature; + unsigned int depends; +}; + +/* + * Table of CPUID features that depend on others. + * + * This only includes dependencies that can be usefully disabled, not + * features part of the base set (like FPU). 
+ * + * Note this all is not __init / __initdata because it can be + * called from cpu hotplug. It shouldn't do anything in this case, + * but it's difficult to tell that to the init reference checker. + */ +static const struct cpuid_dep cpuid_deps[] = { + { X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE }, + { X86_FEATURE_XSAVES, X86_FEATURE_XSAVE }, + { X86_FEATURE_AVX, X86_FEATURE_XSAVE }, + { X86_FEATURE_PKU, X86_FEATURE_XSAVE }, + { X86_FEATURE_MPX, X86_FEATURE_XSAVE }, + { X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE }, + { X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM, X86_FEATURE_FXSR }, + { X86_FEATURE_XMM2, X86_FEATURE_XMM }, + { X86_FEATURE_XMM3, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 }, + { X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 }, + { X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 }, + { X86_FEATURE_SSSE3, X86_FEATURE_XMM2, }, + { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, + { X86_FEATURE_AES, X86_FEATURE_XMM2 }, + { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, + { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, + { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, + { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F }, + { X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F }, + {} +}; + +static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature) +{ + /* + * Note: This could use the non-atomic __*_bit() variants, but the + * rest of the cpufeature code uses atomics as well, so keep it for + * consistency. Cleanup all of it separately. + */ + if (!c) { + clear_cpu_cap(&boot_cpu_data, feature); + set_bit(feature, (unsigned long *)cpu_caps_cleared); + } else { + clear_bit(feature, (unsigned long *)c->x86_capability); + } +} + +/* Take the capabilities and the BUG bits into account */ +#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8) + +static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + DECLARE_BITMAP(disable, MAX_FEATURE_BITS); + const struct cpuid_dep *d; + bool changed; + + if (WARN_ON(feature >= MAX_FEATURE_BITS)) + return; + + clear_feature(c, feature); + + /* Collect all features to disable, handling dependencies */ + memset(disable, 0, sizeof(disable)); + __set_bit(feature, disable); + + /* Loop until we get a stable state. 
*/ + do { + changed = false; + for (d = cpuid_deps; d->feature; d++) { + if (!test_bit(d->depends, disable)) + continue; + if (__test_and_set_bit(d->feature, disable)) + continue; + + changed = true; + clear_feature(c, d->feature); + } + } while (changed); +} + +void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature) +{ + do_clear_cpu_cap(c, feature); +} + +void setup_clear_cpu_cap(unsigned int feature) +{ + do_clear_cpu_cap(NULL, feature); +} diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 6f077445647a..6b4bb335641f 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/bitops.h> #include <linux/delay.h> #include <linux/pci.h> diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index 4fa90006ac68..bea8d3e24f50 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c @@ -26,6 +26,12 @@ #include <asm/processor.h> #include <asm/hypervisor.h> +extern const struct hypervisor_x86 x86_hyper_vmware; +extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen_pv; +extern const struct hypervisor_x86 x86_hyper_xen_hvm; +extern const struct hypervisor_x86 x86_hyper_kvm; + static const __initconst struct hypervisor_x86 * const hypervisors[] = { #ifdef CONFIG_XEN_PV @@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] = #endif }; -const struct hypervisor_x86 *x86_hyper; -EXPORT_SYMBOL(x86_hyper); +enum x86_hypervisor_type x86_hyper_type; +EXPORT_SYMBOL(x86_hyper_type); -static inline void __init +static inline const struct hypervisor_x86 * __init detect_hypervisor_vendor(void) { - const struct hypervisor_x86 *h, * const *p; + const struct hypervisor_x86 *h = NULL, * const *p; uint32_t pri, max_pri = 0; for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) { - h = *p; - pri = h->detect(); - if (pri != 0 && pri > max_pri) { + pri = (*p)->detect(); + if (pri > max_pri) { max_pri = pri; - x86_hyper = h; + h = *p; } } - if (max_pri) - pr_info("Hypervisor detected: %s\n", x86_hyper->name); + if (h) + pr_info("Hypervisor detected: %s\n", h->name); + + return h; } -void __init init_hypervisor_platform(void) +static void __init copy_array(const void *src, void *target, unsigned int size) { + unsigned int i, n = size / sizeof(void *); + const void * const *from = (const void * const *)src; + const void **to = (const void **)target; - detect_hypervisor_vendor(); - - if (!x86_hyper) - return; - - if (x86_hyper->init_platform) - x86_hyper->init_platform(); + for (i = 0; i < n; i++) + if (from[i]) + to[i] = from[i]; } -bool __init hypervisor_x2apic_available(void) +void __init init_hypervisor_platform(void) { - return x86_hyper && - x86_hyper->x2apic_available && - x86_hyper->x2apic_available(); -} + const struct hypervisor_x86 *h; -void hypervisor_pin_vcpu(int cpu) -{ - if (!x86_hyper) + h = detect_hypervisor_vendor(); + + if (!h) return; - if (x86_hyper->pin_vcpu) - x86_hyper->pin_vcpu(cpu); - else - WARN_ONCE(1, "vcpu pinning requested but not supported!\n"); + copy_array(&h->init, &x86_init.hyper, sizeof(h->init)); + copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime)); + + x86_hyper_type = h->type; + x86_init.hyper.init_platform(); } diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index dfa90a3a5145..b1af22073e28 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1,3 +1,4 
@@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/string.h> @@ -186,21 +187,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); -#ifdef CONFIG_KMEMCHECK - /* - * P4s have a "fast strings" feature which causes single- - * stepping REP instructions to only generate a #DB on - * cache-line boundaries. - * - * Ingo Molnar reported a Pentium D (model 6) and a Xeon - * (model 2) with the same problem. - */ - if (c->x86 == 15) - if (msr_clear_bit(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) - pr_info("kmemcheck: Disabling fast string operations\n"); -#endif - /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 24f749324c0f..54d04d574148 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Routines to identify caches on Intel CPU. * @@ -831,7 +832,6 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; - this_leaf = this_cpu_ci->info_list + index; nshared = base->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).apicid; first = apicid - (apicid % nshared); diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index cd5fc61ba450..88dcf8479013 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -267,6 +267,7 @@ static void rdt_get_cdp_l3_config(int type) r->num_closid = r_l3->num_closid / 2; r->cache.cbm_len = r_l3->cache.cbm_len; r->default_ctrl = r_l3->default_ctrl; + r->cache.shareable_bits = r_l3->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index ebaddaeef023..3397244984f5 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _ASM_X86_INTEL_RDT_H #define _ASM_X86_INTEL_RDT_H @@ -126,12 +127,15 @@ struct rdtgroup { #define RFTYPE_BASE BIT(1) #define RF_CTRLSHIFT 4 #define RF_MONSHIFT 5 +#define RF_TOPSHIFT 6 #define RFTYPE_CTRL BIT(RF_CTRLSHIFT) #define RFTYPE_MON BIT(RF_MONSHIFT) +#define RFTYPE_TOP BIT(RF_TOPSHIFT) #define RFTYPE_RES_CACHE BIT(8) #define RFTYPE_RES_MB BIT(9) #define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) #define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) /* List of all resource groups */ @@ -408,6 +412,10 @@ union cpuid_0x10_x_edx { unsigned int full; }; +void rdt_last_cmd_clear(void); +void rdt_last_cmd_puts(const char *s); +void rdt_last_cmd_printf(const char *fmt, ...); + void rdt_ctrl_update(void *arg); struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); void rdtgroup_kn_unlock(struct kernfs_node *kn); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f6ea94f8954a..23e1d5c249c6 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -42,15 +42,22 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) /* * Only linear delay values is 
supported for current Intel SKUs. */ - if (!r->membw.delay_linear) + if (!r->membw.delay_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); return false; + } ret = kstrtoul(buf, 10, &bw); - if (ret) + if (ret) { + rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); return false; + } - if (bw < r->membw.min_bw || bw > r->default_ctrl) + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, + r->membw.min_bw, r->default_ctrl); return false; + } *data = roundup(bw, (unsigned long)r->membw.bw_gran); return true; @@ -60,8 +67,10 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if (!bw_validate(buf, &data, r)) return -EINVAL; @@ -84,20 +93,29 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r) int ret; ret = kstrtoul(buf, 16, &val); - if (ret) + if (ret) { + rdt_last_cmd_printf("non-hex character in mask %s\n", buf); return false; + } - if (val == 0 || val > r->default_ctrl) + if (val == 0 || val > r->default_ctrl) { + rdt_last_cmd_puts("mask out of range\n"); return false; + } first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) + if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) { + rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val); return false; + } - if ((zero_bit - first_bit) < r->cache.min_cbm_bits) + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("Need at least %d bits in mask\n", + r->cache.min_cbm_bits); return false; + } *data = val; return true; @@ -111,8 +129,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if(!cbm_validate(buf, &data, r)) return -EINVAL; @@ -139,8 +159,10 @@ next: return 0; dom = strsep(&line, ";"); id = strsep(&dom, "="); - if (!dom || kstrtoul(id, 10, &dom_id)) + if (!dom || kstrtoul(id, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); return -EINVAL; + } dom = strim(dom); list_for_each_entry(d, &r->domains, list) { if (d->id == dom_id) { @@ -196,6 +218,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid) if (!strcmp(resname, r->name) && closid < r->num_closid) return parse_line(tok, r); } + rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname); return -EINVAL; } @@ -218,6 +241,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, rdtgroup_kn_unlock(of->kn); return -ENOENT; } + rdt_last_cmd_clear(); closid = rdtgrp->closid; @@ -229,6 +253,12 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, while ((tok = strsep(&buf, "\n")) != NULL) { resname = strim(strsep(&tok, ":")); if (!tok) { + rdt_last_cmd_puts("Missing ':'\n"); + ret = -EINVAL; + goto out; + } + if (tok[0] == '\0') { + rdt_last_cmd_printf("Missing '%s' value\n", resname); ret = -EINVAL; goto out; } diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 30827510094b..681450eee428 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -51,7 +51,7 @@ static LIST_HEAD(rmid_free_lru); * may have a occupancy 
value > intel_cqm_threshold. User can change * the threshold occupancy value. */ -unsigned int rmid_limbo_count; +static unsigned int rmid_limbo_count; /** * @rmid_entry - The entry in the limbo and free lists. diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index a869d4a073c5..64c5ff97ee0d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -24,6 +24,7 @@ #include <linux/fs.h> #include <linux/sysfs.h> #include <linux/kernfs.h> +#include <linux/seq_buf.h> #include <linux/seq_file.h> #include <linux/sched/signal.h> #include <linux/sched/task.h> @@ -51,6 +52,31 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +static struct seq_buf last_cmd_status; +static char last_cmd_status_buf[512]; + +void rdt_last_cmd_clear(void) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_clear(&last_cmd_status); +} + +void rdt_last_cmd_puts(const char *s) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_puts(&last_cmd_status, s); +} + +void rdt_last_cmd_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_vprintf(&last_cmd_status, fmt, ap); + va_end(ap); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. @@ -238,8 +264,10 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus belong to parent ctrl group */ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (cpumask_weight(tmpmask)) + if (cpumask_weight(tmpmask)) { + rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; + } /* Check whether cpus are dropped from this group */ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); @@ -291,8 +319,10 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); if (cpumask_weight(tmpmask)) { /* Can't drop from default group */ - if (rdtgrp == &rdtgroup_default) + if (rdtgrp == &rdtgroup_default) { + rdt_last_cmd_puts("Can't drop CPUs from default group\n"); return -EINVAL; + } /* Give any dropped cpus to rdtgroup_default */ cpumask_or(&rdtgroup_default.cpu_mask, @@ -357,8 +387,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; + rdt_last_cmd_puts("directory was removed\n"); goto unlock; } @@ -367,13 +399,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, else ret = cpumask_parse(buf, newmask); - if (ret) + if (ret) { + rdt_last_cmd_puts("bad cpu list/mask\n"); goto unlock; + } /* check that user didn't specify any offline cpus */ cpumask_andnot(tmpmask, newmask, cpu_online_mask); if (cpumask_weight(tmpmask)) { ret = -EINVAL; + rdt_last_cmd_puts("can only assign online cpus\n"); goto unlock; } @@ -452,6 +487,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk, */ atomic_dec(&rdtgrp->waitcount); kfree(callback); + rdt_last_cmd_puts("task exited\n"); } else { /* * For ctrl_mon groups move both closid and rmid. 
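 * (Editorial note, not part of the patch: the CLOSID selects the
 * allocation controls applied to a task while the RMID only tags it
 * for monitoring, which is why a mon group may update the RMID but
 * must keep its parent control group's CLOSID, as enforced below.)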
@@ -462,10 +498,12 @@ static int __rdtgroup_move_task(struct task_struct *tsk, tsk->closid = rdtgrp->closid; tsk->rmid = rdtgrp->mon.rmid; } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - else + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); ret = -EINVAL; + } } } return ret; @@ -484,8 +522,10 @@ static int rdtgroup_task_write_permission(struct task_struct *task, */ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + !uid_eq(cred->euid, tcred->suid)) { + rdt_last_cmd_printf("No permission to move task %d\n", task->pid); ret = -EPERM; + } put_cred(tcred); return ret; @@ -502,6 +542,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, tsk = find_task_by_vpid(pid); if (!tsk) { rcu_read_unlock(); + rdt_last_cmd_printf("No task %d\n", pid); return -ESRCH; } } else { @@ -529,6 +570,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (rdtgrp) ret = rdtgroup_move_task(pid, rdtgrp, of); @@ -569,6 +611,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +static int rdt_last_cmd_status_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + int len; + + mutex_lock(&rdtgroup_mutex); + len = seq_buf_used(&last_cmd_status); + if (len) + seq_printf(seq, "%.*s", len, last_cmd_status_buf); + else + seq_puts(seq, "ok\n"); + mutex_unlock(&rdtgroup_mutex); + return 0; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { @@ -686,6 +743,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { { + .name = "last_cmd_status", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_last_cmd_status_show, + .fflags = RF_TOP_INFO, + }, + { .name = "num_closids", .mode = 0444, .kf_ops = &rdtgroup_kf_single_ops, @@ -855,6 +919,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) return PTR_ERR(kn_info); kernfs_get(kn_info); + ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); + if (ret) + goto out_destroy; + for_each_alloc_enabled_rdt_resource(r) { fflags = r->fflags | RF_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); @@ -1081,6 +1149,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, struct dentry *dentry; int ret; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /* * resctrl file system can only be mounted once. 
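 * (Editorial note, not part of the patch: cpus_read_lock() is now taken
 * before rdtgroup_mutex so the lock order matches the CPU hotplug
 * callbacks, which take the hotplug lock first; that in turn is why the
 * static keys below must switch to the *_cpuslocked variants.)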
@@ -1130,12 +1199,12 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, goto out_mondata; if (rdt_alloc_capable) - static_branch_enable(&rdt_alloc_enable_key); + static_branch_enable_cpuslocked(&rdt_alloc_enable_key); if (rdt_mon_capable) - static_branch_enable(&rdt_mon_enable_key); + static_branch_enable_cpuslocked(&rdt_mon_enable_key); if (rdt_alloc_capable || rdt_mon_capable) - static_branch_enable(&rdt_enable_key); + static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { r = &rdt_resources_all[RDT_RESOURCE_L3]; @@ -1156,7 +1225,9 @@ out_info: out_cdp: cdp_disable(); out: + rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); return dentry; } @@ -1295,9 +1366,7 @@ static void rmdir_all_sub(void) kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ - get_online_cpus(); update_closid_rmid(cpu_online_mask, &rdtgroup_default); - put_online_cpus(); kernfs_remove(kn_info); kernfs_remove(kn_mongrp); @@ -1308,6 +1377,7 @@ static void rdt_kill_sb(struct super_block *sb) { struct rdt_resource *r; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /*Put everything back to default values. */ @@ -1315,11 +1385,12 @@ static void rdt_kill_sb(struct super_block *sb) reset_all_ctrls(r); cdp_disable(); rmdir_all_sub(); - static_branch_disable(&rdt_alloc_enable_key); - static_branch_disable(&rdt_mon_enable_key); - static_branch_disable(&rdt_enable_key); + static_branch_disable_cpuslocked(&rdt_alloc_enable_key); + static_branch_disable_cpuslocked(&rdt_mon_enable_key); + static_branch_disable_cpuslocked(&rdt_enable_key); kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); } static struct file_system_type rdt_fs_type = { @@ -1524,8 +1595,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; + rdt_last_cmd_puts("directory was removed\n"); goto out_unlock; } @@ -1533,6 +1606,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); if (!rdtgrp) { ret = -ENOSPC; + rdt_last_cmd_puts("kernel out of memory\n"); goto out_unlock; } *r = rdtgrp; @@ -1544,6 +1618,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); if (IS_ERR(kn)) { ret = PTR_ERR(kn); + rdt_last_cmd_puts("kernfs create error\n"); goto out_free_rgrp; } rdtgrp->kn = kn; @@ -1557,24 +1632,31 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs perm error\n"); goto out_destroy; + } - files = RFTYPE_BASE | RFTYPE_CTRL; files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); ret = rdtgroup_add_files(kn, files); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs fill error\n"); goto out_destroy; + } if (rdt_mon_capable) { ret = alloc_rmid(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of RMIDs\n"); goto out_destroy; + } rdtgrp->mon.rmid = ret; ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_idfree; + } } kernfs_activate(kn); @@ -1652,8 +1734,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, kn = rdtgrp->kn; ret = closid_alloc(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of CLOSIDs\n"); goto out_common_fail; + } closid = ret; rdtgrp->closid = closid; @@ -1665,8 +1749,10 
@@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * of tasks and cpus to monitor. */ ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_id_free; + } } goto out_unlock; @@ -1902,6 +1988,9 @@ int __init rdtgroup_init(void) { int ret = 0; + seq_buf_init(&last_cmd_status, last_cmd_status_buf, + sizeof(last_cmd_status_buf)); + ret = rdtgroup_setup_root(); if (ret) return ret; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index e42117d5f4d7..3fed38812eea 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <asm/cpu_device_id.h> #include <asm/cpufeature.h> #include <linux/cpu.h> diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 43051f0777d4..bcc7c54c7041 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-y = mce.o mce-severity.o mce-genpool.o obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o diff --git a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c index 10cec43aac38..7f85b76f43bc 100644 --- a/arch/x86/kernel/cpu/mcheck/dev-mcelog.c +++ b/arch/x86/kernel/cpu/mcheck/dev-mcelog.c @@ -24,14 +24,6 @@ static DEFINE_MUTEX(mce_chrdev_read_mutex); static char mce_helper[128]; static char *mce_helper_argv[2] = { mce_helper, NULL }; -#define mce_log_get_idx_check(p) \ -({ \ - RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \ - !lockdep_is_held(&mce_chrdev_read_mutex), \ - "suspicious mce_log_get_idx_check() usage"); \ - smp_load_acquire(&(p)); \ -}) - /* * Lockless MCE logging infrastructure. * This avoids deadlocks on printk locks without having to break locks. Also @@ -53,43 +45,32 @@ static int dev_mce_log(struct notifier_block *nb, unsigned long val, void *data) { struct mce *mce = (struct mce *)data; - unsigned int next, entry; - - wmb(); - for (;;) { - entry = mce_log_get_idx_check(mcelog.next); - for (;;) { - - /* - * When the buffer fills up discard new entries. - * Assume that the earlier errors are the more - * interesting ones: - */ - if (entry >= MCE_LOG_LEN) { - set_bit(MCE_OVERFLOW, - (unsigned long *)&mcelog.flags); - return NOTIFY_OK; - } - /* Old left over entry. Skip: */ - if (mcelog.entry[entry].finished) { - entry++; - continue; - } - break; - } - smp_rmb(); - next = entry + 1; - if (cmpxchg(&mcelog.next, entry, next) == entry) - break; + unsigned int entry; + + mutex_lock(&mce_chrdev_read_mutex); + + entry = mcelog.next; + + /* + * When the buffer fills up discard new entries. Assume that the + * earlier errors are the more interesting ones: + */ + if (entry >= MCE_LOG_LEN) { + set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); + goto unlock; } + + mcelog.next = entry + 1; + memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); - wmb(); mcelog.entry[entry].finished = 1; - wmb(); /* wake processes polling /dev/mcelog */ wake_up_interruptible(&mce_chrdev_wait); +unlock: + mutex_unlock(&mce_chrdev_read_mutex); + return NOTIFY_OK; } @@ -177,13 +158,6 @@ static int mce_chrdev_release(struct inode *inode, struct file *file) return 0; } -static void collect_tscs(void *data) -{ - unsigned long *cpu_tsc = (unsigned long *)data; - - cpu_tsc[smp_processor_id()] = rdtsc(); -} - static int mce_apei_read_done; /* Collect MCE record of previous boot in persistent storage via APEI ERST. 
*/ @@ -231,14 +205,9 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off) { char __user *buf = ubuf; - unsigned long *cpu_tsc; - unsigned prev, next; + unsigned next; int i, err; - cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); - if (!cpu_tsc) - return -ENOMEM; - mutex_lock(&mce_chrdev_read_mutex); if (!mce_apei_read_done) { @@ -247,65 +216,29 @@ static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf, goto out; } - next = mce_log_get_idx_check(mcelog.next); - /* Only supports full reads right now */ err = -EINVAL; if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) goto out; + next = mcelog.next; err = 0; - prev = 0; - do { - for (i = prev; i < next; i++) { - unsigned long start = jiffies; - struct mce *m = &mcelog.entry[i]; - - while (!m->finished) { - if (time_after_eq(jiffies, start + 2)) { - memset(m, 0, sizeof(*m)); - goto timeout; - } - cpu_relax(); - } - smp_rmb(); - err |= copy_to_user(buf, m, sizeof(*m)); - buf += sizeof(*m); -timeout: - ; - } - - memset(mcelog.entry + prev, 0, - (next - prev) * sizeof(struct mce)); - prev = next; - next = cmpxchg(&mcelog.next, prev, 0); - } while (next != prev); - - synchronize_sched(); - /* - * Collect entries that were still getting written before the - * synchronize. - */ - on_each_cpu(collect_tscs, cpu_tsc, 1); - - for (i = next; i < MCE_LOG_LEN; i++) { + for (i = 0; i < next; i++) { struct mce *m = &mcelog.entry[i]; - if (m->finished && m->tsc < cpu_tsc[m->cpu]) { - err |= copy_to_user(buf, m, sizeof(*m)); - smp_rmb(); - buf += sizeof(*m); - memset(m, 0, sizeof(*m)); - } + err |= copy_to_user(buf, m, sizeof(*m)); + buf += sizeof(*m); } + memset(mcelog.entry, 0, next * sizeof(struct mce)); + mcelog.next = 0; + if (err) err = -EFAULT; out: mutex_unlock(&mce_chrdev_read_mutex); - kfree(cpu_tsc); return err ? err : buf - ubuf; } diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 098530a93bb7..aa0d5df9dc60 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -1,3 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __X86_MCE_INTERNAL_H__ +#define __X86_MCE_INTERNAL_H__ + #include <linux/device.h> #include <asm/mce.h> @@ -108,3 +112,7 @@ static inline void mce_work_trigger(void) { } static inline void mce_register_injector_chain(struct notifier_block *nb) { } static inline void mce_unregister_injector_chain(struct notifier_block *nb) { } #endif + +extern struct mca_config mca_cfg; + +#endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index 87cc9ab7a13c..4ca632a06e0b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -204,7 +204,7 @@ static int error_context(struct mce *m) return IN_KERNEL; } -static int mce_severity_amd_smca(struct mce *m, int err_ctx) +static int mce_severity_amd_smca(struct mce *m, enum context err_ctx) { u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank); u32 low, high; @@ -245,6 +245,9 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (m->status & MCI_STATUS_UC) { + if (ctx == IN_KERNEL) + return MCE_PANIC_SEVERITY; + /* * On older systems where overflow_recov flag is not present, we * should simply panic if an error overflow occurs. 
If @@ -255,10 +258,6 @@ static int mce_severity_amd(struct mce *m, int tolerant, char **msg, bool is_exc if (mce_flags.smca) return mce_severity_amd_smca(m, ctx); - /* software can try to contain */ - if (!(m->mcgstatus & MCG_STATUS_RIPV) && (ctx == IN_KERNEL)) - return MCE_PANIC_SEVERITY; - /* kill current process */ return MCE_AR_SEVERITY; } else { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 3b413065c613..b1d616d08eee 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1367,13 +1367,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval) local_irq_restore(flags); } -static void mce_timer_fn(unsigned long data) +static void mce_timer_fn(struct timer_list *t) { - struct timer_list *t = this_cpu_ptr(&mce_timer); - int cpu = smp_processor_id(); + struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); unsigned long iv; - WARN_ON(cpu != data); + WARN_ON(cpu_t != t); iv = __this_cpu_read(mce_next_interval); @@ -1763,17 +1762,15 @@ static void mce_start_timer(struct timer_list *t) static void __mcheck_cpu_setup_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); } static void __mcheck_cpu_init_timer(void) { struct timer_list *t = this_cpu_ptr(&mce_timer); - unsigned int cpu = smp_processor_id(); - setup_pinned_timer(t, mce_timer_fn, cpu); + timer_setup(t, mce_timer_fn, TIMER_PINNED); mce_start_timer(t); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 40e28ed77fbf..486f640b02ef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -28,6 +28,8 @@ #include <asm/msr.h> #include <asm/trace/irq_vectors.h> +#include "mce-internal.h" + #define NR_BLOCKS 5 #define THRESHOLD_MAX 0xFFF #define INT_TYPE_APIC 0x00020000 diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index e84db79ef272..d05be307d081 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel specific MCE features. 
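 * (Editorial note on the mce.c hunks above, not part of this file: the
 * timer rework follows the new core API, in which the callback receives
 * the timer itself, e.g.
 *
 *	static void mce_timer_fn(struct timer_list *t);
 *	timer_setup(t, mce_timer_fn, TIMER_PINNED);
 *
 * so the per-CPU sanity check becomes a pointer comparison instead of
 * the old unsigned long cookie.)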
* Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 2a0717bf8033..5cddf831720f 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * P5 specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 5e7249e42f8f..2b584b319eff 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Common corrected MCE threshold handler code: */ diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index c6a722e1d011..3b45b270a865 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IDT Winchip specific Machine Check Exception Reporting * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 86e8f0b2537b..c4fa4a85d4cb 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -122,9 +122,6 @@ static bool __init check_loader_disabled_bsp(void) bool *res = &dis_ucode_ldr; #endif - if (!have_cpuid_p()) - return *res; - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but @@ -166,24 +163,36 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { unsigned int cpuid_1_eax; + bool intel = true; - if (check_loader_disabled_bsp()) + if (!have_cpuid_p()) return; cpuid_1_eax = native_cpuid_eax(1); switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (x86_family(cpuid_1_eax) >= 6) - load_ucode_intel_bsp(); + if (x86_family(cpuid_1_eax) < 6) + return; break; + case X86_VENDOR_AMD: - if (x86_family(cpuid_1_eax) >= 0x10) - load_ucode_amd_bsp(cpuid_1_eax); + if (x86_family(cpuid_1_eax) < 0x10) + return; + intel = false; break; + default: - break; + return; } + + if (check_loader_disabled_bsp()) + return; + + if (intel) + load_ucode_intel_bsp(); + else + load_ucode_amd_bsp(cpuid_1_eax); } static bool check_loader_disabled_ap(void) diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 8f7a9bbad514..7dbcb7adf797 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -34,6 +34,7 @@ #include <linux/mm.h> #include <asm/microcode_intel.h> +#include <asm/intel-family.h> #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/setup.h> @@ -918,6 +919,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n) return 0; } +static bool is_blacklisted(unsigned int cpu) +{ + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) { + pr_err_once("late loading on model 79 is disabled.\n"); + return true; + } + + return false; +} + static enum ucode_state request_microcode_fw(int cpu, struct device *device, bool refresh_fw) { @@ -926,6 +939,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, const struct firmware *firmware; enum ucode_state ret; + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + sprintf(name, "intel-ucode/%02x-%02x-%02x", c->x86, 
c->x86_model, c->x86_mask); @@ -950,6 +966,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) static enum ucode_state request_microcode_user(int cpu, const void __user *buf, size_t size) { + if (is_blacklisted(cpu)) + return UCODE_NFOUND; + return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); } diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh index 6988c74409a8..d0dfb892c72f 100644 --- a/arch/x86/kernel/cpu/mkcapflags.sh +++ b/arch/x86/kernel/cpu/mkcapflags.sh @@ -1,4 +1,5 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # # Generate the x86_cap/bug_flags[] arrays from include/asm/cpufeatures.h # diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 236324e83a3a..85eb5fc180c8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void) #endif } -const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { +const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = { .name = "Microsoft Hyper-V", .detect = ms_hyperv_platform, - .init_platform = ms_hyperv_init_platform, + .type = X86_HYPER_MS_HYPERV, + .init.init_platform = ms_hyperv_init_platform, }; -EXPORT_SYMBOL(x86_hyper_ms_hyperv); diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index 92ba9cd31c9a..a65a0272096d 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/mm.h> #include <asm/mtrr.h> diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index 3d689937fc1b..f27177816569 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index b1086f79e57e..4296c702a3f7 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/io.h> #include <linux/mm.h> diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 6d9b45549109..558444b23923 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/capability.h> #include <linux/seq_file.h> #include <linux/uaccess.h> diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index ad8bd763efa5..2ac99e561181 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * local MTRR defines. */ diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 181eabecae25..d389083330c5 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * local apic based NMI watchdog for various CPUs. 
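 * (Editorial note on the mshyperv.c hunk above, not part of this file:
 * hypervisor descriptors become __initconst and lose their exports;
 * detection copies each guest's init/runtime callbacks into
 * x86_init.hyper and x86_platform.hyper and records only an enum
 * x86_hyper_type, so nothing needs the global struct pointer at
 * runtime any more.)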
* diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 1dd8294fd730..fd6ec2aa0303 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Strings for the various x86 power flags * diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 218f79825b3c..e7ecedafa1c8 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -1,7 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/smp.h> #include <linux/timex.h> #include <linux/string.h> #include <linux/seq_file.h> +#include <linux/cpufreq.h> + +#include "cpu.h" /* * Get CPU information for use by the procfs. @@ -75,9 +79,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->microcode) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); - if (cpu_has(c, X86_FEATURE_TSC)) + if (cpu_has(c, X86_FEATURE_TSC)) { + unsigned int freq = aperfmperf_get_khz(cpu); + + if (!freq) + freq = cpufreq_quick_get(cpu); + if (!freq) + freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", - cpu_khz / 1000, (cpu_khz % 1000)); + freq / 1000, (freq % 1000)); + } /* Cache size */ if (c->x86_cache_size >= 0) diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index cd531355e838..b099024d339c 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Check for extended topology enumeration cpuid leaf 0xb and if it * exists, use it for populating initial_apicid and cpu topology diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index d77d07ab310b..42c939827621 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/clock.h> diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index ef9c2a0078bd..65a58a390fc3 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <asm/processor.h> #include "cpu.h" diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 40ed26852ebd..8e005329648b 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void) (eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0; } -const __refconst struct hypervisor_x86 x86_hyper_vmware = { +const __initconst struct hypervisor_x86 x86_hyper_vmware = { .name = "VMware", .detect = vmware_platform, - .init_platform = vmware_platform_setup, - .x2apic_available = vmware_legacy_x2apic_available, + .type = X86_HYPER_VMWARE, + .init.init_platform = vmware_platform_setup, + .init.x2apic_available = vmware_legacy_x2apic_available, }; -EXPORT_SYMBOL(x86_hyper_vmware); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 44404e2307bb..10e74d4778a1 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -209,7 +209,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -342,7 +342,7 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } -static int 
prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_elf_data *ced = arg; Elf64_Ehdr *ehdr; @@ -355,7 +355,7 @@ static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) ehdr = ced->ehdr; /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(ced, start, end); + ret = elf_header_exclude_ranges(ced, res->start, res->end); if (ret) return ret; @@ -518,14 +518,14 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) return 0; } -static int memmap_entry_callback(u64 start, u64 end, void *arg) +static int memmap_entry_callback(struct resource *res, void *arg) { struct crash_memmap_data *cmd = arg; struct boot_params *params = cmd->params; struct e820_entry ei; - ei.addr = start; - ei.size = end - start + 1; + ei.addr = res->start; + ei.size = resource_size(res); ei.type = cmd->type; add_e820_entry(params, &ei); @@ -619,12 +619,12 @@ out: return ret; } -static int determine_backup_region(u64 start, u64 end, void *arg) +static int determine_backup_region(struct resource *res, void *arg) { struct kimage *image = arg; - image->arch.backup_src_start = start; - image->arch.backup_src_sz = end - start + 1; + image->arch.backup_src_start = res->start; + image->arch.backup_src_sz = resource_size(res); /* Expecting only one range for backup region */ return 1; diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c index 538fedea9b3f..33ee47670b99 100644 --- a/arch/x86/kernel/crash_dump_32.c +++ b/arch/x86/kernel/crash_dump_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Memory preserving reboot related code. * diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c index afa64adb75ee..4f2e0778feac 100644 --- a/arch/x86/kernel/crash_dump_64.c +++ b/arch/x86/kernel/crash_dump_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Memory preserving reboot related code. * diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index cbf1f6ba39a8..76e07698e6d1 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Architecture specific OF callbacks. 
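 * (Editorial note on the crash.c hunks above, not part of this file:
 * the walk callbacks now take a struct resource * instead of raw
 * u64 start/end, and sizes are derived with resource_size(res),
 * i.e. res->end - res->start + 1, matching the removed open-coded
 * "end - start + 1" arithmetic.)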
*/ diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index f9c324e08d85..0e662c55ae90 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/sched.h> #include <linux/sched/debug.h> diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 4f0481474903..daefae83a3aa 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 225af4184f06..88ce2ffdb110 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 927abeaf63e2..1e82f787c160 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Various workarounds for chipset bugs. This code runs very early and can't use the regular PCI subsystem The entries are keyed to PCI bridges which usually identify chipsets diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 0f0840304452..5e801c8c8ce7 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/console.h> #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/x86/kernel/ebda.c b/arch/x86/kernel/ebda.c index 4312f8ae71b7..38e7d597b660 100644 --- a/arch/x86/kernel/ebda.c +++ b/arch/x86/kernel/ebda.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> #include <linux/memblock.h> diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 9c4e7ba6870c..e5ec3cafa72e 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -57,7 +57,7 @@ # error "Need more virtual address space for the ESPFIX hack" #endif -#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) +#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO) /* This contains the *bottom* address of the espfix stack */ DEFINE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); @@ -155,14 +155,14 @@ void init_espfix_ap(int cpu) page = cpu/ESPFIX_STACKS_PER_PAGE; /* Did another CPU already set this up? */ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (likely(stack_page)) goto done; mutex_lock(&espfix_init_mutex); /* Did we race on the lock? 
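 * Re-checking espfix_pages[page] under the mutex completes a classic
 * double-checked init: the first READ_ONCE() above skips the lock in
 * the common case, and this one decides whether another CPU finished
 * the setup while we slept on the mutex.
 * (Editorial gloss, not part of the patch; READ_ONCE()/WRITE_ONCE()
 * replace ACCESS_ONCE(), which is being removed tree-wide.)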
*/ - stack_page = ACCESS_ONCE(espfix_pages[page]); + stack_page = READ_ONCE(espfix_pages[page]); if (stack_page) goto unlock_done; @@ -200,7 +200,7 @@ void init_espfix_ap(int cpu) set_pte(&pte_p[n*PTE_STRIDE], pte); /* Job is done for this CPU and any CPU which shares this page */ - ACCESS_ONCE(espfix_pages[page]) = stack_page; + WRITE_ONCE(espfix_pages[page], stack_page); unlock_done: mutex_unlock(&espfix_init_mutex); diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index d913047f832c..2954fab15e51 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * x86 FPU bug checks: */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e1114f070c2d..f92a6593de1e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->fpregs_active) { + if (fpu->initialized) { /* * Ignore return value -- we don't care if reg state * is clobbered. @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->fpregs_active) + if (fpu->initialized) copy_kernel_to_fpregs(&fpu->state); kernel_fpu_enable(); @@ -148,7 +148,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); trace_x86_fpu_before_save(fpu); - if (fpu->fpregs_active) { + if (fpu->initialized) { if (!copy_fpregs_to_fpstate(fpu)) { copy_kernel_to_fpregs(&fpu->state); } @@ -189,10 +189,9 @@ EXPORT_SYMBOL_GPL(fpstate_init); int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU)) + if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU)) return 0; WARN_ON_FPU(src_fpu != ¤t->thread.fpu); @@ -206,26 +205,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Save current FPU registers directly into the child * FPU context, without any memory-to-memory copying. - * In lazy mode, if the FPU context isn't loaded into - * fpregs, CR0.TS will be set and do_device_not_available - * will load the FPU context. * - * We have to do all this with preemption disabled, - * mostly because of the FNSAVE case, because in that - * case we must not allow preemption in the window - * between the FNSAVE and us marking the context lazy. - * - * It shouldn't be an issue as even FNSAVE is plenty - * fast in terms of critical section length. + * ( The function 'fails' in the FNSAVE case, which destroys + * register contents so we have to copy them back. 
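 * FNSAVE, unlike FXSAVE, resets the FPU to its init state after
 * storing it, so on such CPUs the parent's live registers are gone
 * once the child copy is made and must be reloaded from memory.
 * (Editorial gloss, not part of the patch.)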
) */ - preempt_disable(); if (!copy_fpregs_to_fpstate(dst_fpu)) { - memcpy(&src_fpu->state, &dst_fpu->state, - fpu_kernel_xstate_size); - + memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size); copy_kernel_to_fpregs(&src_fpu->state); } - preempt_enable(); trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); @@ -237,45 +224,48 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * Activate the current task's in-memory FPU context, * if it has not been used before: */ -void fpu__activate_curr(struct fpu *fpu) +void fpu__initialize(struct fpu *fpu) { WARN_ON_FPU(fpu != ¤t->thread.fpu); - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for the current task: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } -EXPORT_SYMBOL_GPL(fpu__activate_curr); +EXPORT_SYMBOL_GPL(fpu__initialize); /* * This function must be called before we read a task's fpstate. * - * If the task has not used the FPU before then initialize its - * fpstate. + * There's two cases where this gets called: + * + * - for the current task (when coredumping), in which case we have + * to save the latest FPU registers into the fpstate, + * + * - or it's called for stopped tasks (ptrace), in which case the + * registers were already saved by the context-switch code when + * the task scheduled out - we only have to initialize the registers + * if they've never been initialized. * * If the task has used the FPU before then save it. */ -void fpu__activate_fpstate_read(struct fpu *fpu) +void fpu__prepare_read(struct fpu *fpu) { - /* - * If fpregs are active (in the current CPU), then - * copy them to the fpstate: - */ - if (fpu->fpregs_active) { + if (fpu == ¤t->thread.fpu) { fpu__save(fpu); } else { - if (!fpu->fpstate_active) { + if (!fpu->initialized) { fpstate_init(&fpu->state); trace_x86_fpu_init_state(fpu); trace_x86_fpu_activate_state(fpu); /* Safe to do for current and for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } } @@ -283,17 +273,17 @@ void fpu__activate_fpstate_read(struct fpu *fpu) /* * This function must be called before we write a task's fpstate. * - * If the task has used the FPU before then unlazy it. + * If the task has used the FPU before then invalidate any cached FPU registers. * If the task has not used the FPU before then initialize its fpstate. * * After this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we - * didn't clear its lazy status here then the lazy in-registers + * didn't clear its cached status here then the cached in-registers * state pending on its former CPU could be restored, corrupting * the modifications. 
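 * Invalidating here means poisoning fpu->last_cpu, so the fpregs-cache
 * check fails and the next switch to the task performs a full restore
 * from the modified fpstate.
 * (Editorial gloss on __fpu_invalidate_fpregs_state(), not part of the
 * patch.)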
*/ -void fpu__activate_fpstate_write(struct fpu *fpu) +void fpu__prepare_write(struct fpu *fpu) { /* * Only stopped child tasks can be used to modify the FPU @@ -301,8 +291,8 @@ void fpu__activate_fpstate_write(struct fpu *fpu) */ WARN_ON_FPU(fpu == ¤t->thread.fpu); - if (fpu->fpstate_active) { - /* Invalidate any lazy state: */ + if (fpu->initialized) { + /* Invalidate any cached state: */ __fpu_invalidate_fpregs_state(fpu); } else { fpstate_init(&fpu->state); @@ -310,74 +300,11 @@ void fpu__activate_fpstate_write(struct fpu *fpu) trace_x86_fpu_activate_state(fpu); /* Safe to do for stopped child tasks: */ - fpu->fpstate_active = 1; + fpu->initialized = 1; } } /* - * This function must be called before we write the current - * task's fpstate. - * - * This call gets the current FPU register state and moves - * it in to the 'fpstate'. Preemption is disabled so that - * no writes to the 'fpstate' can occur from context - * swiches. - * - * Must be followed by a fpu__current_fpstate_write_end(). - */ -void fpu__current_fpstate_write_begin(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - - /* - * Ensure that the context-switching code does not write - * over the fpstate while we are doing our update. - */ - preempt_disable(); - - /* - * Move the fpregs in to the fpu's 'fpstate'. - */ - fpu__activate_fpstate_read(fpu); - - /* - * The caller is about to write to 'fpu'. Ensure that no - * CPU thinks that its fpregs match the fpstate. This - * ensures we will not be lazy and skip a XRSTOR in the - * future. - */ - __fpu_invalidate_fpregs_state(fpu); -} - -/* - * This function must be paired with fpu__current_fpstate_write_begin() - * - * This will ensure that the modified fpstate gets placed back in - * the fpregs if necessary. - * - * Note: This function may be called whether or not an _actual_ - * write to the fpstate occurred. - */ -void fpu__current_fpstate_write_end(void) -{ - struct fpu *fpu = ¤t->thread.fpu; - - /* - * 'fpu' now has an updated copy of the state, but the - * registers may still be out of date. Update them with - * an XRSTOR if they are active. - */ - if (fpregs_active()) - copy_kernel_to_fpregs(&fpu->state); - - /* - * Our update is done and the fpregs/fpstate are in sync - * if necessary. Context switches can happen again. - */ - preempt_enable(); -} - -/* * 'fpu__restore()' is called to copy FPU registers from * the FPU fpstate to the live hw registers and to activate * access to the hardware registers, so that FPU instructions @@ -389,7 +316,7 @@ void fpu__current_fpstate_write_end(void) */ void fpu__restore(struct fpu *fpu) { - fpu__activate_curr(fpu); + fpu__initialize(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); @@ -414,15 +341,17 @@ void fpu__drop(struct fpu *fpu) { preempt_disable(); - if (fpu->fpregs_active) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); + if (fpu == ¤t->thread.fpu) { + if (fpu->initialized) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } } - fpu->fpstate_active = 0; + fpu->initialized = 0; trace_x86_fpu_dropped(fpu); @@ -462,9 +391,11 @@ void fpu__clear(struct fpu *fpu) * Make sure fpstate is cleared and initialized. 
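 * (Editorial note, not part of the patch: the preempt_disable() pair
 * added below makes the fpstate-to-fpregs transition scheduler-atomic,
 * so a context switch cannot save a half-initialized register state
 * back over the freshly initialized fpstate.)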
*/ if (static_cpu_has(X86_FEATURE_FPU)) { - fpu__activate_curr(fpu); + preempt_disable(); + fpu__initialize(fpu); user_fpu_begin(); copy_init_fpstate_to_fpregs(); + preempt_enable(); } } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d5d44c452624..6abd83572b01 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -240,7 +240,7 @@ static void __init fpu__init_system_ctx_switch(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; - WARN_ON_FPU(current->thread.fpu.fpstate_active); + WARN_ON_FPU(current->thread.fpu.initialized); } /* @@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void) */ static void __init fpu__init_parse_early_param(void) { + char arg[32]; + char *argptr = arg; + int bit; + if (cmdline_find_option_bool(boot_command_line, "no387")) setup_clear_cpu_cap(X86_FEATURE_FPU); @@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void) if (cmdline_find_option_bool(boot_command_line, "noxsaves")) setup_clear_cpu_cap(X86_FEATURE_XSAVES); + + if (cmdline_find_option(boot_command_line, "clearcpuid", arg, + sizeof(arg)) && + get_option(&argptr, &bit) && + bit >= 0 && + bit < NCAPINTS * 32) + setup_clear_cpu_cap(bit); } /* diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index b188b16841e3..bc02f5144b95 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * FPU register's regset abstraction, for ptrace, core dumps, etc. */ @@ -16,14 +17,14 @@ int regset_fpregs_active(struct task_struct *target, const struct user_regset *r { struct fpu *target_fpu = &target->thread.fpu; - return target_fpu->fpstate_active ? regset->n : 0; + return target_fpu->initialized ? regset->n : 0; } int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { struct fpu *target_fpu = &target->thread.fpu; - if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active) + if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized) return regset->n; else return 0; @@ -38,7 +39,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); fpstate_sanitize_xstate(fpu); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -55,7 +56,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!boot_cpu_has(X86_FEATURE_FXSR)) return -ENODEV; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); fpstate_sanitize_xstate(fpu); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -89,10 +90,13 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, xsave = &fpu->state.xsave; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); if (using_compacted_format()) { - ret = copyout_from_xsaves(pos, count, kbuf, ubuf, xsave); + if (kbuf) + ret = copy_xstate_to_kernel(kbuf, xsave, pos, count); + else + ret = copy_xstate_to_user(ubuf, xsave, pos, count); } else { fpstate_sanitize_xstate(fpu); /* @@ -129,28 +133,29 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, xsave = &fpu->state.xsave; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); - if (boot_cpu_has(X86_FEATURE_XSAVES)) - ret = copyin_to_xsaves(kbuf, ubuf, xsave); - else + if (using_compacted_format()) { + if (kbuf) + ret = copy_kernel_to_xstate(xsave, kbuf); + else + ret = copy_user_to_xstate(xsave, 
ubuf); + } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - - /* - * In case of failure, mark all states as init: - */ - if (ret) - fpstate_init(&fpu->state); + if (!ret) + ret = validate_xstate_header(&xsave->header); + } /* * mxcsr reserved bits must be masked to zero for security reasons. */ xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->header.xfeatures &= xfeatures_mask; + /* - * These bits must be zero. + * In case of failure, mark all states as init: */ - memset(&xsave->header.reserved, 0, 48); + if (ret) + fpstate_init(&fpu->state); return ret; } @@ -299,7 +304,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - fpu__activate_fpstate_read(fpu); + fpu__prepare_read(fpu); if (!boot_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); @@ -329,7 +334,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__activate_fpstate_write(fpu); + fpu__prepare_write(fpu); fpstate_sanitize_xstate(fpu); if (!boot_cpu_has(X86_FEATURE_FPU)) @@ -369,7 +374,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) struct fpu *fpu = &tsk->thread.fpu; int fpvalid; - fpvalid = fpu->fpstate_active; + fpvalid = fpu->initialized; if (fpvalid) fpvalid = !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 83c23c230b4c..23f1691670b6 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * FPU signal frame handling routines. */ @@ -155,7 +156,8 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct xregs_state *xsave = &current->thread.fpu.state.xsave; + struct fpu *fpu = &current->thread.fpu; + struct xregs_state *xsave = &fpu->state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -170,13 +172,13 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_32 __user *) buf) ? -1 : 1; - if (fpregs_active() || using_compacted_format()) { + if (fpu->initialized || using_compacted_format()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) - copy_fxregs_to_kernel(&tsk->thread.fpu); + copy_fxregs_to_kernel(fpu); } else { /* * It is a *bug* if kernel uses compacted-format for xsave @@ -189,7 +191,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) return -1; } - fpstate_sanitize_xstate(&tsk->thread.fpu); + fpstate_sanitize_xstate(fpu); if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size)) return -1; } @@ -213,8 +215,11 @@ sanitize_restored_xstate(struct task_struct *tsk, struct xstate_header *header = &xsave->header; if (use_xsave()) { - /* These bits must be zero. */ - memset(header->reserved, 0, 48); + /* + * Note: we don't need to zero the reserved bits in the + * xstate_header here because we either didn't copy them at all, + * or we checked earlier that they aren't set.
+ */ /* * Init the state that is not present in the memory @@ -223,7 +228,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (fx_only) header->xfeatures = XFEATURE_MASK_FPSSE; else - header->xfeatures &= (xfeatures_mask & xfeatures); + header->xfeatures &= xfeatures; } if (use_fxsr()) { @@ -279,7 +284,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - fpu__activate_curr(fpu); + fpu__initialize(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, @@ -307,28 +312,29 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) /* * For 32-bit frames with fxstate, copy the user state to the * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. + * header. Validate and sanitize the copied state. */ struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; int err = 0; /* - * Drop the current fpu which clears fpu->fpstate_active. This ensures + * Drop the current fpu which clears fpu->initialized. This ensures * that any context-switch during the copy of the new state, * avoids the intermediate state from getting restored/saved. * Thus avoiding the new restored state from getting corrupted. * We will be ready to restore/save the state only after - * fpu->fpstate_active is again set. + * fpu->initialized is again set. */ fpu__drop(fpu); if (using_compacted_format()) { - err = copyin_to_xsaves(NULL, buf_fx, - &fpu->state.xsave); + err = copy_user_to_xstate(&fpu->state.xsave, buf_fx); } else { - err = __copy_from_user(&fpu->state.xsave, - buf_fx, state_size); + err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); + + if (!err && state_size > offsetof(struct xregs_state, header)) + err = validate_xstate_header(&fpu->state.xsave.header); } if (err || __copy_from_user(&env, buf, sizeof(env))) { @@ -339,7 +345,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } - fpu->fpstate_active = 1; + fpu->initialized = 1; preempt_disable(); fpu__restore(fpu); preempt_enable(); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c24ac1efb12d..87a57b7642d3 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -15,6 +15,7 @@ #include <asm/fpu/xstate.h> #include <asm/tlbflush.h> +#include <asm/cpufeature.h> /* * Although we spell it out in here, the Processor Trace @@ -36,6 +37,19 @@ static const char *xfeature_names[] = "unknown xstate feature" , }; +static short xsave_cpuid_features[] __initdata = { + X86_FEATURE_FPU, + X86_FEATURE_XMM, + X86_FEATURE_AVX, + X86_FEATURE_MPX, + X86_FEATURE_MPX, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_AVX512F, + X86_FEATURE_INTEL_PT, + X86_FEATURE_PKU, +}; + /* * Mask of xstate features supported by the CPU and the kernel: */ @@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size; void fpu__xstate_clear_all_cpu_caps(void) { setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVEC); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - setup_clear_cpu_cap(X86_FEATURE_AVX512F); - setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA); - setup_clear_cpu_cap(X86_FEATURE_AVX512PF); - setup_clear_cpu_cap(X86_FEATURE_AVX512ER); - setup_clear_cpu_cap(X86_FEATURE_AVX512CD); - 
setup_clear_cpu_cap(X86_FEATURE_AVX512DQ); - setup_clear_cpu_cap(X86_FEATURE_AVX512BW); - setup_clear_cpu_cap(X86_FEATURE_AVX512VL); - setup_clear_cpu_cap(X86_FEATURE_MPX); - setup_clear_cpu_cap(X86_FEATURE_XGETBV1); - setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI); - setup_clear_cpu_cap(X86_FEATURE_PKU); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); - setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); - setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* @@ -483,6 +477,30 @@ int using_compacted_format(void) return boot_cpu_has(X86_FEATURE_XSAVES); } +/* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ +int validate_xstate_header(const struct xstate_header *hdr) +{ + /* No unknown or supervisor features may be set */ + if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR)) + return -EINVAL; + + /* Userspace must use the uncompacted format */ + if (hdr->xcomp_bv) + return -EINVAL; + + /* + * If 'reserved' is shrunken to add a new field, make sure to validate + * that new field here! + */ + BUILD_BUG_ON(sizeof(hdr->reserved) != 48); + + /* No reserved bits may be set */ + if (memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved))) + return -EINVAL; + + return 0; +} + static void __xstate_dump_leaves(void) { int i; @@ -702,6 +720,7 @@ void __init fpu__init_system_xstate(void) unsigned int eax, ebx, ecx, edx; static int on_boot_cpu __initdata = 1; int err; + int i; WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; @@ -735,6 +754,14 @@ void __init fpu__init_system_xstate(void) goto out_disable; } + /* + * Clear XSAVE features that are disabled in the normal CPUID. + */ + for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { + if (!boot_cpu_has(xsave_cpuid_features[i])) + xfeatures_mask &= ~BIT(i); + } + xfeatures_mask &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ @@ -867,7 +894,7 @@ const void *get_xsave_field_ptr(int xsave_state) { struct fpu *fpu = &current->thread.fpu; - if (!fpu->fpstate_active) + if (!fpu->initialized) return NULL; /* * fpu__save() takes the CPU's xstate registers @@ -921,38 +948,129 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, #endif /* ! CONFIG_ARCH_HAS_PKEYS */ /* + * Weird legacy quirk: SSE and YMM states store information in the + * MXCSR and MXCSR_FLAGS fields of the FP area. That means if the FP + * area is marked as unused in the xfeatures header, we need to copy + * MXCSR and MXCSR_FLAGS if either SSE or YMM are in use. + */ +static inline bool xfeatures_mxcsr_quirk(u64 xfeatures) +{ + if (!(xfeatures & (XFEATURE_MASK_SSE|XFEATURE_MASK_YMM))) + return false; + + if (xfeatures & XFEATURE_MASK_FP) + return false; + + return true; +} + +/* * This is similar to user_regset_copyout(), but will not add offset to * the source data pointer or increment pos, count, kbuf, and ubuf. */ -static inline int xstate_copyout(unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf, - const void *data, const int start_pos, - const int end_pos) +static inline void +__copy_xstate_to_kernel(void *kbuf, const void *data, + unsigned int offset, unsigned int size, unsigned int size_total) { - if ((count == 0) || (pos < start_pos)) - return 0; + if (offset < size_total) { + unsigned int copy = min(size, size_total - offset); - if (end_pos < 0 || pos < end_pos) { - unsigned int copy = (end_pos < 0 ?
count : min(count, end_pos - pos)); + memcpy(kbuf + offset, data, copy); + } +} - if (kbuf) { - memcpy(kbuf + pos, data, copy); - } else { - if (__copy_to_user(ubuf + pos, data, copy)) - return -EFAULT; +/* + * Convert from kernel XSAVES compacted format to standard format and copy + * to a kernel-space ptrace buffer. + * + * It supports partial copy but pos always starts from zero. This is called + * from xstateregs_get() and there we check the CPU has XSAVES. + */ +int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) +{ + unsigned int offset, size; + struct xstate_header header; + int i; + + /* + * Currently copy_regset_to_user() starts from pos 0: + */ + if (unlikely(offset_start != 0)) + return -EFAULT; + + /* + * The destination is a ptrace buffer; we put in only user xstates: + */ + memset(&header, 0, sizeof(header)); + header.xfeatures = xsave->header.xfeatures; + header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + + /* + * Copy xregs_state->header: + */ + offset = offsetof(struct xregs_state, header); + size = sizeof(header); + + __copy_xstate_to_kernel(kbuf, &header, offset, size, size_total); + + for (i = 0; i < XFEATURE_MAX; i++) { + /* + * Copy only in-use xstates: + */ + if ((header.xfeatures >> i) & 1) { + void *src = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + /* The next component has to fit fully into the output buffer: */ + if (offset + size > size_total) + break; + + __copy_xstate_to_kernel(kbuf, src, offset, size, size_total); } + + } + + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_kernel(kbuf, &xsave->i387.mxcsr, offset, size, size_total); + } + + /* + * Fill xsave->i387.sw_reserved value for ptrace frame: + */ + offset = offsetof(struct fxregs_state, sw_reserved); + size = sizeof(xstate_fx_sw_bytes); + + __copy_xstate_to_kernel(kbuf, xstate_fx_sw_bytes, offset, size, size_total); + + return 0; +} + +static inline int +__copy_xstate_to_user(void __user *ubuf, const void *data, unsigned int offset, unsigned int size, unsigned int size_total) +{ + if (!size) + return 0; + + if (offset < size_total) { + unsigned int copy = min(size, size_total - offset); + + if (__copy_to_user(ubuf + offset, data, copy)) + return -EFAULT; } return 0; } /* * Convert from kernel XSAVES compacted format to standard format and copy - * to a ptrace buffer. It supports partial copy but pos always starts from + * to a user-space buffer. It supports partial copy but pos always starts from * zero. This is called from xstateregs_get() and there we check the CPU * has XSAVES. 
*/ -int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, - void __user *ubuf, struct xregs_state *xsave) +int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset_start, unsigned int size_total) { unsigned int offset, size; int ret, i; @@ -961,7 +1079,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, /* * Currently copy_regset_to_user() starts from pos 0: */ - if (unlikely(pos != 0)) + if (unlikely(offset_start != 0)) return -EFAULT; /* @@ -977,8 +1095,7 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, offset = offsetof(struct xregs_state, header); size = sizeof(header); - ret = xstate_copyout(offset, size, kbuf, ubuf, &header, 0, count); - + ret = __copy_xstate_to_user(ubuf, &header, offset, size, size_total); if (ret) return ret; @@ -992,25 +1109,30 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, offset = xstate_offsets[i]; size = xstate_sizes[i]; - ret = xstate_copyout(offset, size, kbuf, ubuf, src, 0, count); + /* The next component has to fit fully into the output buffer: */ + if (offset + size > size_total) + break; + ret = __copy_xstate_to_user(ubuf, src, offset, size, size_total); if (ret) return ret; - - if (offset + size >= count) - break; } } + if (xfeatures_mxcsr_quirk(header.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + __copy_xstate_to_user(ubuf, &xsave->i387.mxcsr, offset, size, size_total); + } + /* * Fill xsave->i387.sw_reserved value for ptrace frame: */ offset = offsetof(struct fxregs_state, sw_reserved); size = sizeof(xstate_fx_sw_bytes); - ret = xstate_copyout(offset, size, kbuf, ubuf, xstate_fx_sw_bytes, 0, count); - + ret = __copy_xstate_to_user(ubuf, xstate_fx_sw_bytes, offset, size, size_total); if (ret) return ret; @@ -1018,55 +1140,98 @@ int copyout_from_xsaves(unsigned int pos, unsigned int count, void *kbuf, } /* - * Convert from a ptrace standard-format buffer to kernel XSAVES format - * and copy to the target thread. This is called from xstateregs_set() and - * there we check the CPU has XSAVES and a whole standard-sized buffer - * exists. + * Convert from a ptrace standard-format kernel buffer to kernel XSAVES format + * and copy to the target thread. This is called from xstateregs_set(). */ -int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, - struct xregs_state *xsave) +int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) { unsigned int offset, size; int i; - u64 xfeatures; - u64 allowed_features; + struct xstate_header hdr; offset = offsetof(struct xregs_state, header); - size = sizeof(xfeatures); + size = sizeof(hdr); - if (kbuf) { - memcpy(&xfeatures, kbuf + offset, size); - } else { - if (__copy_from_user(&xfeatures, ubuf + offset, size)) - return -EFAULT; + memcpy(&hdr, kbuf + offset, size); + + if (validate_xstate_header(&hdr)) + return -EINVAL; + + for (i = 0; i < XFEATURE_MAX; i++) { + u64 mask = ((u64)1 << i); + + if (hdr.xfeatures & mask) { + void *dst = __raw_xsave_addr(xsave, 1 << i); + + offset = xstate_offsets[i]; + size = xstate_sizes[i]; + + memcpy(dst, kbuf + offset, size); + } + } + + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + memcpy(&xsave->i387.mxcsr, kbuf + offset, size); } /* - * Reject if the user sets any disabled or supervisor features: + * The state that came in from userspace was user-state only. 
+ * Mask all the user states out of 'xfeatures': + */ + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + + /* + * Add back in the features that came in from userspace: */ - allowed_features = xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR; + xsave->header.xfeatures |= hdr.xfeatures; + + return 0; +} - if (xfeatures & ~allowed_features) +/* + * Convert from a ptrace or sigreturn standard-format user-space buffer to + * kernel XSAVES format and copy to the target thread. This is called from + * xstateregs_set(), as well as potentially from the sigreturn() and + * rt_sigreturn() system calls. + */ +int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) +{ + unsigned int offset, size; + int i; + struct xstate_header hdr; + + offset = offsetof(struct xregs_state, header); + size = sizeof(hdr); + + if (__copy_from_user(&hdr, ubuf + offset, size)) + return -EFAULT; + + if (validate_xstate_header(&hdr)) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { u64 mask = ((u64)1 << i); - if (xfeatures & mask) { + if (hdr.xfeatures & mask) { void *dst = __raw_xsave_addr(xsave, 1 << i); offset = xstate_offsets[i]; size = xstate_sizes[i]; - if (kbuf) { - memcpy(dst, kbuf + offset, size); - } else { - if (__copy_from_user(dst, ubuf + offset, size)) - return -EFAULT; - } + if (__copy_from_user(dst, ubuf + offset, size)) + return -EFAULT; } } + if (xfeatures_mxcsr_quirk(hdr.xfeatures)) { + offset = offsetof(struct fxregs_state, mxcsr); + size = MXCSR_AND_FLAGS_SIZE; + if (__copy_from_user(&xsave->i387.mxcsr, ubuf + offset, size)) + return -EFAULT; + } + /* * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': @@ -1076,7 +1241,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, /* * Add back in the features that came in from userspace: */ - xsave->header.xfeatures |= xfeatures; + xsave->header.xfeatures |= hdr.xfeatures; return 0; } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9bef1bbeba63..01ebcb6f263e 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Dynamic function tracing support. * diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index 722a145b4139..b6c6468e10bc 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2017 Steven Rostedt, VMware Inc. 
*/ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 1dfac634bbf7..c832291d948a 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Steven Rostedt, Red Hat Inc */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index cf2ce063f65a..ec6fefbfd3c0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/arch/i386/kernel/head32.c -- prepare to run common code * @@ -30,10 +31,11 @@ static void __init i386_default_early_setup(void) asmlinkage __visible void __init i386_start_kernel(void) { - cr4_init_shadow(); - + /* Make sure IDT is set up before any exception happens */ idt_setup_early_handler(); + cr4_init_shadow(); + sanitize_boot_params(&boot_params); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index bab4fa579450..6a5d757b9cfd 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * prepare to run common code * diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 9ed3074d0d27..c29020907886 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * * Copyright (C) 1991, 1992 Linus Torvalds @@ -211,9 +212,6 @@ ENTRY(startup_32_smp) #endif .Ldefault_entry: -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $(CR0_STATE & ~X86_CR0_PG),%eax movl %eax,%cr0 @@ -401,7 +399,7 @@ ENTRY(early_idt_handler_array) # 24(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 pushl $0 # Dummy error code, to make stack frame uniform .endif pushl $i # 20(%esp) Vector number diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 513cbb012ecc..7dca675fe78d 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit * @@ -37,11 +38,12 @@ * */ -#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1)) #define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE) PGD_START_KERNEL = pgd_index(__START_KERNEL_map) +#endif L3_START_KERNEL = pud_index(__START_KERNEL_map) .text @@ -49,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map) .code64 .globl startup_64 startup_64: + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded an identity mapped page table @@ -88,6 +91,7 @@ startup_64: addq $(early_top_pgt - __START_KERNEL_map), %rax jmp 1f ENTRY(secondary_startup_64) + UNWIND_HINT_EMPTY /* * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. 
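The head_32.S hunk above replaces the assembler's terse .ifeq form with an explicit .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 comparison; the test decides, vector by vector, whether the early-IDT stub must push a dummy error code so every exception stack frame has the same layout (the head_64.S hunk below makes the same change). A stand-alone C sketch of the mask test; the mask value is illustrative, mirroring the x86 vectors that push a hardware error code (8, 10-14, 17):

#include <stdio.h>

/* Assumption: mask layout mirrors the kernel's EXCEPTION_ERRCODE_MASK;
 * bits 8, 10-14 and 17 (#DF, #TS, #NP, #SS, #GP, #PF, #AC) are set. */
#define EXCEPTION_ERRCODE_MASK 0x27d00
#define NUM_EXCEPTION_VECTORS 32

int main(void)
{
	for (int i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
		/* Mirrors: .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 */
		if (((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0)
			printf("vector %2d: stub pushes dummy error code\n", i);
		else
			printf("vector %2d: CPU pushes its own error code\n", i);
	}
	return 0;
}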
@@ -132,6 +136,7 @@ ENTRY(secondary_startup_64) movq $1f, %rax jmp *%rax 1: + UNWIND_HINT_EMPTY /* Check if nx is implemented */ movl $0x80000001, %eax @@ -149,9 +154,6 @@ ENTRY(secondary_startup_64) 1: wrmsr /* Make changes effective */ /* Setup cr0 */ -#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \ - X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \ - X86_CR0_PG) movl $CR0_STATE, %eax /* Make changes effective */ movq %rax, %cr0 @@ -234,7 +236,7 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq .Lafter_lret: -ENDPROC(secondary_startup_64) +END(secondary_startup_64) #include "verify_cpu.S" @@ -246,6 +248,7 @@ ENDPROC(secondary_startup_64) */ ENTRY(start_cpu0) movq initial_stack(%rip), %rsp + UNWIND_HINT_EMPTY jmp .Ljump_to_C_code ENDPROC(start_cpu0) #endif @@ -265,26 +268,24 @@ ENDPROC(start_cpu0) .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS __FINITDATA -bad_address: - jmp bad_address - __INIT ENTRY(early_idt_handler_array) - # 104(%rsp) %rflags - # 96(%rsp) %cs - # 88(%rsp) %rip - # 80(%rsp) error code i = 0 .rept NUM_EXCEPTION_VECTORS - .ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1 - pushq $0 # Dummy error code, to make stack frame uniform + .if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0 + UNWIND_HINT_IRET_REGS + pushq $0 # Dummy error code, to make stack frame uniform + .else + UNWIND_HINT_IRET_REGS offset=8 .endif pushq $i # 72(%rsp) Vector number jmp early_idt_handler_common + UNWIND_HINT_IRET_REGS i = i + 1 .fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc .endr -ENDPROC(early_idt_handler_array) + UNWIND_HINT_IRET_REGS offset=16 +END(early_idt_handler_array) early_idt_handler_common: /* @@ -312,6 +313,7 @@ early_idt_handler_common: pushq %r13 /* pt_regs->r13 */ pushq %r14 /* pt_regs->r14 */ pushq %r15 /* pt_regs->r15 */ + UNWIND_HINT_REGS cmpq $14,%rsi /* Page fault? */ jnz 10f @@ -326,8 +328,8 @@ early_idt_handler_common: 20: decl early_recursion_flag(%rip) - jmp restore_regs_and_iret -ENDPROC(early_idt_handler_common) + jmp restore_regs_and_return_to_kernel +END(early_idt_handler_common) __INITDATA @@ -361,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts) .data -#ifndef CONFIG_XEN -NEXT_PAGE(init_top_pgt) - .fill 512,8,0 -#else +#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH) NEXT_PAGE(init_top_pgt) .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC .org init_top_pgt + PGD_PAGE_OFFSET*8, 0 @@ -381,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt) * Don't set NX because code runs from these pages. 
*/ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD) +#else +NEXT_PAGE(init_top_pgt) + .fill 512,8,0 #endif #ifdef CONFIG_X86_5LEVEL @@ -434,7 +436,7 @@ ENTRY(phys_base) EXPORT_SYMBOL(phys_base) #include "../../x86/xen/xen-head.S" - + __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 6ebe00cb4a3b..0d307a657abb 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * 8253/PIT functions * diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4e3b8a587c88..86c4439f9d74 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> @@ -113,6 +114,7 @@ static void make_8259A_irq(unsigned int irq) io_apic_irqs &= ~(1<<irq); irq_set_chip_and_handler(irq, &i8259A_chip, handle_level_irq); enable_irq(irq); + lapic_assign_legacy_vector(irq, true); } /* diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 6107ee1cb8d5..d985cef3984f 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -92,8 +92,6 @@ static const __initdata struct idt_data def_idts[] = { INTG(X86_TRAP_DF, double_fault), #endif INTG(X86_TRAP_DB, debug), - INTG(X86_TRAP_NMI, nmi), - INTG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_MCE INTG(X86_TRAP_MC, &machine_check), @@ -225,7 +223,7 @@ idt_setup_from_table(gate_desc *idt, const struct idt_data *t, int size, bool sy idt_init_desc(&desc, t); write_idt_entry(idt, t->vector, &desc); if (sys) - set_bit(t->vector, used_vectors); + set_bit(t->vector, system_vectors); } } @@ -313,14 +311,14 @@ void __init idt_setup_apic_and_irq_gates(void) idt_setup_from_table(idt_table, apic_idts, ARRAY_SIZE(apic_idts), true); - for_each_clear_bit_from(i, used_vectors, FIRST_SYSTEM_VECTOR) { + for_each_clear_bit_from(i, system_vectors, FIRST_SYSTEM_VECTOR) { entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); set_intr_gate(i, entry); } - for_each_clear_bit_from(i, used_vectors, NR_VECTORS) { + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC - set_bit(i, used_vectors); + set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); #else entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); @@ -358,7 +356,7 @@ void idt_invalidate(void *addr) void __init update_intr_gate(unsigned int n, const void *addr) { - if (WARN_ON_ONCE(!test_bit(n, used_vectors))) + if (WARN_ON_ONCE(!test_bit(n, system_vectors))) return; set_intr_gate(n, addr); } @@ -366,6 +364,6 @@ void __init update_intr_gate(unsigned int n, const void *addr) void alloc_intr_gate(unsigned int n, const void *addr) { BUG_ON(n < FIRST_SYSTEM_VECTOR); - if (!test_and_set_bit(n, used_vectors)) + if (!test_and_set_bit(n, system_vectors)) set_intr_gate(n, addr); } diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 7ebcc4a74438..805b7a341aca 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * I/O delay strategies for inb_p/outb_p * diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 9c3cf0944bce..3feb648781c4 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This contains the io-permission bitmap code - written by obz, with changes * by Linus. 32/64 bits code unification by Miguel Botón. 
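The idt.c hunks above rename used_vectors to system_vectors without changing the idiom: a fixed bitmap records which vectors the system owns, and alloc_intr_gate() installs a gate only the first time a vector is claimed. A simplified, stand-alone model of that claim-once pattern (plain non-atomic C; the kernel uses the real bitmap helpers and writes an IDT descriptor instead of printing):

#include <stdbool.h>
#include <stdio.h>

#define NR_VECTORS 256
#define BITS_PER_LONG (8 * sizeof(unsigned long))

static unsigned long system_vectors[(NR_VECTORS + BITS_PER_LONG - 1) / BITS_PER_LONG];

static bool test_and_set_vector(unsigned int n)
{
	unsigned long mask = 1UL << (n % BITS_PER_LONG);
	unsigned long *word = &system_vectors[n / BITS_PER_LONG];
	bool old = (*word & mask) != 0;

	*word |= mask;
	return old;
}

/* Claim-once: only the first caller for a vector installs the gate. */
static void alloc_intr_gate(unsigned int n, void (*handler)(void))
{
	(void)handler;	/* a real kernel would write the IDT descriptor here */
	if (!test_and_set_vector(n))
		printf("vector 0x%x: gate installed\n", n);
}

static void dummy_handler(void) { }

int main(void)
{
	alloc_intr_gate(0xec, dummy_handler);	/* installs */
	alloc_intr_gate(0xec, dummy_handler);	/* already claimed: no-op */
	return 0;
}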
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 52089c043160..49cfd9fe7589 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -134,7 +134,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_puts(p, " Machine check polls\n"); #endif #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) - if (test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) { + if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) { seq_printf(p, "%*s: ", prec, "HYP"); for_each_online_cpu(j) seq_printf(p, "%10u ", @@ -333,105 +333,6 @@ __visible void smp_kvm_posted_intr_nested_ipi(struct pt_regs *regs) #ifdef CONFIG_HOTPLUG_CPU - -/* These two declarations are only used in check_irq_vectors_for_cpu_disable() - * below, which is protected by stop_machine(). Putting them on the stack - * results in a stack frame overflow. Dynamically allocating could result in a - * failure so declare these two cpumasks as global. - */ -static struct cpumask affinity_new, online_new; - -/* - * This cpu is going to be removed and its vectors migrated to the remaining - * online cpus. Check to see if there are enough vectors in the remaining cpus. - * This function is protected by stop_machine(). - */ -int check_irq_vectors_for_cpu_disable(void) -{ - unsigned int this_cpu, vector, this_count, count; - struct irq_desc *desc; - struct irq_data *data; - int cpu; - - this_cpu = smp_processor_id(); - cpumask_copy(&online_new, cpu_online_mask); - cpumask_clear_cpu(this_cpu, &online_new); - - this_count = 0; - for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { - desc = __this_cpu_read(vector_irq[vector]); - if (IS_ERR_OR_NULL(desc)) - continue; - /* - * Protect against concurrent action removal, affinity - * changes etc. - */ - raw_spin_lock(&desc->lock); - data = irq_desc_get_irq_data(desc); - cpumask_copy(&affinity_new, - irq_data_get_affinity_mask(data)); - cpumask_clear_cpu(this_cpu, &affinity_new); - - /* Do not count inactive or per-cpu irqs. */ - if (!irq_desc_has_action(desc) || irqd_is_per_cpu(data)) { - raw_spin_unlock(&desc->lock); - continue; - } - - raw_spin_unlock(&desc->lock); - /* - * A single irq may be mapped to multiple cpu's - * vector_irq[] (for example IOAPIC cluster mode). In - * this case we have two possibilities: - * - * 1) the resulting affinity mask is empty; that is - * this the down'd cpu is the last cpu in the irq's - * affinity mask, or - * - * 2) the resulting affinity mask is no longer a - * subset of the online cpus but the affinity mask is - * not zero; that is the down'd cpu is the last online - * cpu in a user set affinity mask. - */ - if (cpumask_empty(&affinity_new) || - !cpumask_subset(&affinity_new, &online_new)) - this_count++; - } - /* No need to check any further. */ - if (!this_count) - return 0; - - count = 0; - for_each_online_cpu(cpu) { - if (cpu == this_cpu) - continue; - /* - * We scan from FIRST_EXTERNAL_VECTOR to first system - * vector. If the vector is marked in the used vectors - * bitmap or an irq is assigned to it, we don't count - * it as available. - * - * As this is an inaccurate snapshot anyway, we can do - * this w/o holding vector_lock. 
- */ - for (vector = FIRST_EXTERNAL_VECTOR; - vector < FIRST_SYSTEM_VECTOR; vector++) { - if (!test_bit(vector, used_vectors) && - IS_ERR_OR_NULL(per_cpu(vector_irq, cpu)[vector])) { - if (++count == this_count) - return 0; - } - } - } - - if (count < this_count) { - pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", - this_cpu, this_count, count); - return -ERANGE; - } - return 0; -} - /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ void fixup_irqs(void) { diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 1f38d9a4d9de..a83b3346a0e1 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * @@ -64,7 +65,7 @@ static void call_on_stack(void *func, void *stack) static inline void *current_stack(void) { - return (void *)(current_stack_pointer() & ~(THREAD_SIZE - 1)); + return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); } static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) @@ -88,7 +89,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) /* Save the next esp at the bottom of the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; if (unlikely(overflow)) call_on_stack(print_stack_overflow, isp); @@ -139,7 +140,7 @@ void do_softirq_own_stack(void) /* Push the previous esp onto the stack */ prev_esp = (u32 *)irqstk; - *prev_esp = current_stack_pointer(); + *prev_esp = current_stack_pointer; call_on_stack(__do_softirq, isp); } diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 3be74fbdeff2..020efbf5786b 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar * diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 70dee056f92b..80bee7695a20 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * x86 specific code for irq_work * diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 1add9e08e83e..8da3e909e967 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/linkage.h> #include <linux/errno.h> #include <linux/signal.h> @@ -60,9 +61,6 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) - init_bsp_APIC(); -#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) @@ -93,6 +91,7 @@ void __init native_init_IRQ(void) x86_init.irqs.pre_vector_init(); idt_setup_apic_and_irq_gates(); + lapic_assign_system_vectors(); if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) setup_irq(2, &irq2); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index ab4f491da2a9..e56c95be2808 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * jump label x86 support * diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index db2182d63ed0..ae38dccf0c8f 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -1,8 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __X86_KERNEL_KPROBES_COMMON_H 
#define __X86_KERNEL_KPROBES_COMMON_H /* Kprobes and Optprobes common header */ +#include <asm/asm.h> + +#ifdef CONFIG_FRAME_POINTER +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" \ + " mov %" _ASM_SP ", %" _ASM_BP "\n" +#else +# define SAVE_RBP_STRING " push %" _ASM_BP "\n" +#endif + #ifdef CONFIG_X86_64 #define SAVE_REGS_STRING \ /* Skip cs, ip, orig_ax. */ \ @@ -17,7 +27,7 @@ " pushq %r10\n" \ " pushq %r11\n" \ " pushq %rbx\n" \ - " pushq %rbp\n" \ + SAVE_RBP_STRING \ " pushq %r12\n" \ " pushq %r13\n" \ " pushq %r14\n" \ @@ -48,7 +58,7 @@ " pushl %es\n" \ " pushl %ds\n" \ " pushl %eax\n" \ - " pushl %ebp\n" \ + SAVE_RBP_STRING \ " pushl %edi\n" \ " pushl %esi\n" \ " pushl %edx\n" \ @@ -75,11 +85,11 @@ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, * Copy an instruction and adjust the displacement if the instruction * uses the %rip-relative addressing mode. */ -extern int __copy_instruction(u8 *dest, u8 *src, struct insn *insn); +extern int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn); /* Generate a relative-jump/call instruction */ -extern void synthesize_reljump(void *from, void *to); -extern void synthesize_relcall(void *from, void *to); +extern void synthesize_reljump(void *dest, void *from, void *to); +extern void synthesize_relcall(void *dest, void *from, void *to); #ifdef CONFIG_OPTPROBES extern int setup_detour_execution(struct kprobe *p, struct pt_regs *regs, int reenter); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index f0153714ddac..bd36f3c33cd0 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -119,29 +119,29 @@ struct kretprobe_blackpoint kretprobe_blacklist[] = { const int kretprobe_blacklist_size = ARRAY_SIZE(kretprobe_blacklist); static nokprobe_inline void -__synthesize_relative_insn(void *from, void *to, u8 op) +__synthesize_relative_insn(void *dest, void *from, void *to, u8 op) { struct __arch_relative_insn { u8 op; s32 raddr; } __packed *insn; - insn = (struct __arch_relative_insn *)from; + insn = (struct __arch_relative_insn *)dest; insn->raddr = (s32)((long)(to) - ((long)(from) + 5)); insn->op = op; } /* Insert a jump instruction at address 'from', which jumps to address 'to'.*/ -void synthesize_reljump(void *from, void *to) +void synthesize_reljump(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVEJUMP_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVEJUMP_OPCODE); } NOKPROBE_SYMBOL(synthesize_reljump); /* Insert a call instruction at address 'from', which calls address 'to'.*/ -void synthesize_relcall(void *from, void *to) +void synthesize_relcall(void *dest, void *from, void *to) { - __synthesize_relative_insn(from, to, RELATIVECALL_OPCODE); + __synthesize_relative_insn(dest, from, to, RELATIVECALL_OPCODE); } NOKPROBE_SYMBOL(synthesize_relcall); @@ -346,10 +346,11 @@ static int is_IF_modifier(kprobe_opcode_t *insn) /* * Copy an instruction with recovering modified instruction by kprobes * and adjust the displacement if the instruction uses the %rip-relative - * addressing mode. + * addressing mode. Note that since @real will be the final place of copied + * instruction, displacement must be adjusted by @real, not @dest. * This returns the length of copied instruction, or 0 if it has an error.
*/ -int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) +int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) { kprobe_opcode_t buf[MAX_INSN_SIZE]; unsigned long recovered_insn = @@ -387,11 +388,11 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) * have given. */ newdisp = (u8 *) src + (s64) insn->displacement.value - - (u8 *) dest; + - (u8 *) real; if ((s64) (s32) newdisp != newdisp) { pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp); pr_err("\tSrc: %p, Dest: %p, old disp: %x\n", - src, dest, insn->displacement.value); + src, real, insn->displacement.value); return 0; } disp = (u8 *) dest + insn_offset_displacement(insn); @@ -402,20 +403,38 @@ int __copy_instruction(u8 *dest, u8 *src, struct insn *insn) } /* Prepare reljump right after instruction to boost */ -static void prepare_boost(struct kprobe *p, struct insn *insn) +static int prepare_boost(kprobe_opcode_t *buf, struct kprobe *p, + struct insn *insn) { + int len = insn->length; + if (can_boost(insn, p->addr) && - MAX_INSN_SIZE - insn->length >= RELATIVEJUMP_SIZE) { + MAX_INSN_SIZE - len >= RELATIVEJUMP_SIZE) { /* * These instructions can be executed directly if it * jumps back to correct address. */ - synthesize_reljump(p->ainsn.insn + insn->length, + synthesize_reljump(buf + len, p->ainsn.insn + len, p->addr + insn->length); + len += RELATIVEJUMP_SIZE; p->ainsn.boostable = true; } else { p->ainsn.boostable = false; } + + return len; +} + +/* Make the page RO when allocating it */ +void *alloc_insn_page(void) +{ + void *page; + + page = module_alloc(PAGE_SIZE); + if (page) + set_memory_ro((unsigned long)page & PAGE_MASK, 1); + + return page; } /* Recover page to RW mode before releasing it */ @@ -429,12 +448,11 @@ void free_insn_page(void *page) static int arch_copy_kprobe(struct kprobe *p) { struct insn insn; + kprobe_opcode_t buf[MAX_INSN_SIZE]; int len; - set_memory_rw((unsigned long)p->ainsn.insn & PAGE_MASK, 1); - /* Copy an instruction with recovering if other optprobe modifies it.*/ - len = __copy_instruction(p->ainsn.insn, p->addr, &insn); + len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn); if (!len) return -EINVAL; @@ -442,15 +460,16 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check.
*/ - prepare_boost(p, &insn); - - set_memory_ro((unsigned long)p->ainsn.insn & PAGE_MASK, 1); + len = prepare_boost(buf, p, &insn); /* Check whether the instruction modifies Interrupt Flag or not */ - p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn); + p->ainsn.if_modifier = is_IF_modifier(buf); /* Also, displacement change doesn't affect the first byte */ - p->opcode = p->ainsn.insn[0]; + p->opcode = buf[0]; + + /* OK, write back the instruction(s) into ROX insn buffer */ + text_poke(p->ainsn.insn, buf, len); return 0; } @@ -1080,8 +1099,6 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * raw stack chunk with redzones: */ __memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); - regs->flags &= ~X86_EFLAGS_IF; - trace_hardirqs_off(); regs->ip = (unsigned long)(jp->entry); /* diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 041f7b6dfa0f..8dc0161cec8f 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -26,7 +26,7 @@ #include "common.h" static nokprobe_inline -int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, +void __skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, unsigned long orig_ip) { /* @@ -41,33 +41,31 @@ int __skip_singlestep(struct kprobe *p, struct pt_regs *regs, __this_cpu_write(current_kprobe, NULL); if (orig_ip) regs->ip = orig_ip; - return 1; } int skip_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if (kprobe_ftrace(p)) - return __skip_singlestep(p, regs, kcb, 0); - else - return 0; + if (kprobe_ftrace(p)) { + __skip_singlestep(p, regs, kcb, 0); + preempt_enable_no_resched(); + return 1; + } + return 0; } NOKPROBE_SYMBOL(skip_singlestep); -/* Ftrace callback handler for kprobes */ +/* Ftrace callback handler for kprobes -- called under preempt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; struct kprobe_ctlblk *kcb; - unsigned long flags; - - /* Disable irq for emulating a breakpoint and avoiding preempt */ - local_irq_save(flags); + /* Preempt is disabled by ftrace */ p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) - goto end; + return; kcb = get_kprobe_ctlblk(); if (kprobe_running()) { @@ -77,17 +75,19 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, /* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */ regs->ip = ip + sizeof(kprobe_opcode_t); + /* To emulate trap based kprobes, preempt_disable here */ + preempt_disable(); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, regs)) + if (!p->pre_handler || !p->pre_handler(p, regs)) { __skip_singlestep(p, regs, kcb, orig_ip); + preempt_enable_no_resched(); + } /* * If pre_handler returns !0, it sets regs->ip and - * resets current kprobe. + * resets current kprobe, and keeps preempt count +1.
*/ } -end: - local_irq_restore(flags); } NOKPROBE_SYMBOL(kprobe_ftrace_handler); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 4f98aad38237..e941136e24d8 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -142,11 +142,11 @@ void optprobe_template_func(void); STACK_FRAME_NON_STANDARD(optprobe_template_func); #define TMPL_MOVE_IDX \ - ((long)&optprobe_template_val - (long)&optprobe_template_entry) + ((long)optprobe_template_val - (long)optprobe_template_entry) #define TMPL_CALL_IDX \ - ((long)&optprobe_template_call - (long)&optprobe_template_entry) + ((long)optprobe_template_call - (long)optprobe_template_entry) #define TMPL_END_IDX \ - ((long)&optprobe_template_end - (long)&optprobe_template_entry) + ((long)optprobe_template_end - (long)optprobe_template_entry) #define INT3_SIZE sizeof(kprobe_opcode_t) @@ -154,17 +154,15 @@ STACK_FRAME_NON_STANDARD(optprobe_template_func); static void optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - unsigned long flags; - /* This is possible if op is under delayed unoptimizing */ if (kprobe_disabled(&op->kp)) return; - local_irq_save(flags); + preempt_disable(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); } else { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Save skipped registers */ #ifdef CONFIG_X86_64 regs->cs = __KERNEL_CS; @@ -180,17 +178,17 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) opt_pre_handler(&op->kp, regs); __this_cpu_write(current_kprobe, NULL); } - local_irq_restore(flags); + preempt_enable_no_resched(); } NOKPROBE_SYMBOL(optimized_callback); -static int copy_optimized_instructions(u8 *dest, u8 *src) +static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) { struct insn insn; int len = 0, ret; while (len < RELATIVEJUMP_SIZE) { - ret = __copy_instruction(dest + len, src + len, &insn); + ret = __copy_instruction(dest + len, src + len, real, &insn); if (!ret || !can_boost(&insn, src + len)) return -EINVAL; len += ret; @@ -343,57 +341,66 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *__unused) { - u8 *buf; - int ret; + u8 *buf = NULL, *slot; + int ret, len; long rel; if (!can_optimize((unsigned long)op->kp.addr)) return -EILSEQ; - op->optinsn.insn = get_optinsn_slot(); - if (!op->optinsn.insn) + buf = kzalloc(MAX_OPTINSN_SIZE, GFP_KERNEL); + if (!buf) return -ENOMEM; + op->optinsn.insn = slot = get_optinsn_slot(); + if (!slot) { + ret = -ENOMEM; + goto out; + } + /* * Verify if the address gap is in 2GB range, because this uses * a relative jump. 
*/ - rel = (long)op->optinsn.insn - (long)op->kp.addr + RELATIVEJUMP_SIZE; + rel = (long)slot - (long)op->kp.addr + RELATIVEJUMP_SIZE; if (abs(rel) > 0x7fffffff) { - __arch_remove_optimized_kprobe(op, 0); - return -ERANGE; + ret = -ERANGE; + goto err; } - buf = (u8 *)op->optinsn.insn; - set_memory_rw((unsigned long)buf & PAGE_MASK, 1); + /* Copy arch-dep-instance from template */ + memcpy(buf, optprobe_template_entry, TMPL_END_IDX); /* Copy instructions into the out-of-line buffer */ - ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr); - if (ret < 0) { - __arch_remove_optimized_kprobe(op, 0); - return ret; - } + ret = copy_optimized_instructions(buf + TMPL_END_IDX, op->kp.addr, + slot + TMPL_END_IDX); + if (ret < 0) + goto err; op->optinsn.size = ret; - - /* Copy arch-dep-instance from template */ - memcpy(buf, &optprobe_template_entry, TMPL_END_IDX); + len = TMPL_END_IDX + op->optinsn.size; /* Set probe information */ synthesize_set_arg1(buf + TMPL_MOVE_IDX, (unsigned long)op); /* Set probe function call */ - synthesize_relcall(buf + TMPL_CALL_IDX, optimized_callback); + synthesize_relcall(buf + TMPL_CALL_IDX, + slot + TMPL_CALL_IDX, optimized_callback); /* Set returning jmp instruction at the tail of out-of-line buffer */ - synthesize_reljump(buf + TMPL_END_IDX + op->optinsn.size, + synthesize_reljump(buf + len, slot + len, (u8 *)op->kp.addr + op->optinsn.size); - - set_memory_ro((unsigned long)buf & PAGE_MASK, 1); - - flush_icache_range((unsigned long) buf, - (unsigned long) buf + TMPL_END_IDX + - op->optinsn.size + RELATIVEJUMP_SIZE); - return 0; + len += RELATIVEJUMP_SIZE; + + /* We have to use text_poke for instruction buffer because it is RO */ + text_poke(slot, buf, len); + ret = 0; +out: + kfree(buf); + return ret; + +err: + __arch_remove_optimized_kprobe(op, 0); + goto out; } /* diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c index 4b0592ca9e47..8c1cc08f514f 100644 --- a/arch/x86/kernel/ksysfs.c +++ b/arch/x86/kernel/ksysfs.c @@ -299,7 +299,7 @@ static int __init create_setup_data_nodes(struct kobject *parent) return 0; out_clean_nodes: - for (j = i - 1; j > 0; j--) + for (j = i - 1; j >= 0; j--) cleanup_setup_data_node(*(kobjp + j)); kfree(kobjp); out_setup_data_kobj: diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index aa60a08b65b1..b40ffbf156c1 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -75,8 +75,8 @@ static int parse_no_kvmclock_vsyscall(char *arg) early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall); -static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); -static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); +static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64); static int has_steal_clock = 0; /* @@ -117,7 +117,11 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -void kvm_async_pf_task_wait(u32 token) +/* + * @interrupt_kernel: Is this called from a routine which interrupts the kernel + * (other than user space)?
+ */ +void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; @@ -140,7 +144,10 @@ void kvm_async_pf_task_wait(u32 token) n.token = token; n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || preempt_count() > 1; + n.halted = is_idle_task(current) || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) + ? preempt_count() > 1 || rcu_preempt_depth() + : interrupt_kernel); init_swait_queue_head(&n.wq); hlist_add_head(&n.link, &b->list); raw_spin_unlock(&b->lock); @@ -268,7 +275,7 @@ do_async_page_fault(struct pt_regs *regs, unsigned long error_code) case KVM_PV_REASON_PAGE_NOT_PRESENT: /* page is swapped out by the host. */ prev_state = exception_enter(); - kvm_async_pf_task_wait((u32)read_cr2()); + kvm_async_pf_task_wait((u32)read_cr2(), !user_mode(regs)); exception_exit(prev_state); break; case KVM_PV_REASON_PAGE_READY: @@ -305,7 +312,7 @@ static void kvm_register_steal_time(void) cpu, (unsigned long long) slow_virt_to_phys(st)); } -static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; +static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED; static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) { @@ -419,9 +426,42 @@ void kvm_disable_steal_time(void) wrmsr(MSR_KVM_STEAL_TIME, 0, 0); } +static inline void __set_percpu_decrypted(void *ptr, unsigned long size) +{ + early_set_memory_decrypted((unsigned long) ptr, size); +} + +/* + * Iterate through all possible CPUs and map the memory region pointed + * by apf_reason, steal_time and kvm_apic_eoi as decrypted at once. + * + * Note: we iterate through all possible CPUs to ensure that CPUs + * hotplugged will have their per-cpu variable already mapped as + * decrypted. + */ +static void __init sev_map_percpu_data(void) +{ + int cpu; + + if (!sev_active()) + return; + + for_each_possible_cpu(cpu) { + __set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason)); + __set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time)); + __set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi)); + } +} + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { + /* + * Map the per-cpu variables as decrypted before kvm_guest_cpu_init() + * shares the guest physical address with the hypervisor. 
+ */ + sev_map_percpu_data(); + kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); kvm_spinlock_init(); @@ -458,7 +498,7 @@ static void __init kvm_apf_trap_init(void) update_intr_gate(X86_TRAP_PF, async_page_fault); } -void __init kvm_guest_init(void) +static void __init kvm_guest_init(void) { int i; @@ -489,6 +529,7 @@ void __init kvm_guest_init(void) kvm_cpu_online, kvm_cpu_down_prepare) < 0) pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n"); #else + sev_map_percpu_data(); kvm_guest_cpu_init(); #endif @@ -537,12 +578,13 @@ static uint32_t __init kvm_detect(void) return kvm_cpuid_base(); } -const struct hypervisor_x86 x86_hyper_kvm __refconst = { +const __initconst struct hypervisor_x86 x86_hyper_kvm = { .name = "KVM", .detect = kvm_detect, - .x2apic_available = kvm_para_available, + .type = X86_HYPER_KVM, + .init.guest_late_init = kvm_guest_init, + .init.x2apic_available = kvm_para_available, }; -EXPORT_SYMBOL_GPL(x86_hyper_kvm); static __init int activate_jump_labels(void) { diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index d88967659098..8b26c9e01cc4 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -27,6 +27,7 @@ #include <linux/sched.h> #include <linux/sched/clock.h> +#include <asm/mem_encrypt.h> #include <asm/x86_init.h> #include <asm/reboot.h> #include <asm/kvmclock.h> @@ -45,13 +46,7 @@ early_param("no-kvmclock", parse_no_kvmclock); /* The hypervisor will put information about time periodically here */ static struct pvclock_vsyscall_time_info *hv_clock; -static struct pvclock_wall_clock wall_clock; - -struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) -{ - return hv_clock; -} -EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va); +static struct pvclock_wall_clock *wall_clock; /* * The wallclock is the time of day when we booted. 
Since then, some time may @@ -64,22 +59,22 @@ static void kvm_get_wallclock(struct timespec *now) int low, high; int cpu; - low = (int)__pa_symbol(&wall_clock); - high = ((u64)__pa_symbol(&wall_clock) >> 32); + low = (int)slow_virt_to_phys(wall_clock); + high = ((u64)slow_virt_to_phys(wall_clock) >> 32); native_write_msr(msr_kvm_wall_clock, low, high); cpu = get_cpu(); vcpu_time = &hv_clock[cpu].pvti; - pvclock_read_wallclock(&wall_clock, vcpu_time, now); + pvclock_read_wallclock(wall_clock, vcpu_time, now); put_cpu(); } static int kvm_set_wallclock(const struct timespec *now) { - return -1; + return -ENODEV; } static u64 kvm_clock_read(void) @@ -249,11 +244,39 @@ static void kvm_shutdown(void) native_machine_shutdown(); } +static phys_addr_t __init kvm_memblock_alloc(phys_addr_t size, + phys_addr_t align) +{ + phys_addr_t mem; + + mem = memblock_alloc(size, align); + if (!mem) + return 0; + + if (sev_active()) { + if (early_set_memory_decrypted((unsigned long)__va(mem), size)) + goto e_free; + } + + return mem; +e_free: + memblock_free(mem, size); + return 0; +} + +static void __init kvm_memblock_free(phys_addr_t addr, phys_addr_t size) +{ + if (sev_active()) + early_set_memory_encrypted((unsigned long)__va(addr), size); + + memblock_free(addr, size); +} + void __init kvmclock_init(void) { struct pvclock_vcpu_time_info *vcpu_time; - unsigned long mem; - int size, cpu; + unsigned long mem, mem_wall_clock; + int size, cpu, wall_clock_size; u8 flags; size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS); @@ -267,21 +290,35 @@ void __init kvmclock_init(void) } else if (!(kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE))) return; - printk(KERN_INFO "kvm-clock: Using msrs %x and %x", - msr_kvm_system_time, msr_kvm_wall_clock); + wall_clock_size = PAGE_ALIGN(sizeof(struct pvclock_wall_clock)); + mem_wall_clock = kvm_memblock_alloc(wall_clock_size, PAGE_SIZE); + if (!mem_wall_clock) + return; - mem = memblock_alloc(size, PAGE_SIZE); - if (!mem) + wall_clock = __va(mem_wall_clock); + memset(wall_clock, 0, wall_clock_size); + + mem = kvm_memblock_alloc(size, PAGE_SIZE); + if (!mem) { + kvm_memblock_free(mem_wall_clock, wall_clock_size); + wall_clock = NULL; return; + } + hv_clock = __va(mem); memset(hv_clock, 0, size); if (kvm_register_clock("primary cpu clock")) { hv_clock = NULL; - memblock_free(mem, size); + kvm_memblock_free(mem, size); + kvm_memblock_free(mem_wall_clock, wall_clock_size); + wall_clock = NULL; return; } + printk(KERN_INFO "kvm-clock: Using msrs %x and %x", + msr_kvm_system_time, msr_kvm_wall_clock); + if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT); @@ -334,6 +371,7 @@ int __init kvm_setup_vsyscall_timeinfo(void) return 1; } + pvclock_set_pvti_cpu0_va(hv_clock); put_cpu(); kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK; diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index f0e64db18ac8..1c1eae961340 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com> @@ -12,6 +13,7 @@ #include <linux/string.h> #include <linux/mm.h> #include <linux/smp.h> +#include <linux/syscalls.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/uaccess.h> @@ -101,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) static void install_ldt(struct mm_struct *current_mm, struct ldt_struct *ldt) { - /* 
Synchronizes with lockless_dereference in load_mm_ldt. */ + /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(&current_mm->context.ldt, ldt); /* Activate the LDT for all CPUs using current_mm. */ @@ -294,8 +296,8 @@ out: return error; } -asmlinkage int sys_modify_ldt(int func, void __user *ptr, - unsigned long bytecount) +SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr , + unsigned long , bytecount) { int ret = -ENOSYS; @@ -313,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr, ret = write_ldt(ptr, bytecount, 0); break; } - return ret; + /* + * The SYSCALL_DEFINE() macros give us an 'unsigned long' + * return type, but the ABI for sys_modify_ldt() expects + * 'int'. This cast gives us an int-sized value in %rax + * for the return code. The 'unsigned' is necessary so + * the compiler does not try to sign-extend the negative + * return codes into the high half of the register when + * taking the value from int->long. + */ + return (unsigned int)ret; } diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index f4c886d9165c..b5cb49e57df8 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * AMD Family 10h mmconfig enablement */ diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 62e7d70aadd5..da0c160e5589 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -172,19 +172,27 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_X86_64_NONE: break; case R_X86_64_64: + if (*(u64 *)loc != 0) + goto invalid_relocation; *(u64 *)loc = val; break; case R_X86_64_32: + if (*(u32 *)loc != 0) + goto invalid_relocation; *(u32 *)loc = val; if (val != *(u32 *)loc) goto overflow; break; case R_X86_64_32S: + if (*(s32 *)loc != 0) + goto invalid_relocation; *(s32 *)loc = val; if ((s64)val != *(s32 *)loc) goto overflow; break; case R_X86_64_PC32: + if (*(u32 *)loc != 0) + goto invalid_relocation; val -= (u64)loc; *(u32 *)loc = val; #if 0 @@ -200,6 +208,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, } return 0; +invalid_relocation: + pr_err("x86/modules: Skipping invalid relocation target, existing value is nonzero for type %d, loc %p, val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), loc, val); + return -ENOEXEC; + overflow: pr_err("overflow in relocation type %d val %Lx\n", (int)ELF64_R_TYPE(rel[i].r_info), val); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 5cbb3177ed17..3a4b12809ab5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Intel Multiprocessor Specification 1.1 and 1.4 * compliant MP-table parsing routines.
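The (unsigned int) cast in sys_modify_ldt() above is subtle enough to deserve a standalone illustration. A minimal user-space sketch (hypothetical values, not part of the patch) of why widening a negative int through unsigned int keeps the error code confined to the low 32 bits of the register:

#include <stdio.h>

int main(void)
{
	int ret = -38;	/* an int-sized error code, e.g. -ENOSYS */

	/* Widening via long sign-extends: the upper 32 bits all become 1. */
	unsigned long sign_extended = (unsigned long)(long)ret;
	/* Widening via unsigned int zero-extends: only the low 32 bits survive. */
	unsigned long zero_extended = (unsigned long)(unsigned int)ret;

	printf("sign-extended: %#lx\n", sign_extended);	/* 0xffffffffffffffda */
	printf("zero-extended: %#lx\n", zero_extended);	/* 0xffffffda */
	return 0;
}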
@@ -430,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } static unsigned long mpf_base; +static bool mpf_found; static unsigned long __init get_mpc_size(unsigned long physptr) { @@ -503,7 +505,7 @@ void __init default_get_smp_config(unsigned int early) if (!smp_found_config) return; - if (!mpf_base) + if (!mpf_found) return; if (acpi_lapic && early) @@ -592,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) smp_found_config = 1; #endif mpf_base = base; + mpf_found = true; pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", base, base + sizeof(*mpf) - 1, mpf); @@ -857,7 +860,7 @@ static int __init update_mp_table(void) if (!enable_update_mptable) return 0; - if (!mpf_base) + if (!mpf_found) return 0; mpf = early_memremap(mpf_base, sizeof(*mpf)); diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 35aafc95e4b8..18bc9b51ac9b 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -105,7 +105,7 @@ static void nmi_max_handler(struct irq_work *w) { struct nmiaction *a = container_of(w, struct nmiaction, irq_work); int remainder_ns, decimal_msecs; - u64 whole_msecs = ACCESS_ONCE(a->max_duration); + u64 whole_msecs = READ_ONCE(a->max_duration); remainder_ns = do_div(whole_msecs, (1000 * 1000)); decimal_msecs = remainder_ns / 1000; diff --git a/arch/x86/kernel/nmi_selftest.c b/arch/x86/kernel/nmi_selftest.c index d27f8d84c4ff..a1a96df3dff1 100644 --- a/arch/x86/kernel/nmi_selftest.c +++ b/arch/x86/kernel/nmi_selftest.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * arch/x86/kernel/nmi-selftest.c * diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 8f2d1c9d43a8..71f2d1125ec0 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Split spinlock implementation out into its own file, so it can be * compiled in a FTRACE-compatible way. diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 19a3e8f961c7..041096bdef86 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -115,8 +115,18 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target, return 5; } -/* Neat trick to map patch type back to the call within the - * corresponding structure. */ +DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); + +void __init native_pv_lock_init(void) +{ + if (!static_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_disable(&virt_spin_lock_key); +} + +/* + * Neat trick to map patch type back to the call within the + * corresponding structure. 
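The virt_spin_lock_key added above is defined true and disabled at boot on bare metal, so the hypervisor-only path compiles down to a patched-out branch. A sketch of the consuming side, assuming a virt_spin_lock()-style helper (the real call site lives in asm/qspinlock.h, not shown in this hunk):

static inline bool virt_spin_lock(struct qspinlock *lock)
{
	/* Disabled by native_pv_lock_init() when not running on a hypervisor. */
	if (!static_branch_likely(&virt_spin_lock_key))
		return false;

	/* ... fall back to simple test-and-set spinning here, which behaves
	 * better than queued spinlocks when vCPUs can be preempted ... */
	return true;
}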
+ */ static void *get_call_destination(u8 type) { struct paravirt_patch_template tmpl = { diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c index 553acbbb4d32..758e69d72ebf 100644 --- a/arch/x86/kernel/paravirt_patch_32.c +++ b/arch/x86/kernel/paravirt_patch_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <asm/paravirt.h> DEF_NATIVE(pv_irq_ops, irq_disable, "cli"); diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c index 11aaf1eaa0e4..ac0be8283325 100644 --- a/arch/x86/kernel/paravirt_patch_64.c +++ b/arch/x86/kernel/paravirt_patch_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <asm/paravirt.h> #include <asm/asm-offsets.h> #include <linux/stringify.h> diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5286a4a92cf7..35c461f21815 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -898,10 +898,9 @@ static void calioc2_dump_error_regs(struct iommu_table *tbl) PHB_ROOT_COMPLEX_STATUS); } -static void calgary_watchdog(unsigned long data) +static void calgary_watchdog(struct timer_list *t) { - struct pci_dev *dev = (struct pci_dev *)data; - struct iommu_table *tbl = pci_iommu(dev->bus); + struct iommu_table *tbl = from_timer(tbl, t, watchdog_timer); void __iomem *bbar = tbl->bbar; u32 val32; void __iomem *target; @@ -1016,8 +1015,7 @@ static void __init calgary_enable_translation(struct pci_dev *dev) writel(cpu_to_be32(val32), target); readl(target); /* flush */ - setup_timer(&tbl->watchdog_timer, &calgary_watchdog, - (unsigned long)dev); + timer_setup(&tbl->watchdog_timer, calgary_watchdog, 0); mod_timer(&tbl->watchdog_timer, jiffies); } diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 0accc2404b92..599d7462eccc 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/dma-mapping.h> #include <linux/dma-debug.h> #include <linux/dmar.h> diff --git a/arch/x86/kernel/pci-iommu_table.c b/arch/x86/kernel/pci-iommu_table.c index f712dfdf1357..4dfd90a75e63 100644 --- a/arch/x86/kernel/pci-iommu_table.c +++ b/arch/x86/kernel/pci-iommu_table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/dma-mapping.h> #include <asm/iommu_table.h> #include <linux/string.h> diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 4fc3cb60ea11..b0caae27e1b7 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Fallback functions when the main IOMMU code is not compiled in. This code is roughly equivalent to i386. 
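The Calgary watchdog hunk above is one instance of the tree-wide timer API conversion: the callback now receives the timer_list pointer and recovers its container with from_timer() instead of being handed an (unsigned long) cookie. A generic sketch of the idiom, with a hypothetical driver struct:

struct mydev {
	struct timer_list watchdog_timer;	/* must be embedded in the container */
};

static void mydev_watchdog(struct timer_list *t)
{
	/* from_timer() is a container_of() keyed on the field name. */
	struct mydev *dev = from_timer(dev, t, watchdog_timer);

	/* ... service dev, possibly re-arm with mod_timer() ... */
}

/* At init time; note there is no data cookie argument any more: */
timer_setup(&dev->watchdog_timer, mydev_watchdog, 0);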
*/ #include <linux/dma-mapping.h> diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 677077510e30..53bd05ea90d8 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Glue code to lib/swiotlb.c */ #include <linux/pci.h> diff --git a/arch/x86/kernel/pcspeaker.c b/arch/x86/kernel/pcspeaker.c index a311ffcaad16..da5190a1ea16 100644 --- a/arch/x86/kernel/pcspeaker.c +++ b/arch/x86/kernel/pcspeaker.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/platform_device.h> #include <linux/err.h> #include <linux/init.h> diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index 587d887f7f17..e47b2dbbdef3 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> #include <linux/kernel.h> #include <linux/sched.h> diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 502a77d0adb0..39a59299bfa0 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 0c5315d322c8..6b07faaa1579 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015, Christoph Hellwig. * Copyright (c) 2015, Intel Corporation. @@ -6,7 +7,7 @@ #include <linux/init.h> #include <linux/ioport.h> -static int found(u64 start, u64 end, void *data) +static int found(struct resource *res, void *data) { return 1; } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 963e3fb56437..ee0286390a4c 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/sched.h> #include <linux/mm.h> #include <linux/uaccess.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bd6b85fac666..97fb3e5737f5 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/errno.h> @@ -48,7 +49,13 @@ */ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { .x86_tss = { - .sp0 = TOP_OF_INIT_STACK, + /* + * .sp0 is only used when entering ring 0 from a lower + * privilege level. Since the init task never runs anything + * but ring 0 code, there is no need for a valid value here. + * Poison it. + */ + .sp0 = (1UL << (BITS_PER_LONG-1)) + 1, #ifdef CONFIG_X86_32 .ss0 = __KERNEL_DS, .ss1 = __KERNEL_CS, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 11966251cd42..45bf0c5f93e1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Reload esp0 and cpu_current_top_of_stack. This changes - * current_thread_info(). + * current_thread_info(). Refresh the SYSENTER configuration in + * case prev or next is vm86. 
*/ - load_sp0(tss, next); + update_sp0(next_p); + refresh_sysenter_cs(next); this_cpu_write(cpu_current_top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 302e7b2572d1..eeeb34f85c25 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, struct inactive_task_frame *frame; struct task_struct *me = current; - p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE; childregs = task_pt_regs(p); fork_frame = container_of(childregs, struct fork_frame, regs); frame = &fork_frame->frame; @@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ this_cpu_write(current_task, next_p); - /* Reload esp0 and ss1. This changes current_thread_info(). */ - load_sp0(tss, next); + /* Reload sp0. */ + update_sp0(next_p); /* * Now maybe reload the debug registers and handle I/O bitmaps diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 5c3f6d6a5078..761f6af6efa5 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -25,8 +25,10 @@ #include <asm/fixmap.h> #include <asm/pvclock.h> +#include <asm/vgtod.h> static u8 valid_flags __read_mostly = 0; +static struct pvclock_vsyscall_time_info *pvti_cpu0_va __read_mostly; void pvclock_set_flags(u8 flags) { @@ -144,3 +146,15 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } + +void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti) +{ + WARN_ON(vclock_was_used(VCLOCK_PVCLOCK)); + pvti_cpu0_va = pvti; +} + +struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void) +{ + return pvti_cpu0_va; +} +EXPORT_SYMBOL_GPL(pvclock_get_pvti_cpu0_va); diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index eaa591cfd98b..697a4ce04308 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains work-arounds for x86 and x86_64 platform bugs. */ diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 54180fa6f66f..2126b9d27c34 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/export.h> @@ -105,6 +106,10 @@ void __noreturn machine_real_restart(unsigned int type) load_cr3(initial_page_table); #else write_cr3(real_mode_header->trampoline_pgd); + + /* Exiting long mode will fail if CR4.PCIDE is set. */ + if (static_cpu_has(X86_FEATURE_PCID)) + cr4_clear_bits(X86_CR4_PCIDE); #endif /* Jump to the identity-mapped low memory code */ diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c index c8e41e90f59c..b7c0f142d026 100644 --- a/arch/x86/kernel/reboot_fixups_32.c +++ b/arch/x86/kernel/reboot_fixups_32.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This is a good place to put board specific reboot fixups. 
* diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index 5ab3895516ac..9b9fb7882c20 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ioport.h> #include <asm/e820/api.h> diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5b21cb7d84d6..69ac9cb9cac6 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * RTC related functions */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0957dd73d127..8af2e8d0c0a1 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -136,18 +136,6 @@ RESERVE_BRK(dmi_alloc, 65536); static __initdata unsigned long _brk_start = (unsigned long)__brk_base; unsigned long _brk_end = (unsigned long)__brk_base; -#ifdef CONFIG_X86_64 -int default_cpu_present_to_apicid(int mps_cpu) -{ - return __default_cpu_present_to_apicid(mps_cpu); -} - -int default_check_phys_apicid_present(int phys_apicid) -{ - return __default_check_phys_apicid_present(phys_apicid); -} -#endif - struct boot_params boot_params; /* @@ -380,9 +368,11 @@ static void __init reserve_initrd(void) * If SME is active, this memory will be marked encrypted by the * kernel when it is accessed (including relocation). However, the * ramdisk image was loaded decrypted by the bootloader, so make - * sure that it is encrypted before accessing it. + * sure that it is encrypted before accessing it. For SEV the + * ramdisk will already be encrypted, so only do this for SME. */ - sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); + if (sme_active()) + sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); initrd_start = 0; @@ -822,26 +812,6 @@ dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) return 0; } -static void __init simple_udelay_calibration(void) -{ - unsigned int tsc_khz, cpu_khz; - unsigned long lpj; - - if (!boot_cpu_has(X86_FEATURE_TSC)) - return; - - cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); - - tsc_khz = tsc_khz ? : cpu_khz; - if (!tsc_khz) - return; - - lpj = tsc_khz * 1000; - do_div(lpj, HZ); - loops_per_jiffy = lpj; -} - /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1045,12 +1015,10 @@ void __init setup_arch(char **cmdline_p) /* * VMware detection requires dmi to be available, so this - * needs to be done after dmi_scan_machine, for the BP. + * needs to be done after dmi_scan_machine(), for the boot CPU. 
*/ init_hypervisor_platform(); - simple_udelay_calibration(); - x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */ @@ -1135,9 +1103,6 @@ void __init setup_arch(char **cmdline_p) memblock_set_current_limit(ISA_END_ADDRESS); e820__memblock_setup(); - if (!early_xdbc_setup_hardware()) - early_xdbc_register_console(); - reserve_bios_regions(); if (efi_enabled(EFI_MEMMAP)) { @@ -1243,6 +1208,10 @@ void __init setup_arch(char **cmdline_p) kvmclock_init(); #endif + tsc_early_delay_calibrate(); + if (!early_xdbc_setup_hardware()) + early_xdbc_register_console(); + x86_init.paging.pagetable_init(); kasan_init(); @@ -1294,7 +1263,7 @@ void __init setup_arch(char **cmdline_p) io_apic_init_mappings(); - kvm_guest_init(); + x86_init.hyper.guest_late_init(); e820__reserve_resources(); e820__register_nosave_regions(max_low_pfn); diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 28dafed6c682..497aa766fab3 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index e04442345fc0..b9e00e8f1c9b 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs @@ -263,7 +264,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, sp = (unsigned long) ka->sa.sa_restorer; } - if (fpu->fpstate_active) { + if (fpu->initialized) { sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; @@ -279,7 +280,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (fpu->fpstate_active && + if (fpu->initialized && copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; @@ -755,7 +756,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (fpu->fpstate_active) + if (fpu->initialized) fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index ab9feb5887b1..8c6da1a643da 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/compat.h> #include <linux/uaccess.h> #include <linux/ptrace.h> diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0854ff169274..3d01df7d7cf6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/qspinlock.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -100,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. 
We might want to allocate that dynamically */ -static int *physical_to_logical_pkg __read_mostly; -static unsigned long *physical_package_map __read_mostly;; -static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; @@ -194,6 +192,12 @@ static void smp_callin(void) smp_store_cpu_info(cpuid); /* + * The topology information must be up to date before + * calibrate_delay() and notify_cpu_starting(). + */ + set_cpu_sibling_map(raw_smp_processor_id()); + + /* * Get our bogomips. * Update loops_per_jiffy in cpu_data. Previous call to * smp_store_cpu_info() stored a value that is close but not as @@ -203,11 +207,6 @@ static void smp_callin(void) cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy; pr_debug("Stack at about %p\n", &cpuid); - /* - * This must be done before setting cpu_online_mask - * or calling notify_cpu_starting. - */ - set_cpu_sibling_map(raw_smp_processor_id()); wmb(); notify_cpu_starting(cpuid); @@ -232,12 +231,6 @@ static void notrace start_secondary(void *unused) */ if (boot_cpu_has(X86_FEATURE_PCID)) __write_cr4(__read_cr4() | X86_CR4_PCIDE); - cpu_init(); - x86_cpuinit.early_percpu_clock_init(); - preempt_disable(); - smp_callin(); - - enable_start_cpu0 = 0; #ifdef CONFIG_X86_32 /* switch away from the initial page table */ @@ -245,22 +238,29 @@ static void notrace start_secondary(void *unused) __flush_tlb_all(); #endif + cpu_init(); + x86_cpuinit.early_percpu_clock_init(); + preempt_disable(); + smp_callin(); + + enable_start_cpu0 = 0; + /* otherwise gcc will move up smp_processor_id before the cpu_init */ barrier(); /* - * Check TSC synchronization with the BP: + * Check TSC synchronization with the boot CPU: */ check_tsc_sync_target(); /* - * Lock vector_lock and initialize the vectors on this cpu - * before setting the cpu online. We must set it online with - * vector_lock held to prevent a concurrent setup/teardown - * from seeing a half valid vector space. + * Lock vector_lock, set CPU online and bring the vector + * allocator online. Online must be set with vector_lock held + * to prevent a concurrent irq setup/teardown from seeing a + * half valid vector space. */ lock_vector_lock(); - setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); + lapic_online(); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); x86_platform.nmi_init(); @@ -278,108 +278,48 @@ static void notrace start_secondary(void *unused) } /** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->phys_proc_id == phys_pkg) + return c->logical_proc_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +/** * topology_update_package_map - Update the physical to logical package map * @pkg: The physical package id as retrieved via CPUID * @cpu: The cpu for which this is updated */ int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new; - - /* Called from early boot ? */ - if (!physical_package_map) - return 0; - - if (pkg >= max_physical_pkg_id) - return -EINVAL; + int new; - /* Set the logical package id */ - if (test_and_set_bit(pkg, physical_package_map)) + /* Already available somewhere? 
*/ + new = topology_phys_to_logical_pkg(pkg); + if (new >= 0) goto found; - if (logical_packages >= __max_logical_packages) { - pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", - logical_packages, cpu, __max_logical_packages); - return -ENOSPC; - } - new = logical_packages++; if (new != pkg) { pr_info("CPU %u Converting physical %u to logical package %u\n", cpu, pkg, new); } - physical_to_logical_pkg[pkg] = new; - found: - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + cpu_data(cpu).logical_proc_id = new; return 0; } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - if (phys_pkg >= max_physical_pkg_id) - return -1; - return physical_to_logical_pkg[phys_pkg]; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) -{ - unsigned int ncpus; - size_t size; - - /* - * Today neither Intel nor AMD support heterogenous systems. That - * might change in the future.... - * - * While ideally we'd want '* smp_num_siblings' in the below @ncpus - * computation, this won't actually work since some Intel BIOSes - * report inconsistent HT data when they disable HT. - * - * In particular, they reduce the APIC-IDs to only include the cores, - * but leave the CPUID topology to say there are (2) siblings. - * This means we don't know how many threads there will be until - * after the APIC enumeration. - * - * By not including this we'll sometimes over-estimate the number of - * logical packages by the amount of !present siblings, but this is - * still better than MAX_LOCAL_APIC. - * - * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited - * on the command line leading to a similar issue as the HT disable - * problem because the hyperthreads are usually enumerated after the - * primary cores. - */ - ncpus = boot_cpu_data.x86_max_cores; - if (!ncpus) { - pr_warn("x86_max_cores == zero !?!?"); - ncpus = 1; - } - - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - logical_packages = 0; - - /* - * Possibly larger than what we need as the number of apic ids per - * package can be smaller than the actual used apic ids. - */ - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); - size = max_physical_pkg_id * sizeof(unsigned int); - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); - memset(physical_to_logical_pkg, 0xff, size); - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); - physical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); - - topology_update_package_map(c->phys_proc_id, cpu); -} - void __init smp_store_boot_cpu_info(void) { int id = 0; /* CPU 0 */ @@ -387,7 +327,8 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(c, id); + topology_update_package_map(c->phys_proc_id, id); + c->initialized = true; } /* @@ -398,13 +339,16 @@ void smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when * bringing up AP or offlined CPU0. 
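With topology_update_package_map() above reduced to a scan of cpu_data, logical package ids are handed out purely in bring-up order. A worked example, assuming firmware reports two sockets as phys_proc_id 0 and 3 (hypothetical values):

/* Socket phys_proc_id 0 boots first -> logical package 0;
 * socket phys_proc_id 3 follows -> logical package 1. */
int pkg = topology_phys_to_logical_pkg(3);	/* 1, once one of its CPUs is up */
int none = topology_phys_to_logical_pkg(7);	/* -1: no such package online */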
*/ identify_secondary_cpu(c); + c->initialized = true; } static bool @@ -960,8 +904,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); - per_cpu(cpu_current_top_of_stack, cpu) = - (unsigned long)task_stack_page(idle) + THREAD_SIZE; + per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif @@ -1093,7 +1036,7 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle) unsigned long flags; int err, ret = 0; - WARN_ON(irqs_disabled()); + lockdep_assert_irqs_enabled(); pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); @@ -1189,17 +1132,10 @@ static __init void disable_smp(void) cpumask_set_cpu(0, topology_core_cpumask(0)); } -enum { - SMP_OK, - SMP_NO_CONFIG, - SMP_NO_APIC, - SMP_FORCE_UP, -}; - /* * Various sanity checks. */ -static int __init smp_sanity_check(unsigned max_cpus) +static void __init smp_sanity_check(void) { preempt_disable(); @@ -1237,16 +1173,6 @@ static int __init smp_sanity_check(unsigned max_cpus) } /* - * If we couldn't find an SMP configuration at boot time, - * get out of here now! - */ - if (!smp_found_config && !acpi_lapic) { - preempt_enable(); - pr_notice("SMP motherboard not detected\n"); - return SMP_NO_CONFIG; - } - - /* * Should not be necessary because the MP table should list the boot * CPU too, but we do it for the sake of robustness anyway. */ @@ -1256,29 +1182,6 @@ static int __init smp_sanity_check(unsigned max_cpus) physid_set(hard_smp_processor_id(), phys_cpu_present_map); } preempt_enable(); - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(boot_cpu_apic_version) && - !boot_cpu_has(X86_FEATURE_APIC)) { - if (!disable_apic) { - pr_err("BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - pr_err("... forcing use of dummy APIC emulation (tell your hw vendor)\n"); - } - return SMP_NO_APIC; - } - - /* - * If SMP should be disabled, then really disable it! - */ - if (!max_cpus) { - pr_info("SMP mode deactivated\n"); - return SMP_FORCE_UP; - } - - return SMP_OK; } static void __init smp_cpu_index_default(void) @@ -1293,9 +1196,18 @@ static void __init smp_cpu_index_default(void) } } +static void __init smp_get_logical_apicid(void) +{ + if (x2apic_mode) + cpu0_logical_apicid = apic_read(APIC_LDR); + else + cpu0_logical_apicid = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); +} + /* - * Prepare for SMP bootup. The MP table or ACPI has been read - * earlier. Just do some sanity checking here and enable APIC mode. + * Prepare for SMP bootup. + * @max_cpus: configured maximum number of CPUs. It is a legacy parameter + * for common interface support. */ void __init native_smp_prepare_cpus(unsigned int max_cpus) { @@ -1327,35 +1239,33 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); - switch (smp_sanity_check(max_cpus)) { - case SMP_NO_CONFIG: - disable_smp(); - if (APIC_init_uniprocessor()) - pr_notice("Local APIC not detected.
Using dummy APIC emulation.\n"); - return; - case SMP_NO_APIC: + smp_sanity_check(); + + switch (apic_intr_mode) { + case APIC_PIC: + case APIC_VIRTUAL_WIRE_NO_CONFIG: disable_smp(); return; - case SMP_FORCE_UP: + case APIC_SYMMETRIC_IO_NO_ROUTING: disable_smp(); - apic_bsp_setup(false); + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); return; - case SMP_OK: + case APIC_VIRTUAL_WIRE: + case APIC_SYMMETRIC_IO: break; } - if (read_apic_id() != boot_cpu_physical_apicid) { - panic("Boot APIC ID in local APIC unexpected (%d vs %d)", - read_apic_id(), boot_cpu_physical_apicid); - /* Or can we switch back to PIC here? */ - } + /* Setup local timer */ + x86_init.timers.setup_percpu_clockev(); - default_setup_apic_routing(); - cpu0_logical_apicid = apic_bsp_setup(false); + smp_get_logical_apicid(); pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); + native_pv_lock_init(); + uv_system_init(); set_mtrr_aps_delayed_init(); @@ -1387,14 +1297,22 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { + int ncpus; + pr_debug("Boot done\n"); + /* + * Today neither Intel nor AMD support heterogeneous systems so + * extrapolate the boot cpu's data to all packages. + */ + ncpus = cpu_data(0).booted_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + pr_info("Max logical packages: %u\n", __max_logical_packages); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); nmi_selftest(); impress_friends(); - setup_ioapic_dest(); mtrr_aps_init(); } @@ -1553,13 +1471,14 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + lapic_offline(); } int native_cpu_disable(void) { int ret; - ret = check_irq_vectors_for_cpu_disable(); + ret = lapic_can_unplug_cpu(); if (ret) return ret; diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 8dabd7bf1673..77835bc021c7 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -30,7 +30,7 @@ static int save_stack_address(struct stack_trace *trace, unsigned long addr, return 0; } -static void __save_stack_trace(struct stack_trace *trace, +static void noinline __save_stack_trace(struct stack_trace *trace, struct task_struct *task, struct pt_regs *regs, bool nosched) { @@ -56,6 +56,7 @@ static void __save_stack_trace(struct stack_trace *trace, */ void save_stack_trace(struct stack_trace *trace) { + trace->skip++; __save_stack_trace(trace, current, NULL, false); } EXPORT_SYMBOL_GPL(save_stack_trace); @@ -70,6 +71,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) if (!try_get_task_stack(tsk)) return; + if (tsk == current) + trace->skip++; __save_stack_trace(trace, tsk, NULL, true); put_task_stack(tsk); @@ -88,8 +91,9 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk); } \ }) -static int __save_stack_trace_reliable(struct stack_trace *trace, - struct task_struct *task) +static int __always_inline +__save_stack_trace_reliable(struct stack_trace *trace, + struct task_struct *task) { struct unwind_state state; struct pt_regs *regs; diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 5ee663836c08..60d2c3798ba2 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * x86 single-step support code, common to 32-bit and 64-bit.
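The extrapolation in native_smp_cpus_done() above is plain arithmetic. For instance, with a boot CPU package of 4 booted cores and 2 siblings per core, and nr_cpu_ids of 16 (hypothetical numbers):

	int ncpus = 4 * 2;			/* booted_cores * smp_num_siblings */
	int max_pkgs = DIV_ROUND_UP(16, ncpus);	/* (16 + 8 - 1) / 8 = 2 packages */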
*/ diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 73e4d28112f8..676774b9bb8d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/mm.h> @@ -187,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (len > TASK_SIZE) return -ENOMEM; + /* No address checking. See comment at mmap_address_hint_valid() */ if (flags & MAP_FIXED) return addr; @@ -196,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* requesting a specific address */ if (addr) { - addr = PAGE_ALIGN(addr); + addr &= PAGE_MASK; + if (!mmap_address_hint_valid(addr, len)) + goto get_unmapped_area; + vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + if (!vma || addr + len <= vm_start_gap(vma)) return addr; } +get_unmapped_area: info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index e0754cdbad37..749d189f8cd4 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 1991,1992,1995 Linus Torvalds * Copyright (c) 1994 Alan Modra @@ -84,6 +85,11 @@ void __init hpet_time_init(void) static __init void x86_late_time_init(void) { x86_init.timers.timer_init(); + /* + * After PIT/HPET timers init, select and setup + * the final interrupt mode for delivering IRQs. + */ + x86_init.irqs.intr_mode_init(); tsc_init(); } diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c index a106b9719c58..9a9c9b076955 100644 --- a/arch/x86/kernel/tls.c +++ b/arch/x86/kernel/tls.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/sched.h> diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c index 80bb24d9b880..b8e7abe00b06 100644 --- a/arch/x86/kernel/trace_clock.c +++ b/arch/x86/kernel/trace_clock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * X86 trace clocks */ diff --git a/arch/x86/kernel/tracepoint.c b/arch/x86/kernel/tracepoint.c index c6636d1f60b9..5bd30c442794 100644 --- a/arch/x86/kernel/tracepoint.c +++ b/arch/x86/kernel/tracepoint.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Code for supporting irq vector tracepoints. * diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 34ea3651362e..989514c94a55 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -42,7 +42,6 @@ #include <linux/edac.h> #endif -#include <asm/kmemcheck.h> #include <asm/stacktrace.h> #include <asm/processor.h> #include <asm/debugreg.h> @@ -60,6 +59,7 @@ #include <asm/trace/mpx.h> #include <asm/mpx.h> #include <asm/vm86.h> +#include <asm/umip.h> #ifdef CONFIG_X86_64 #include <asm/x86_init.h> @@ -71,7 +71,7 @@ #include <asm/proto.h> #endif -DECLARE_BITMAP(used_vectors, NR_VECTORS); +DECLARE_BITMAP(system_vectors, NR_VECTORS); static inline void cond_local_irq_enable(struct pt_regs *regs) { @@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs) * will catch asm bugs and any attempt to use ist_preempt_enable * from double_fault. 
*/ - BUG_ON((unsigned long)(current_top_of_stack() - - current_stack_pointer()) >= THREAD_SIZE); + BUG_ON(!on_thread_stack()); preempt_enable_no_resched(); } @@ -209,9 +208,6 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str, if (fixup_exception(regs, trapnr)) return 0; - if (fixup_bug(regs, trapnr)) - return 0; - tsk->thread.error_code = error_code; tsk->thread.trap_nr = trapnr; die(str, regs, error_code); @@ -292,6 +288,13 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + /* + * WARN*()s end up here; fix them up before we call the + * notifier chain. + */ + if (!user_mode(regs) && fixup_bug(regs, trapnr)) + return; + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); @@ -514,6 +517,11 @@ do_general_protection(struct pt_regs *regs, long error_code) RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); cond_local_irq_enable(regs); + if (static_cpu_has(X86_FEATURE_UMIP)) { + if (user_mode(regs) && fixup_umip_exception(regs)) + return; + } + if (v8086_mode(regs)) { local_irq_enable(); handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code); @@ -740,10 +748,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (!dr6 && user_mode(regs)) user_icebp = 1; - /* Catch kmemcheck conditions! */ - if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) - goto exit; - /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 796d96bb0821..8ea117f8142e 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -112,7 +112,7 @@ static void cyc2ns_data_init(struct cyc2ns_data *data) data->cyc2ns_offset = 0; } -static void cyc2ns_init(int cpu) +static void __init cyc2ns_init(int cpu) { struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); @@ -812,13 +812,13 @@ unsigned long native_calibrate_cpu(void) return tsc_pit_min; } -int recalibrate_cpu_khz(void) +void recalibrate_cpu_khz(void) { #ifndef CONFIG_SMP unsigned long cpu_khz_old = cpu_khz; if (!boot_cpu_has(X86_FEATURE_TSC)) - return -ENODEV; + return; cpu_khz = x86_platform.calibrate_cpu(); tsc_khz = x86_platform.calibrate_tsc(); @@ -828,10 +828,6 @@ int recalibrate_cpu_khz(void) cpu_khz = tsc_khz; cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy, cpu_khz_old, cpu_khz); - - return 0; -#else - return -ENODEV; #endif } @@ -959,17 +955,21 @@ core_initcall(cpufreq_register_tsc_scaling); /* * If ART is present detect the numerator:denominator to convert to TSC */ -static void detect_art(void) +static void __init detect_art(void) { unsigned int unused[2]; if (boot_cpu_data.cpuid_level < ART_CPUID_LEAF) return; - /* Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required */ + /* + * Don't enable ART in a VM, non-stop TSC and TSC_ADJUST required, + * and the TSC counter resets must not occur asynchronously. 
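For context on the CPUID read in detect_art() above: ART (the Always Running Timer) ticks at a fixed ratio to the TSC, with the numerator and denominator taken from CPUID leaf 0x15. A simplified conversion sketch (the in-tree convert_art_to_tsc() splits the multiply to avoid 64-bit overflow):

u64 art_to_tsc_sketch(u64 art)
{
	/* TSC = ART * (numerator / denominator) + offset */
	return div64_u64(art * art_to_tsc_numerator, art_to_tsc_denominator) +
	       art_to_tsc_offset;
}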
+ */ if (boot_cpu_has(X86_FEATURE_HYPERVISOR) || !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) || - !boot_cpu_has(X86_FEATURE_TSC_ADJUST)) + !boot_cpu_has(X86_FEATURE_TSC_ADJUST) || + tsc_async_resets) return; cpuid(ART_CPUID_LEAF, &art_to_tsc_denominator, @@ -1263,6 +1263,25 @@ static int __init init_tsc_clocksource(void) */ device_initcall(init_tsc_clocksource); +void __init tsc_early_delay_calibrate(void) +{ + unsigned long lpj; + + if (!boot_cpu_has(X86_FEATURE_TSC)) + return; + + cpu_khz = x86_platform.calibrate_cpu(); + tsc_khz = x86_platform.calibrate_tsc(); + + tsc_khz = tsc_khz ? : cpu_khz; + if (!tsc_khz) + return; + + lpj = tsc_khz * 1000; + do_div(lpj, HZ); + loops_per_jiffy = lpj; +} + void __init tsc_init(void) { u64 lpj, cyc; @@ -1346,12 +1365,10 @@ void __init tsc_init(void) unsigned long calibrate_delay_is_known(void) { int sibling, cpu = smp_processor_id(); - struct cpumask *mask = topology_core_cpumask(cpu); - - if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC)) - return 0; + int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC); + const struct cpumask *mask = topology_core_cpumask(cpu); - if (!mask) + if (tsc_disabled || !constant_tsc || !mask) return 0; sibling = cpumask_any_but(mask, cpu); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 7842371bc9e4..ec534f978867 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * check TSC synchronization. * @@ -30,6 +31,20 @@ struct tsc_adjust { static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); +/* + * TSCs on different sockets may be reset asynchronously. + * This may cause the TSC ADJUST value on socket 0 to be non-zero. + */ +bool __read_mostly tsc_async_resets; + +void mark_tsc_async_resets(char *reason) +{ + if (tsc_async_resets) + return; + tsc_async_resets = true; + pr_info("tsc: Marking TSC async resets true due to %s\n", reason); +} + void tsc_verify_tsc_adjust(bool resume) { struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust); @@ -38,6 +53,10 @@ void tsc_verify_tsc_adjust(bool resume) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return; + /* Rate limit the MSR check */ if (!resume && time_before(jiffies, adj->nextcheck)) return; @@ -71,12 +90,22 @@ static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval, * non zero. We don't do that on non boot cpus because physical * hotplug should have set the ADJUST register to a value > 0 so * the TSC is in sync with the already running cpus. + * + * Also don't force the ADJUST value to zero if that is a valid value + * for socket 0 as determined by the system arch. This is required + * when multiple sockets are reset asynchronously with each other + * and socket 0 may not have a TSC ADJUST value of 0.
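mark_tsc_async_resets() above is a one-way flag. A platform that knows its sockets leave reset independently would call it once during early setup; a hypothetical caller (the SGI UV platform code is the kind of user this interface targets):

static void __init example_platform_early_init(void)
{
	/* Sockets in separate chassis are released from reset independently,
	 * so a non-zero TSC_ADJUST on socket 0 is legitimate here. */
	mark_tsc_async_resets("example multi-chassis platform");
}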
*/ if (bootcpu && bootval != 0) { - pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", cpu, - bootval); - wrmsrl(MSR_IA32_TSC_ADJUST, 0); - bootval = 0; + if (likely(!tsc_async_resets)) { + pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n", + cpu, bootval); + wrmsrl(MSR_IA32_TSC_ADJUST, 0); + bootval = 0; + } else { + pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n", + cpu, bootval); + } } cur->adjusted = bootval; } @@ -90,6 +119,10 @@ bool __init tsc_store_and_check_tsc_adjust(bool bootcpu) if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) return false; + /* Skip unnecessary error messages if TSC already unstable */ + if (check_tsc_unstable()) + return false; + rdmsrl(MSR_IA32_TSC_ADJUST, bootval); cur->bootval = bootval; cur->nextcheck = jiffies + HZ; @@ -118,6 +151,13 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) cur->warned = false; /* + * If a non-zero TSC value for socket 0 may be valid then the default + * adjusted value cannot be assumed to be zero either. + */ + if (tsc_async_resets) + cur->adjusted = bootval; + + /* * Check whether this CPU is the first in a package to come up. In * this case do not check the boot value against another package * because the new package might have been physically hotplugged, @@ -138,10 +178,9 @@ bool tsc_store_and_check_tsc_adjust(bool bootcpu) * Compare the boot value and complain if it differs in the * package. */ - if (bootval != ref->bootval) { - pr_warn(FW_BUG "TSC ADJUST differs: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->bootval, cpu, bootval); - } + if (bootval != ref->bootval) + printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n"); + /* * The TSC_ADJUST values in a package must be the same. If the boot * value on this newly upcoming CPU differs from the adjustment * value of the reference CPU, sync it to the reference's adjusted value. */ if (bootval != ref->adjusted) { - pr_warn("TSC ADJUST synchronize: Reference CPU%u: %lld CPU%u: %lld\n", - refcpu, ref->adjusted, cpu, bootval); cur->adjusted = ref->adjusted; wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted); } diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c new file mode 100644 index 000000000000..f44ce0fb3583 --- /dev/null +++ b/arch/x86/kernel/umip.c @@ -0,0 +1,428 @@ +/* + * umip.c Emulation for instructions protected by the Intel User-Mode + * Instruction Prevention feature + * + * Copyright (c) 2017, Intel Corporation. + * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> + */ + +#include <linux/uaccess.h> +#include <asm/umip.h> +#include <asm/traps.h> +#include <asm/insn.h> +#include <asm/insn-eval.h> +#include <linux/ratelimit.h> + +#undef pr_fmt +#define pr_fmt(fmt) "umip: " fmt + +/** DOC: Emulation for User-Mode Instruction Prevention (UMIP) + * + * The feature User-Mode Instruction Prevention present in recent Intel + * processors prevents a group of instructions (sgdt, sidt, sldt, smsw, and str) + * from being executed with CPL > 0. Otherwise, a general protection fault is + * issued. + * + * Rather than relaying the general protection fault caused by the + * UMIP-protected instructions to user space (in the form of a SIGSEGV signal), + * the fault can be trapped and the result of such instructions emulated to provide dummy values.
+ * This both conserves the current kernel behavior and avoids revealing the + * system resources that UMIP intends to protect (i.e., the locations of the + * global descriptor and interrupt descriptor tables, the segment selectors of + * the local descriptor table, the value of the task state register and the + * contents of the CR0 register). + * + * This emulation is needed because certain applications (e.g., WineHQ and + * DOSEMU2) rely on this subset of instructions to function. + * + * The instructions protected by UMIP can be split in two groups: those which + * return a kernel memory address (sgdt and sidt) and those which return a + * value (sldt, str and smsw). + * + * For the instructions that return a kernel memory address, applications + * such as WineHQ rely on the result being located in the kernel memory space, + * not the actual location of the table. The result is emulated as a hard-coded + * value that lies close to the top of the kernel memory. The limits for the GDT + * and the IDT are set to zero. + * + * Given that sldt and str are not commonly used in programs that run on WineHQ + * or DOSEMU2, they are not emulated. + * + * The instruction smsw is emulated to return the value that the register CR0 + * has at boot time, as set in head_32.S. + * + * Also, emulation is provided only for 32-bit processes; 64-bit processes + * that attempt to use the instructions that UMIP protects will receive the + * SIGSEGV signal issued as a consequence of the general protection fault. + * + * Care is taken to appropriately emulate the results when segmentation is + * used. That is, rather than relying on USER_DS and USER_CS, the function + * insn_get_addr_ref() inspects the segment descriptor pointed to by the + * registers in pt_regs. This ensures that we correctly obtain the segment + * base address and the address and operand sizes even if the user space + * application uses a local descriptor table. + */ + +#define UMIP_DUMMY_GDT_BASE 0xfffe0000 +#define UMIP_DUMMY_IDT_BASE 0xffff0000 + +/* + * The SGDT and SIDT instructions store the contents of the global descriptor + * table and interrupt table registers, respectively. The destination is a + * memory operand of X+2 bytes. X bytes are used to store the base address of + * the table and 2 bytes are used to store the limit. In 32-bit processes, the + * only processes for which emulation is provided, X has a value of 4. + */ +#define UMIP_GDT_IDT_BASE_SIZE 4 +#define UMIP_GDT_IDT_LIMIT_SIZE 2 + +#define UMIP_INST_SGDT 0 /* 0F 01 /0 */ +#define UMIP_INST_SIDT 1 /* 0F 01 /1 */ +#define UMIP_INST_SMSW 2 /* 0F 01 /4 */ +#define UMIP_INST_SLDT 3 /* 0F 00 /0 */ +#define UMIP_INST_STR 4 /* 0F 00 /1 */ + +const char * const umip_insns[5] = { + [UMIP_INST_SGDT] = "SGDT", + [UMIP_INST_SIDT] = "SIDT", + [UMIP_INST_SMSW] = "SMSW", + [UMIP_INST_SLDT] = "SLDT", + [UMIP_INST_STR] = "STR", +}; + +#define umip_pr_err(regs, fmt, ...) \ + umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) +#define umip_pr_warning(regs, fmt, ...) \ + umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) + +/** + * umip_printk() - Print a rate-limited message + * @regs: Register set with the context in which the warning is printed + * @log_level: Kernel log level to print the message + * @fmt: The text string to print + * + * Print the text contained in @fmt. The print rate is limited to bursts of 5 + * messages every two minutes.
The purpose of this customized version of + * printk() is to print messages when user space processes use any of the + * UMIP-protected instructions. Thus, the printed text is prepended with the + * task name and process ID number of the current task as well as the + * instruction and stack pointers in @regs as seen when entering kernel mode. + * + * Returns: + * + * None. + */ +static __printf(3, 4) +void umip_printk(const struct pt_regs *regs, const char *log_level, + const char *fmt, ...) +{ + /* Bursts of 5 messages every two minutes */ + static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); + struct task_struct *tsk = current; + struct va_format vaf; + va_list args; + + if (!__ratelimit(&ratelimit)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm, + task_pid_nr(tsk), regs->ip, regs->sp, &vaf); + va_end(args); +} + +/** + * identify_insn() - Identify a UMIP-protected instruction + * @insn: Instruction structure with opcode and ModRM byte. + * + * From the opcode and ModRM.reg in @insn identify, if any, a UMIP-protected + * instruction that can be emulated. + * + * Returns: + * + * On success, a constant identifying a specific UMIP-protected instruction that + * can be emulated. + * + * -EINVAL on error or when not an UMIP-protected instruction that can be + * emulated. + */ +static int identify_insn(struct insn *insn) +{ + /* By getting modrm we also get the opcode. */ + insn_get_modrm(insn); + + if (!insn->modrm.nbytes) + return -EINVAL; + + /* All the instructions of interest start with 0x0f. */ + if (insn->opcode.bytes[0] != 0xf) + return -EINVAL; + + if (insn->opcode.bytes[1] == 0x1) { + switch (X86_MODRM_REG(insn->modrm.value)) { + case 0: + return UMIP_INST_SGDT; + case 1: + return UMIP_INST_SIDT; + case 4: + return UMIP_INST_SMSW; + default: + return -EINVAL; + } + } else if (insn->opcode.bytes[1] == 0x0) { + if (X86_MODRM_REG(insn->modrm.value) == 0) + return UMIP_INST_SLDT; + else if (X86_MODRM_REG(insn->modrm.value) == 1) + return UMIP_INST_STR; + else + return -EINVAL; + } else { + return -EINVAL; + } +} + +/** + * emulate_umip_insn() - Emulate UMIP instructions and return dummy values + * @insn: Instruction structure with operands + * @umip_inst: A constant indicating the instruction to emulate + * @data: Buffer into which the dummy result is stored + * @data_size: Size of the emulated result + * + * Emulate an instruction protected by UMIP and provide a dummy result. The + * result of the emulation is saved in @data. The size of the results depends + * on both the instruction and type of operand (register vs memory address). + * The size of the result is updated in @data_size. Caller is responsible + * of providing a @data buffer of at least UMIP_GDT_IDT_BASE_SIZE + + * UMIP_GDT_IDT_LIMIT_SIZE bytes. + * + * Returns: + * + * 0 on success, -EINVAL on error while emulating. + */ +static int emulate_umip_insn(struct insn *insn, int umip_inst, + unsigned char *data, int *data_size) +{ + unsigned long dummy_base_addr, dummy_value; + unsigned short dummy_limit = 0; + + if (!data || !data_size || !insn) + return -EINVAL; + /* + * These two instructions return the base address and limit of the + * global and interrupt descriptor table, respectively. According to the + * Intel Software Development manual, the base address can be 24-bit, + * 32-bit or 64-bit. Limit is always 16-bit. 
If the operand size is + * 16-bit, the returned value of the base address is supposed to be a + * zero-extended 24-bit number. However, it seems that a 32-bit number + * is always returned irrespective of the operand size. + */ + if (umip_inst == UMIP_INST_SGDT || umip_inst == UMIP_INST_SIDT) { + /* SGDT and SIDT do not use register operands. */ + if (X86_MODRM_MOD(insn->modrm.value) == 3) + return -EINVAL; + + if (umip_inst == UMIP_INST_SGDT) + dummy_base_addr = UMIP_DUMMY_GDT_BASE; + else + dummy_base_addr = UMIP_DUMMY_IDT_BASE; + + *data_size = UMIP_GDT_IDT_LIMIT_SIZE + UMIP_GDT_IDT_BASE_SIZE; + + memcpy(data + 2, &dummy_base_addr, UMIP_GDT_IDT_BASE_SIZE); + memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); + + } else if (umip_inst == UMIP_INST_SMSW) { + dummy_value = CR0_STATE; + + /* + * Even though the CR0 register has 4 bytes, the number + * of bytes to be copied in the result buffer is determined + * by whether the operand is a register or a memory location. + * If the operand is a register, return as many bytes as the operand + * size. If the operand is memory, return only the two least + * significant bytes of CR0. + */ + if (X86_MODRM_MOD(insn->modrm.value) == 3) + *data_size = insn->opnd_bytes; + else + *data_size = 2; + + memcpy(data, &dummy_value, *data_size); + /* STR and SLDT are not emulated */ + } else { + return -EINVAL; + } + + return 0; +} + +/** + * force_sig_info_umip_fault() - Force a SIGSEGV with SEGV_MAPERR + * @addr: Address that caused the signal + * @regs: Register set containing the instruction pointer + * + * Force a SIGSEGV signal with SEGV_MAPERR as the error code. This function is + * intended to be used to provide a segmentation fault when the result of the + * UMIP emulation could not be copied to the user space memory. + * + * Returns: none + */ +static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) +{ + siginfo_t info; + struct task_struct *tsk = current; + + tsk->thread.cr2 = (unsigned long)addr; + tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; + tsk->thread.trap_nr = X86_TRAP_PF; + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_code = SEGV_MAPERR; + info.si_addr = addr; + force_sig_info(SIGSEGV, &info, tsk); + + if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) + return; + + umip_pr_err(regs, "segfault in emulation. error%x\n", + X86_PF_USER | X86_PF_WRITE); +} + +/** + * fixup_umip_exception() - Fixup a general protection fault caused by UMIP + * @regs: Registers as saved when entering the #GP handler + * + * The instructions sgdt, sidt, str, smsw, sldt cause a general protection + * fault if executed with CPL > 0 (i.e., from user space). If the offending + * user-space process is not in long mode, this function fixes the exception + * up and provides dummy results for sgdt, sidt and smsw; str and sldt are not + * fixed up. Also long mode user-space processes are not fixed up. + * + * If operands are memory addresses, results are copied to user-space memory as + * indicated by the instruction pointed to by eIP using the registers indicated in + * the instruction operands. If operands are registers, results are copied into + * the context that was saved when entering kernel mode. + * + * Returns: + * + * True if emulation was successful; false if not.
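The 6-byte result assembled in emulate_umip_insn() above matches the memory image a 32-bit sgdt/sidt would store; from the application's point of view it looks like this (illustrative struct, not part of the patch):

struct __attribute__((packed)) dummy_desc_ptr {
	unsigned short limit;	/* always 0 under emulation */
	unsigned int base;	/* 0xfffe0000 for SGDT, 0xffff0000 for SIDT */
};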
+ */ +bool fixup_umip_exception(struct pt_regs *regs) +{ + int not_copied, nr_copied, reg_offset, dummy_data_size, umip_inst; + unsigned long seg_base = 0, *reg_addr; + /* 10 bytes is the maximum size of the result of UMIP instructions */ + unsigned char dummy_data[10] = { 0 }; + unsigned char buf[MAX_INSN_SIZE]; + void __user *uaddr; + struct insn insn; + int seg_defs; + + if (!regs) + return false; + + /* + * If not in user-space long mode, a custom code segment could be in + * use. This is true in protected mode (if the process defined a local + * descriptor table), or virtual-8086 mode. In most cases + * seg_base will be zero as in USER_CS. + */ + if (!user_64bit_mode(regs)) + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); + + if (seg_base == -1L) + return false; + + not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), + sizeof(buf)); + nr_copied = sizeof(buf) - not_copied; + + /* + * The copy_from_user above could have failed if user code is protected + * by a memory protection key. Give up on emulation in such a case. + * Should we issue a page fault? + */ + if (!nr_copied) + return false; + + insn_init(&insn, buf, nr_copied, user_64bit_mode(regs)); + + /* + * Override the default operand and address sizes with what is specified + * in the code segment descriptor. The instruction decoder only sets + * the address size to either 4 or 8 bytes and does nothing + * for the operand bytes. This is OK for most cases, but we could + * have special cases where, for instance, a 16-bit code segment + * descriptor is used. + * If there is an address override prefix, the instruction decoder + * correctly updates these values, even for 16-bit defaults. + */ + seg_defs = insn_get_code_seg_params(regs); + if (seg_defs == -EINVAL) + return false; + + insn.addr_bytes = INSN_CODE_SEG_ADDR_SZ(seg_defs); + insn.opnd_bytes = INSN_CODE_SEG_OPND_SZ(seg_defs); + + insn_get_length(&insn); + if (nr_copied < insn.length) + return false; + + umip_inst = identify_insn(&insn); + if (umip_inst < 0) + return false; + + umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", + umip_insns[umip_inst]); + + /* Do not emulate SLDT, STR or user long mode processes. */ + if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) + return false; + + umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); + + if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) + return false; + + /* + * If operand is a register, write result to the copy of the register + * value that was pushed to the stack when entering into kernel mode. + * Upon exit, the value we write will be restored to the actual hardware + * register. + */ + if (X86_MODRM_MOD(insn.modrm.value) == 3) { + reg_offset = insn_get_modrm_rm_off(&insn, regs); + + /* + * Negative values are usually errors. In memory addressing, + * the exception is -EDOM. Since we expect a register operand, + * all negative values are errors. + */ + if (reg_offset < 0) + return false; + + reg_addr = (unsigned long *)((unsigned long)regs + reg_offset); + memcpy(reg_addr, dummy_data, dummy_data_size); + } else { + uaddr = insn_get_addr_ref(&insn, regs); + if ((unsigned long)uaddr == -1L) + return false; + + nr_copied = copy_to_user(uaddr, dummy_data, dummy_data_size); + if (nr_copied > 0) { + /* + * If copy fails, send a signal and tell caller that + * fault was fixed up.
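The register/memory write-back split above is what keeps the emulation invisible to applications. A hypothetical 32-bit user-space probe of the smsw path (with UMIP enforced and no fixup, this would receive SIGSEGV):

#include <stdio.h>

int main(void)
{
	unsigned int msw = 0;

	/* Under emulation this returns the boot-time CR0 value (CR0_STATE)
	 * rather than faulting or leaking the live register. */
	asm volatile("smsw %0" : "=r" (msw));
	printf("smsw: %#x\n", msw);
	return 0;
}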
diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c
index d145a0b1f529..3dc26f95d46e 100644
--- a/arch/x86/kernel/unwind_frame.c
+++ b/arch/x86/kernel/unwind_frame.c
@@ -44,7 +44,8 @@ static void unwind_dump(struct unwind_state *state)
 			state->stack_info.type, state->stack_info.next_sp,
 			state->stack_mask, state->graph_idx);
 
-	for (sp = state->orig_sp; sp; sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
+	for (sp = PTR_ALIGN(state->orig_sp, sizeof(long)); sp;
+	     sp = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
 		if (get_stack_info(sp, state->task, &stack_info, &visit_mask))
 			break;
 
@@ -174,6 +175,7 @@ static bool is_last_task_frame(struct unwind_state *state)
  * This determines if the frame pointer actually contains an encoded pointer to
  * pt_regs on the stack. See ENCODE_FRAME_POINTER.
  */
+#ifdef CONFIG_X86_64
 static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 {
 	unsigned long regs = (unsigned long)bp;
@@ -183,6 +185,23 @@ static struct pt_regs *decode_frame_pointer(unsigned long *bp)
 
 	return (struct pt_regs *)(regs & ~0x1);
 }
+#else
+static struct pt_regs *decode_frame_pointer(unsigned long *bp)
+{
+	unsigned long regs = (unsigned long)bp;
+
+	if (regs & 0x80000000)
+		return NULL;
+
+	return (struct pt_regs *)(regs | 0x80000000);
+}
+#endif
+
+#ifdef CONFIG_X86_32
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs) - 2*sizeof(long))
+#else
+#define KERNEL_REGS_SIZE (sizeof(struct pt_regs))
+#endif
 
 static bool update_stack_state(struct unwind_state *state,
 			       unsigned long *next_bp)
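The two decode_frame_pointer() variants rely on one spare bit in the frame-pointer value: on x86_64, pointers are at least word-aligned, so bit 0 is free to act as the "this is an encoded pt_regs pointer" flag; on 32-bit, kernel stack addresses have bit 31 set under the common memory splits, so clearing that bit marks the value as encoded. A user-space sketch of the round trip, with made-up addresses standing in for real stack locations:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* 32-bit scheme: a cleared bit 31 marks an encoded pt_regs pointer. */
	static uint32_t encode32(uint32_t regs_addr) { return regs_addr & ~0x80000000u; }
	static uint32_t decode32(uint32_t bp)
	{
		if (bp & 0x80000000u)
			return 0;		/* plain frame pointer */
		return bp | 0x80000000u;	/* recovered pt_regs address */
	}

	/* 64-bit scheme: pointers are word-aligned, so bit 0 is free. */
	static uint64_t encode64(uint64_t regs_addr) { return regs_addr | 0x1; }
	static uint64_t decode64(uint64_t bp)
	{
		if (!(bp & 0x1))
			return 0;		/* plain frame pointer */
		return bp & ~0x1ull;		/* recovered pt_regs address */
	}

	int main(void)
	{
		assert(decode32(encode32(0xc0a81000u)) == 0xc0a81000u);
		assert(decode32(0xc0a81000u) == 0);	/* ordinary kernel bp */
		assert(decode64(encode64(0xffff888012345678ull)) == 0xffff888012345678ull);
		puts("frame pointer encode/decode round-trips");
		return 0;
	}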
@@ -202,7 +221,7 @@ static bool update_stack_state(struct unwind_state *state,
 	regs = decode_frame_pointer(next_bp);
 	if (regs) {
 		frame = (unsigned long *)regs;
-		len = regs_size(regs);
+		len = KERNEL_REGS_SIZE;
 		state->got_irq = true;
 	} else {
 		frame = next_bp;
@@ -226,6 +245,14 @@ static bool update_stack_state(struct unwind_state *state,
 	    frame < prev_frame_end)
 		return false;
 
+	/*
+	 * On 32-bit with user mode regs, make sure the last two regs are safe
+	 * to access:
+	 */
+	if (IS_ENABLED(CONFIG_X86_32) && regs && user_mode(regs) &&
+	    !on_stack(info, frame, len + 2*sizeof(long)))
+		return false;
+
 	/* Move state to the next frame: */
 	if (regs) {
 		state->regs = regs;
@@ -328,6 +355,13 @@ bad_address:
 		    state->regs->sp < (unsigned long)task_pt_regs(state->task))
 			goto the_end;
 
+	/*
+	 * There are some known frame pointer issues on 32-bit. Disable
+	 * unwinder warnings on 32-bit until it gets objtool support.
+	 */
+	if (IS_ENABLED(CONFIG_X86_32))
+		goto the_end;
+
 	if (state->regs) {
 		printk_deferred_once(KERN_WARNING
 			"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 570b70d3f604..a3f973b2c97a 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -86,8 +86,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 	idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
 
 	if (unlikely((idx >= lookup_num_blocks-1))) {
-		orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%lx\n",
-			 idx, lookup_num_blocks, ip);
+		orc_warn("WARNING: bad lookup idx: idx=%u num=%u ip=%pB\n",
+			 idx, lookup_num_blocks, (void *)ip);
 		return NULL;
 	}
 
@@ -96,8 +96,8 @@ static struct orc_entry *orc_find(unsigned long ip)
 
 	if (unlikely((__start_orc_unwind + start >= __stop_orc_unwind) ||
 		     (__start_orc_unwind + stop > __stop_orc_unwind))) {
-		orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%lx\n",
-			 idx, lookup_num_blocks, start, stop, ip);
+		orc_warn("WARNING: bad lookup value: idx=%u num=%u start=%u stop=%u ip=%pB\n",
+			 idx, lookup_num_blocks, start, stop, (void *)ip);
 		return NULL;
 	}
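orc_find() narrows its binary search with a two-level scheme: a dense lookup table maps fixed-size blocks of code addresses to ranges of ORC entries, and only that range is searched. A simplified stand-alone model of the idea (the tables are illustrative, a linear scan stands in for the kernel's binary search, and the exact start/stop semantics are simplified):

	#include <stdio.h>

	#define LOOKUP_START_IP   0x1000u
	#define LOOKUP_BLOCK_SIZE 0x100u	/* bytes of code per lookup slot */

	/* Sorted instruction-pointer table, standing in for .orc_unwind_ip. */
	static const unsigned int ip_table[] = {
		0x1004, 0x1040, 0x10c0, 0x1108, 0x1180, 0x1220, 0x12f0,
	};

	/* lookup[i] = index of the entry covering the first ip of block i. */
	static const unsigned int lookup[] = { 0, 2, 4, 6 };
	#define NUM_BLOCKS (sizeof(lookup) / sizeof(lookup[0]))

	static int orc_find(unsigned int ip)
	{
		unsigned int idx = (ip - LOOKUP_START_IP) / LOOKUP_BLOCK_SIZE;
		int found = -1;

		if (idx >= NUM_BLOCKS - 1)
			return -1;	/* "bad lookup idx", as in the warning above */

		/* Pick the last entry in the block's range that starts at or
		 * below ip; the kernel does this with a binary search. */
		for (unsigned int i = lookup[idx]; i <= lookup[idx + 1]; i++)
			if (ip_table[i] <= ip)
				found = (int)i;
		return found;
	}

	int main(void)
	{
		printf("entry for ip 0x1185: %d\n", orc_find(0x1185)); /* prints 4 */
		return 0;
	}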
@@ -279,7 +279,7 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
 	if (!stack_access_ok(state, addr, sizeof(long)))
 		return false;
 
-	*val = READ_ONCE_TASK_STACK(state->task, *(unsigned long *)addr);
+	*val = READ_ONCE_NOCHECK(*(unsigned long *)addr);
 	return true;
 }
 
@@ -373,7 +373,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R10:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R10 at ip %p\n",
+			orc_warn("missing regs for base reg R10 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -382,7 +382,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_R13:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg R13 at ip %p\n",
+			orc_warn("missing regs for base reg R13 at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -391,7 +391,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DI:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DI at ip %p\n",
+			orc_warn("missing regs for base reg DI at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -400,7 +400,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_REG_DX:
 		if (!state->regs || !state->full_regs) {
-			orc_warn("missing regs for base reg DX at ip %p\n",
+			orc_warn("missing regs for base reg DX at ip %pB\n",
 				 (void *)state->ip);
 			goto done;
 		}
@@ -408,7 +408,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown SP base reg %d for ip %p\n",
+		orc_warn("unknown SP base reg %d for ip %pB\n",
 			 orc->sp_reg, (void *)state->ip);
 		goto done;
 	}
@@ -436,7 +436,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
-			orc_warn("can't dereference registers at %p for ip %p\n",
+			orc_warn("can't dereference registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -448,7 +448,7 @@ bool unwind_next_frame(struct unwind_state *state)
 
 	case ORC_TYPE_REGS_IRET:
 		if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
-			orc_warn("can't dereference iret registers at %p for ip %p\n",
+			orc_warn("can't dereference iret registers at %p for ip %pB\n",
 				 (void *)sp, (void *)orig_ip);
 			goto done;
 		}
@@ -465,7 +465,8 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown .orc_unwind entry type %d\n", orc->type);
+		orc_warn("unknown .orc_unwind entry type %d for ip %pB\n",
+			 orc->type, (void *)orig_ip);
 		break;
 	}
 
@@ -487,7 +488,7 @@ bool unwind_next_frame(struct unwind_state *state)
 		break;
 
 	default:
-		orc_warn("unknown BP base reg %d for ip %p\n",
+		orc_warn("unknown BP base reg %d for ip %pB\n",
 			 orc->bp_reg, (void *)orig_ip);
 		goto done;
 	}
@@ -496,7 +497,7 @@ bool unwind_next_frame(struct unwind_state *state)
 	if (state->stack_info.type == prev_type &&
 	    on_stack(&state->stack_info, (void *)state->sp, sizeof(long)) &&
 	    state->sp <= prev_sp) {
-		orc_warn("stack going in the wrong direction? ip=%p\n",
+		orc_warn("stack going in the wrong direction? ip=%pB\n",
 			 (void *)orig_ip);
 		goto done;
 	}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 495c776de4b4..a3755d293a48 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -271,12 +271,15 @@ static bool is_prefix_bad(struct insn *insn)
 	int i;
 
 	for (i = 0; i < insn->prefixes.nbytes; i++) {
-		switch (insn->prefixes.bytes[i]) {
-		case 0x26:	/* INAT_PFX_ES */
-		case 0x2E:	/* INAT_PFX_CS */
-		case 0x36:	/* INAT_PFX_DS */
-		case 0x3E:	/* INAT_PFX_SS */
-		case 0xF0:	/* INAT_PFX_LOCK */
+		insn_attr_t attr;
+
+		attr = inat_get_opcode_attribute(insn->prefixes.bytes[i]);
+		switch (attr) {
+		case INAT_MAKE_PREFIX(INAT_PFX_ES):
+		case INAT_MAKE_PREFIX(INAT_PFX_CS):
+		case INAT_MAKE_PREFIX(INAT_PFX_DS):
+		case INAT_MAKE_PREFIX(INAT_PFX_SS):
+		case INAT_MAKE_PREFIX(INAT_PFX_LOCK):
 			return true;
 		}
 	}
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59aa153..3d3c2f71f617 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
-verify_cpu:
+ENTRY(verify_cpu)
 	pushf				# Save caller passed flags
 	push	$0			# Kill any dangerous flags
 	popf
@@ -139,3 +139,4 @@ verify_cpu:
 	popf				# Restore caller passed flags
 	xorl	%eax, %eax
 	ret
+ENDPROC(verify_cpu)
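The uprobes change swaps hard-coded prefix byte values for decoder-attribute lookups, so the switch compares classifications rather than raw bytes. (Worth noting: on x86, 0x36 is the SS override and 0x3E the DS override; the comments on the removed lines had the two swapped, which is exactly the kind of slip the table-driven form avoids.) A toy model of the classification, with made-up attribute encodings rather than the real INAT_* values:

	#include <stdbool.h>
	#include <stdio.h>

	/* Toy attribute encoding, loosely mimicking INAT_MAKE_PREFIX(). */
	#define MAKE_PREFIX(id)	((id) << 4)
	enum { PFX_ES = 1, PFX_CS, PFX_SS, PFX_DS, PFX_LOCK };

	static unsigned int get_attribute(unsigned char byte)
	{
		switch (byte) {
		case 0x26: return MAKE_PREFIX(PFX_ES);
		case 0x2E: return MAKE_PREFIX(PFX_CS);
		case 0x36: return MAKE_PREFIX(PFX_SS);
		case 0x3E: return MAKE_PREFIX(PFX_DS);
		case 0xF0: return MAKE_PREFIX(PFX_LOCK);
		default:   return 0;
		}
	}

	static bool is_prefix_bad(const unsigned char *prefixes, int n)
	{
		for (int i = 0; i < n; i++) {
			switch (get_attribute(prefixes[i])) {
			case MAKE_PREFIX(PFX_ES):
			case MAKE_PREFIX(PFX_CS):
			case MAKE_PREFIX(PFX_SS):
			case MAKE_PREFIX(PFX_DS):
			case MAKE_PREFIX(PFX_LOCK):
				return true;
			}
		}
		return false;
	}

	int main(void)
	{
		unsigned char good[] = { 0x66 };	/* operand-size override */
		unsigned char bad[]  = { 0xF0 };	/* LOCK */

		printf("good: %d, bad: %d\n",
		       is_prefix_bad(good, 1), is_prefix_bad(bad, 1));
		return 0;
	}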
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 7924a5356c8a..5edb27f1a2c4 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 1994  Linus Torvalds
  *
@@ -54,6 +55,7 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * Known problems:
@@ -93,7 +95,6 @@
 
 void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 {
-	struct tss_struct *tss;
 	struct task_struct *tsk = current;
 	struct vm86plus_struct __user *user;
 	struct vm86 *vm86 = current->thread.vm86;
@@ -145,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 		do_exit(SIGSEGV);
 	}
 
-	tss = &per_cpu(cpu_tss, get_cpu());
+	preempt_disable();
 	tsk->thread.sp0 = vm86->saved_sp0;
 	tsk->thread.sysenter_cs = __KERNEL_CS;
-	load_sp0(tss, &tsk->thread);
+	update_sp0(tsk);
+	refresh_sysenter_cs(&tsk->thread);
 	vm86->saved_sp0 = 0;
-	put_cpu();
+	preempt_enable();
 
 	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
@@ -236,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
 
 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 {
-	struct tss_struct *tss;
 	struct task_struct *tsk = current;
 	struct vm86 *vm86 = tsk->thread.vm86;
 	struct kernel_vm86_regs vm86regs;
@@ -364,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	vm86->saved_sp0 = tsk->thread.sp0;
 	lazy_save_gs(vm86->regs32.gs);
 
-	tss = &per_cpu(cpu_tss, get_cpu());
 	/* make room for real-mode segments */
+	preempt_disable();
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP)) {
 		tsk->thread.sysenter_cs = 0;
+		refresh_sysenter_cs(&tsk->thread);
+	}
 
-	load_sp0(tss, &tsk->thread);
-	put_cpu();
+	update_sp0(tsk);
+	preempt_enable();
 
 	if (vm86->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index f05f00acac89..a4009fb9be87 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * ld script for the x86 kernel
  *
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index b034b1b14b9c..44685fb2a192 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -26,9 +26,6 @@
 
 #define TOPOLOGY_REGISTER_OFFSET 0x10
 
-/* Flag below is initialized once during vSMP PCI initialization. */
-static int irq_routing_comply = 1;
-
 #if defined CONFIG_PCI && defined CONFIG_PARAVIRT
 /*
  * Interrupt control on vSMPowered systems:
@@ -105,9 +102,6 @@ static void __init set_vsmp_pv_ops(void)
 	if (cap & ctl & BIT(8)) {
 		ctl &= ~BIT(8);
 
-		/* Interrupt routing set to ignore */
-		irq_routing_comply = 0;
-
 #ifdef CONFIG_PROC_FS
 		/* Don't let users change irq affinity via procfs */
 		no_irq_affinity = 1;
@@ -211,23 +205,10 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
 	return hard_smp_processor_id() >> index_msb;
 }
 
-/*
- * In vSMP, all cpus should be capable of handling interrupts, regardless of
- * the APIC used.
- */
-static void fill_vector_allocation_domain(int cpu, struct cpumask *retmask,
-					  const struct cpumask *mask)
-{
-	cpumask_setall(retmask);
-}
-
 static void vsmp_apic_post_init(void)
 {
 	/* need to update phys_pkg_id */
 	apic->phys_pkg_id = apicid_phys_pkg_id;
-
-	if (!irq_routing_comply)
-		apic->vector_allocation_domain = fill_vector_allocation_domain;
 }
 
 void __init vsmp_init(void)
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index a088b2c47f73..1151ccd72ce9 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -28,6 +28,8 @@ void x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
+bool __init bool_x86_init_noop(void) { return false; }
+void x86_op_int_noop(int cpu) { }
 
 /*
  * The platform setup functions are preset with the default functions
@@ -55,6 +57,7 @@ struct x86_init_ops x86_init __initdata = {
 		.pre_vector_init	= init_ISA_irqs,
 		.intr_init		= native_init_IRQ,
 		.trap_init		= x86_init_noop,
+		.intr_mode_init		= apic_intr_mode_init
 	},
 
 	.oem = {
@@ -81,6 +84,13 @@ struct x86_init_ops x86_init __initdata = {
 		.init_irq		= x86_default_pci_init_irq,
 		.fixup_irqs		= x86_default_pci_fixup_irqs,
 	},
+
+	.hyper = {
+		.init_platform		= x86_init_noop,
+		.guest_late_init	= x86_init_noop,
+		.x2apic_available	= bool_x86_init_noop,
+		.init_mem_mapping	= x86_init_noop,
+	},
 };
 
 struct x86_cpuinit_ops x86_cpuinit = {
@@ -101,6 +111,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
 	.get_nmi_reason			= default_get_nmi_reason,
 	.save_sched_clock_state		= tsc_save_sched_clock_state,
 	.restore_sched_clock_state	= tsc_restore_sched_clock_state,
+	.hyper.pin_vcpu			= x86_op_int_noop,
 };
 EXPORT_SYMBOL_GPL(x86_platform);
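The new x86_init.hyper and x86_platform.hyper hooks follow the preset-to-noop pattern that x86_init.c already uses throughout: every hook points at a safe stub, so call sites never need NULL checks, and a hypervisor guest overrides only the hooks it cares about. A minimal user-space sketch of the pattern (hypothetical names and types, not the kernel's):

	#include <stdbool.h>
	#include <stdio.h>

	/* Hypothetical ops table mirroring the preset-to-noop idea. */
	struct hyper_ops {
		void (*init_platform)(void);
		bool (*x2apic_available)(void);
	};

	static void noop(void) { }
	static bool bool_noop(void) { return false; }

	/* Defaults are safe stubs, so callers can invoke unconditionally. */
	static struct hyper_ops hyper = {
		.init_platform    = noop,
		.x2apic_available = bool_noop,
	};

	/* A guest implementation can override selected hooks. */
	static bool guest_x2apic_available(void) { return true; }

	int main(void)
	{
		hyper.init_platform();			/* noop by default */
		printf("x2apic: %d\n", hyper.x2apic_available());

		hyper.x2apic_available = guest_x2apic_available;
		printf("x2apic: %d\n", hyper.x2apic_available());
		return 0;
	}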