summaryrefslogtreecommitdiffstats
path: root/arch/x86/include
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/Kbuild3
-rw-r--r--arch/x86/include/asm/acpi.h17
-rw-r--r--arch/x86/include/asm/alternative.h304
-rw-r--r--arch/x86/include/asm/amd-ibs.h3
-rw-r--r--arch/x86/include/asm/amd_nb.h58
-rw-r--r--arch/x86/include/asm/amd_node.h60
-rw-r--r--arch/x86/include/asm/apic.h60
-rw-r--r--arch/x86/include/asm/arch_hweight.h16
-rw-r--r--arch/x86/include/asm/asm-prototypes.h5
-rw-r--r--arch/x86/include/asm/asm.h24
-rw-r--r--arch/x86/include/asm/atomic.h26
-rw-r--r--arch/x86/include/asm/atomic64_32.h184
-rw-r--r--arch/x86/include/asm/atomic64_64.h26
-rw-r--r--arch/x86/include/asm/barrier.h32
-rw-r--r--arch/x86/include/asm/bitops.h24
-rw-r--r--arch/x86/include/asm/boot.h7
-rw-r--r--arch/x86/include/asm/bug.h18
-rw-r--r--arch/x86/include/asm/cfi.h28
-rw-r--r--arch/x86/include/asm/cmdline.h4
-rw-r--r--arch/x86/include/asm/cmpxchg.h28
-rw-r--r--arch/x86/include/asm/cmpxchg_32.h207
-rw-r--r--arch/x86/include/asm/cmpxchg_64.h12
-rw-r--r--arch/x86/include/asm/coco.h9
-rw-r--r--arch/x86/include/asm/cpu.h14
-rw-r--r--arch/x86/include/asm/cpu_device_id.h187
-rw-r--r--arch/x86/include/asm/cpufeature.h117
-rw-r--r--arch/x86/include/asm/cpufeatures.h829
-rw-r--r--arch/x86/include/asm/cpuid.h201
-rw-r--r--arch/x86/include/asm/cpuid/api.h210
-rw-r--r--arch/x86/include/asm/cpuid/types.h32
-rw-r--r--arch/x86/include/asm/cpumask.h4
-rw-r--r--arch/x86/include/asm/current.h40
-rw-r--r--arch/x86/include/asm/desc.h1
-rw-r--r--arch/x86/include/asm/desc_defs.h4
-rw-r--r--arch/x86/include/asm/disabled-features.h161
-rw-r--r--arch/x86/include/asm/dwarf2.h2
-rw-r--r--arch/x86/include/asm/e820/api.h2
-rw-r--r--arch/x86/include/asm/e820/types.h9
-rw-r--r--arch/x86/include/asm/edac.h2
-rw-r--r--arch/x86/include/asm/efi.h26
-rw-r--r--arch/x86/include/asm/elf.h5
-rw-r--r--arch/x86/include/asm/entry-common.h28
-rw-r--r--arch/x86/include/asm/extable.h1
-rw-r--r--arch/x86/include/asm/extable_fixup_types.h2
-rw-r--r--arch/x86/include/asm/fb.h19
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/fpu.h13
-rw-r--r--arch/x86/include/asm/fpu/api.h20
-rw-r--r--arch/x86/include/asm/fpu/signal.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h13
-rw-r--r--arch/x86/include/asm/fpu/xstate.h4
-rw-r--r--arch/x86/include/asm/frame.h10
-rw-r--r--arch/x86/include/asm/fred.h27
-rw-r--r--arch/x86/include/asm/fsgsbase.h4
-rw-r--r--arch/x86/include/asm/ftrace.h84
-rw-r--r--arch/x86/include/asm/futex.h8
-rw-r--r--arch/x86/include/asm/hardirq.h18
-rw-r--r--arch/x86/include/asm/highmem.h3
-rw-r--r--arch/x86/include/asm/hpet.h1
-rw-r--r--arch/x86/include/asm/hw_irq.h4
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h811
-rw-r--r--arch/x86/include/asm/ia32.h11
-rw-r--r--arch/x86/include/asm/ia32_unistd.h12
-rw-r--r--arch/x86/include/asm/ibt.h25
-rw-r--r--arch/x86/include/asm/idtentry.h18
-rw-r--r--arch/x86/include/asm/inat.h17
-rw-r--r--arch/x86/include/asm/init.h9
-rw-r--r--arch/x86/include/asm/insn.h32
-rw-r--r--arch/x86/include/asm/inst.h2
-rw-r--r--arch/x86/include/asm/intel-family.h227
-rw-r--r--arch/x86/include/asm/intel_ds.h1
-rw-r--r--arch/x86/include/asm/intel_pconfig.h65
-rw-r--r--arch/x86/include/asm/intel_punit_ipc.h7
-rw-r--r--arch/x86/include/asm/intel_scu_ipc.h68
-rw-r--r--arch/x86/include/asm/intel_telemetry.h2
-rw-r--r--arch/x86/include/asm/io.h28
-rw-r--r--arch/x86/include/asm/iosf_mbi.h7
-rw-r--r--arch/x86/include/asm/irq_remapping.h7
-rw-r--r--arch/x86/include/asm/irq_stack.h16
-rw-r--r--arch/x86/include/asm/irq_vectors.h12
-rw-r--r--arch/x86/include/asm/irqflags.h58
-rw-r--r--arch/x86/include/asm/jump_label.h39
-rw-r--r--arch/x86/include/asm/kasan.h2
-rw-r--r--arch/x86/include/asm/kexec.h129
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h23
-rw-r--r--arch/x86/include/asm/kvm-x86-pmu-ops.h3
-rw-r--r--arch/x86/include/asm/kvm_host.h338
-rw-r--r--arch/x86/include/asm/linkage.h24
-rw-r--r--arch/x86/include/asm/mce.h45
-rw-r--r--arch/x86/include/asm/mem_encrypt.h4
-rw-r--r--arch/x86/include/asm/mmu.h14
-rw-r--r--arch/x86/include/asm/mmu_context.h24
-rw-r--r--arch/x86/include/asm/mmzone.h6
-rw-r--r--arch/x86/include/asm/mmzone_32.h17
-rw-r--r--arch/x86/include/asm/mmzone_64.h18
-rw-r--r--arch/x86/include/asm/mpspec.h6
-rw-r--r--arch/x86/include/asm/mshyperv.h30
-rw-r--r--arch/x86/include/asm/msr-index.h89
-rw-r--r--arch/x86/include/asm/msr.h29
-rw-r--r--arch/x86/include/asm/mtrr.h12
-rw-r--r--arch/x86/include/asm/mwait.h1
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/include/asm/nops.h2
-rw-r--r--arch/x86/include/asm/nospec-branch.h88
-rw-r--r--arch/x86/include/asm/numa.h30
-rw-r--r--arch/x86/include/asm/numa_32.h13
-rw-r--r--arch/x86/include/asm/orc_types.h4
-rw-r--r--arch/x86/include/asm/page.h6
-rw-r--r--arch/x86/include/asm/page_32.h4
-rw-r--r--arch/x86/include/asm/page_32_types.h8
-rw-r--r--arch/x86/include/asm/page_64.h10
-rw-r--r--arch/x86/include/asm/page_64_types.h2
-rw-r--r--arch/x86/include/asm/page_types.h17
-rw-r--r--arch/x86/include/asm/paravirt.h46
-rw-r--r--arch/x86/include/asm/paravirt_types.h25
-rw-r--r--arch/x86/include/asm/percpu.h753
-rw-r--r--arch/x86/include/asm/perf_event.h84
-rw-r--r--arch/x86/include/asm/pgalloc.h42
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h12
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h4
-rw-r--r--arch/x86/include/asm/pgtable-invert.h4
-rw-r--r--arch/x86/include/asm/pgtable.h224
-rw-r--r--arch/x86/include/asm/pgtable_32.h4
-rw-r--r--arch/x86/include/asm/pgtable_32_areas.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h25
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h8
-rw-r--r--arch/x86/include/asm/pgtable_types.h28
-rw-r--r--arch/x86/include/asm/posted_intr.h118
-rw-r--r--arch/x86/include/asm/preempt.h25
-rw-r--r--arch/x86/include/asm/processor.h137
-rw-r--r--arch/x86/include/asm/prom.h13
-rw-r--r--arch/x86/include/asm/proto.h3
-rw-r--r--arch/x86/include/asm/pti.h4
-rw-r--r--arch/x86/include/asm/ptrace.h4
-rw-r--r--arch/x86/include/asm/purgatory.h4
-rw-r--r--arch/x86/include/asm/pvclock-abi.h4
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/qspinlock.h25
-rw-r--r--arch/x86/include/asm/qspinlock_paravirt.h7
-rw-r--r--arch/x86/include/asm/realmode.h4
-rw-r--r--arch/x86/include/asm/reboot.h6
-rw-r--r--arch/x86/include/asm/required-features.h105
-rw-r--r--arch/x86/include/asm/resctrl.h42
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/rqspinlock.h33
-rw-r--r--arch/x86/include/asm/runtime-const.h74
-rw-r--r--arch/x86/include/asm/seccomp.h2
-rw-r--r--arch/x86/include/asm/sections.h1
-rw-r--r--arch/x86/include/asm/segment.h8
-rw-r--r--arch/x86/include/asm/set_memory.h6
-rw-r--r--arch/x86/include/asm/setup.h17
-rw-r--r--arch/x86/include/asm/setup_data.h4
-rw-r--r--arch/x86/include/asm/sev-common.h92
-rw-r--r--arch/x86/include/asm/sev.h291
-rw-r--r--arch/x86/include/asm/shared/tdx.h51
-rw-r--r--arch/x86/include/asm/shstk.h8
-rw-r--r--arch/x86/include/asm/signal.h8
-rw-r--r--arch/x86/include/asm/smap.h33
-rw-r--r--arch/x86/include/asm/smp.h25
-rw-r--r--arch/x86/include/asm/sparsemem.h11
-rw-r--r--arch/x86/include/asm/special_insns.h36
-rw-r--r--arch/x86/include/asm/sta2x11.h13
-rw-r--r--arch/x86/include/asm/stackprotector.h36
-rw-r--r--arch/x86/include/asm/static_call.h15
-rw-r--r--arch/x86/include/asm/string_64.h47
-rw-r--r--arch/x86/include/asm/svm.h34
-rw-r--r--arch/x86/include/asm/switch_to.h6
-rw-r--r--arch/x86/include/asm/sync_bitops.h12
-rw-r--r--arch/x86/include/asm/sync_core.h6
-rw-r--r--arch/x86/include/asm/syscall.h7
-rw-r--r--arch/x86/include/asm/tdx.h11
-rw-r--r--arch/x86/include/asm/text-patching.h3
-rw-r--r--arch/x86/include/asm/thread_info.h18
-rw-r--r--arch/x86/include/asm/timer.h2
-rw-r--r--arch/x86/include/asm/tlb.h147
-rw-r--r--arch/x86/include/asm/tlbbatch.h5
-rw-r--r--arch/x86/include/asm/tlbflush.h85
-rw-r--r--arch/x86/include/asm/topology.h34
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/include/asm/tsc.h3
-rw-r--r--arch/x86/include/asm/uaccess.h6
-rw-r--r--arch/x86/include/asm/uaccess_64.h48
-rw-r--r--arch/x86/include/asm/unistd.h1
-rw-r--r--arch/x86/include/asm/unwind_hints.h4
-rw-r--r--arch/x86/include/asm/uv/uv_irq.h1
-rw-r--r--arch/x86/include/asm/vdso.h6
-rw-r--r--arch/x86/include/asm/vdso/getrandom.h32
-rw-r--r--arch/x86/include/asm/vdso/gettimeofday.h68
-rw-r--r--arch/x86/include/asm/vdso/processor.h4
-rw-r--r--arch/x86/include/asm/vdso/vsyscall.h25
-rw-r--r--arch/x86/include/asm/vermagic.h4
-rw-r--r--arch/x86/include/asm/vgtod.h5
-rw-r--r--arch/x86/include/asm/video.h21
-rw-r--r--arch/x86/include/asm/vm86.h2
-rw-r--r--arch/x86/include/asm/vmware.h336
-rw-r--r--arch/x86/include/asm/vmx.h81
-rw-r--r--arch/x86/include/asm/vmxfeatures.h110
-rw-r--r--arch/x86/include/asm/vvar.h55
-rw-r--r--arch/x86/include/asm/word-at-a-time.h57
-rw-r--r--arch/x86/include/asm/x86_init.h14
-rw-r--r--arch/x86/include/asm/xen/hypercall.h42
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h20
-rw-r--r--arch/x86/include/asm/xen/interface.h10
-rw-r--r--arch/x86/include/asm/xen/interface_32.h4
-rw-r--r--arch/x86/include/asm/xen/interface_64.h4
-rw-r--r--arch/x86/include/uapi/asm/amd_hsmp.h67
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h4
-rw-r--r--arch/x86/include/uapi/asm/e820.h4
-rw-r--r--arch/x86/include/uapi/asm/elf.h16
-rw-r--r--arch/x86/include/uapi/asm/kvm.h77
-rw-r--r--arch/x86/include/uapi/asm/ldt.h4
-rw-r--r--arch/x86/include/uapi/asm/mce.h3
-rw-r--r--arch/x86/include/uapi/asm/mman.h3
-rw-r--r--arch/x86/include/uapi/asm/msr.h4
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h6
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h4
-rw-r--r--arch/x86/include/uapi/asm/setup_data.h4
-rw-r--r--arch/x86/include/uapi/asm/signal.h8
-rw-r--r--arch/x86/include/uapi/asm/svm.h3
219 files changed, 5176 insertions, 4682 deletions
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a192bdea69e2..4566000e15c4 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -8,6 +8,9 @@ generated-y += syscalls_x32.h
generated-y += unistd_32_ia32.h
generated-y += unistd_64_x32.h
generated-y += xen-hypercalls.h
+generated-y += cpufeaturemasks.h
generic-y += early_ioremap.h
+generic-y += fprobe.h
generic-y += mcs_spinlock.h
+generic-y += mmzone.h
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index f896eed4516c..5ab1a4598d00 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -56,6 +56,8 @@ static inline void disable_acpi(void)
extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
+extern int acpi_blacklisted(void);
+
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
static inline void acpi_disable_pci(void)
{
@@ -76,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void)
#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
+union acpi_subtable_headers;
+
+int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
+ const unsigned long end);
+
+void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
+
/*
* Check if the CPU can handle C2 and deeper
*/
@@ -165,6 +174,14 @@ void acpi_generic_reduced_hw_init(void);
void x86_default_set_root_pointer(u64 addr);
u64 x86_default_get_root_pointer(void);
+#ifdef CONFIG_XEN_PV
+/* A Xen PV domain needs a special acpi_os_ioremap() handling. */
+extern void __iomem * (*acpi_os_ioremap)(acpi_physical_address phys,
+ acpi_size size);
+void __iomem *x86_acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+#define acpi_os_ioremap acpi_os_ioremap
+#endif
+
#else /* !CONFIG_ACPI */
#define acpi_lapic 0
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 67b68d0d17d1..4a37a8bd87fd 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/stringify.h>
+#include <linux/objtool.h>
#include <asm/asm.h>
#define ALT_FLAGS_SHIFT 16
@@ -14,7 +15,7 @@
#define ALT_DIRECT_CALL(feature) ((ALT_FLAG_DIRECT_CALL << ALT_FLAGS_SHIFT) | (feature))
#define ALT_CALL_ALWAYS ALT_DIRECT_CALL(X86_FEATURE_ALWAYS)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stddef.h>
@@ -47,7 +48,7 @@
".popsection\n" \
"671:"
-#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock; "
+#define LOCK_PREFIX LOCK_PREFIX_HERE "\n\tlock "
#else /* ! CONFIG_SMP */
#define LOCK_PREFIX_HERE ""
@@ -55,16 +56,6 @@
#endif
/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-#define ANNOTATE_IGNORE_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.ignore_alts\n\t" \
- ".long 999b\n\t" \
- ".popsection\n\t"
-
-/*
* The patching flags are part of the upper bits of the @ft_flags parameter when
* specifying them. The split is currently like this:
*
@@ -109,7 +100,6 @@ struct module;
struct callthunk_sites {
s32 *call_start, *call_end;
- struct alt_instr *alt_start, *alt_end;
};
#ifdef CONFIG_CALL_THUNKS
@@ -156,102 +146,50 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define ALT_CALL_INSTR "call BUG_func"
-#define b_replacement(num) "664"#num
-#define e_replacement(num) "665"#num
-
-#define alt_end_marker "663"
-#define alt_slen "662b-661b"
-#define alt_total_slen alt_end_marker"b-661b"
-#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
+#define alt_slen "772b-771b"
+#define alt_total_slen "773b-771b"
+#define alt_rlen "775f-774f"
-#define OLDINSTR(oldinstr, num) \
- "# ALT: oldnstr\n" \
- "661:\n\t" oldinstr "\n662:\n" \
+#define OLDINSTR(oldinstr) \
+ "# ALT: oldinstr\n" \
+ "771:\n\t" oldinstr "\n772:\n" \
"# ALT: padding\n" \
- ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
- "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \
- alt_end_marker ":\n"
-
-/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
-
-/*
- * Pad the second replacement alternative with additional NOPs if it is
- * additionally longer than the first replacement alternative.
- */
-#define OLDINSTR_2(oldinstr, num1, num2) \
- "# ALT: oldinstr2\n" \
- "661:\n\t" oldinstr "\n662:\n" \
- "# ALT: padding2\n" \
- ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define OLDINSTR_3(oldinsn, n1, n2, n3) \
- "# ALT: oldinstr3\n" \
- "661:\n\t" oldinsn "\n662:\n" \
- "# ALT: padding3\n" \
- ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")) > 0) * " \
- "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \
- " - (" alt_slen ")), 0x90\n" \
- alt_end_marker ":\n"
-
-#define ALTINSTR_ENTRY(ft_flags, num) \
- " .long 661b - .\n" /* label */ \
- " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ ".skip -(((" alt_rlen ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen ")-(" alt_slen ")),0x90\n" \
+ "773:\n"
+
+#define ALTINSTR_ENTRY(ft_flags) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ " .long 771b - .\n" /* label */ \
+ " .long 774f - .\n" /* new instruction */ \
" .4byte " __stringify(ft_flags) "\n" /* feature + flags */ \
" .byte " alt_total_slen "\n" /* source len */ \
- " .byte " alt_rlen(num) "\n" /* replacement len */
+ " .byte " alt_rlen "\n" /* replacement len */ \
+ ".popsection\n"
-#define ALTINSTR_REPLACEMENT(newinstr, num) /* replacement */ \
- "# ALT: replacement " #num "\n" \
- b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n"
+#define ALTINSTR_REPLACEMENT(newinstr) /* replacement */ \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ "# ALT: replacement\n" \
+ "774:\n\t" newinstr "\n775:\n" \
+ ".popsection\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- OLDINSTR(oldinstr, 1) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags, 1) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr, 1) \
- ".popsection\n"
+ OLDINSTR(oldinstr) \
+ ALTINSTR_ENTRY(ft_flags) \
+ ALTINSTR_REPLACEMENT(newinstr)
#define ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
- OLDINSTR_2(oldinstr, 1, 2) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinstr1, 1) \
- ALTINSTR_REPLACEMENT(newinstr2, 2) \
- ".popsection\n"
+ ALTERNATIVE(ALTERNATIVE(oldinstr, newinstr1, ft_flags1), newinstr2, ft_flags2)
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
#define ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
- newinstr_yes, ft_flags)
-
-#define ALTERNATIVE_3(oldinsn, newinsn1, ft_flags1, newinsn2, ft_flags2, \
- newinsn3, ft_flags3) \
- OLDINSTR_3(oldinsn, 1, 2, 3) \
- ".pushsection .altinstructions,\"a\"\n" \
- ALTINSTR_ENTRY(ft_flags1, 1) \
- ALTINSTR_ENTRY(ft_flags2, 2) \
- ALTINSTR_ENTRY(ft_flags3, 3) \
- ".popsection\n" \
- ".pushsection .altinstr_replacement, \"ax\"\n" \
- ALTINSTR_REPLACEMENT(newinsn1, 1) \
- ALTINSTR_REPLACEMENT(newinsn2, 2) \
- ALTINSTR_REPLACEMENT(newinsn3, 3) \
- ".popsection\n"
+ ALTERNATIVE_2(oldinstr, newinstr_no, X86_FEATURE_ALWAYS, newinstr_yes, ft_flags)
+
+#define ALTERNATIVE_3(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, \
+ newinstr3, ft_flags3) \
+ ALTERNATIVE(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2), \
+ newinstr3, ft_flags3)
/*
* Alternative instructions for different CPU types or capabilities.
@@ -266,14 +204,11 @@ static inline int alternatives_text_reserved(void *start, void *end)
* without volatile and memory clobber.
*/
#define alternative(oldinstr, newinstr, ft_flags) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) : : : "memory")
#define alternative_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2) ::: "memory")
-#define alternative_ternary(oldinstr, ft_flags, newinstr_yes, newinstr_no) \
- asm_inline volatile(ALTERNATIVE_TERNARY(oldinstr, ft_flags, newinstr_yes, newinstr_no) ::: "memory")
-
/*
* Alternative inline assembly with input.
*
@@ -283,32 +218,30 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Leaving an unused argument 0 to keep API compatibility.
*/
#define alternative_input(oldinstr, newinstr, ft_flags, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
- : : "i" (0), ## input)
-
-/*
- * This is similar to alternative_input. But it has two features and
- * respective instructions.
- *
- * If CPU has feature2, newinstr2 is used.
- * Otherwise, if CPU has feature1, newinstr1 is used.
- * Otherwise, oldinstr is used.
- */
-#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \
- ft_flags2, input...) \
- asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \
- newinstr2, ft_flags2) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: : "i" (0), ## input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, newinstr, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE(oldinstr, newinstr, ft_flags) \
+ asm_inline volatile(ALTERNATIVE(oldinstr, newinstr, ft_flags) \
: output : "i" (0), ## input)
-/* Like alternative_io, but for replacing a direct call with another one. */
-#define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \
- asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \
- : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input)
+/*
+ * Like alternative_io, but for replacing a direct call with another one.
+ *
+ * Use the %c operand modifier which is the generic way to print a bare
+ * constant expression with all syntax-specific punctuation omitted. %P
+ * is the x86-specific variant which can handle constants too, for
+ * historical reasons, but it should be used primarily for PIC
+ * references: i.e., if used for a function, it would add the PLT
+ * suffix.
+ */
+#define alternative_call(oldfunc, newfunc, ft_flags, output, input, clobbers...) \
+ asm_inline volatile(ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new] "i" (newfunc) \
+ COMMA(input) \
+ : clobbers)
/*
* Like alternative_call, but there are two features and respective functions.
@@ -316,25 +249,17 @@ static inline int alternatives_text_reserved(void *start, void *end)
* Otherwise, if CPU has feature1, function1 is used.
* Otherwise, old function is used.
*/
-#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
- output, input...) \
- asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\
- "call %P[new2]", ft_flags2) \
- : output, ASM_CALL_CONSTRAINT \
- : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
- [new2] "i" (newfunc2), ## input)
-
-/*
- * use this macro(s) if you need more than one output parameter
- * in alternative_io
- */
-#define ASM_OUTPUT2(a...) a
-
-/*
- * use this macro if you need clobbers but no inputs in
- * alternative_{input,io,call}()
- */
-#define ASM_NO_INPUT_CLOBBER(clbr...) "i" (0) : clbr
+#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \
+ output, input, clobbers...) \
+ asm_inline volatile(ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \
+ "call %c[new2]", ft_flags2) \
+ : ALT_OUTPUT_SP(output) \
+ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \
+ [new2] "i" (newfunc2) \
+ COMMA(input) \
+ : clobbers)
+
+#define ALT_OUTPUT_SP(...) ASM_CALL_CONSTRAINT, ## __VA_ARGS__
/* Macro for creating assembler functions avoiding any C magic. */
#define DEFINE_ASM_FUNC(func, instr, sec) \
@@ -352,7 +277,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
void BUG_func(void);
void nop_func(void);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_SMP
.macro LOCK_PREFIX
@@ -368,17 +293,6 @@ void nop_func(void);
#endif
/*
- * objtool annotation to ignore the alternatives and only consider the original
- * instruction(s).
- */
-.macro ANNOTATE_IGNORE_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.ignore_alts
- .long .Lannotate_\@
- .popsection
-.endm
-
-/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
@@ -402,22 +316,23 @@ void nop_func(void);
* @newinstr. ".skip" directive takes care of proper instruction padding
* in case @newinstr is longer than @oldinstr.
*/
-.macro ALTERNATIVE oldinstr, newinstr, ft_flags
-140:
- \oldinstr
-141:
- .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags,142b-140b,144f-143f
- .popsection
+#define __ALTERNATIVE(oldinst, newinst, flag) \
+740: \
+ oldinst ; \
+741: \
+ .skip -(((744f-743f)-(741b-740b)) > 0) * ((744f-743f)-(741b-740b)),0x90 ;\
+742: \
+ .pushsection .altinstructions,"a" ; \
+ altinstr_entry 740b,743f,flag,742b-740b,744f-743f ; \
+ .popsection ; \
+ .pushsection .altinstr_replacement,"ax" ; \
+743: \
+ newinst ; \
+744: \
+ .popsection ;
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr
-144:
- .popsection
+.macro ALTERNATIVE oldinstr, newinstr, ft_flags
+ __ALTERNATIVE(\oldinstr, \newinstr, \ft_flags)
.endm
#define old_len 141b-140b
@@ -426,65 +341,18 @@ void nop_func(void);
#define new_len3 146f-145f
/*
- * gas compatible max based on the idea from:
- * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
- *
- * The additional "-" is needed because gas uses a "true" value of -1.
- */
-#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
-#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c))
-
-
-/*
* Same as ALTERNATIVE macro above but for two alternatives. If CPU
* has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
* @feature2, it replaces @oldinstr with @feature2.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2
-140:
- \oldinstr
-141:
- .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
- (alt_max_2(new_len1, new_len2) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- .popsection
+ __ALTERNATIVE(__ALTERNATIVE(\oldinstr, \newinstr1, \ft_flags1),
+ \newinstr2, \ft_flags2)
.endm
.macro ALTERNATIVE_3 oldinstr, newinstr1, ft_flags1, newinstr2, ft_flags2, newinstr3, ft_flags3
-140:
- \oldinstr
-141:
- .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
- (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
-142:
-
- .pushsection .altinstructions,"a"
- altinstr_entry 140b,143f,\ft_flags1,142b-140b,144f-143f
- altinstr_entry 140b,144f,\ft_flags2,142b-140b,145f-144f
- altinstr_entry 140b,145f,\ft_flags3,142b-140b,146f-145f
- .popsection
-
- .pushsection .altinstr_replacement,"ax"
-143:
- \newinstr1
-144:
- \newinstr2
-145:
- \newinstr3
-146:
- .popsection
+ __ALTERNATIVE(ALTERNATIVE_2(\oldinstr, \newinstr1, \ft_flags1, \newinstr2, \ft_flags2),
+ \newinstr3, \ft_flags3)
.endm
/* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
@@ -492,6 +360,6 @@ void nop_func(void);
ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS, \
newinstr_yes, ft_flags
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_ALTERNATIVE_H */
diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd-ibs.h
index cb2a5e113daa..77f3a589a99a 100644
--- a/arch/x86/include/asm/amd-ibs.h
+++ b/arch/x86/include/asm/amd-ibs.h
@@ -64,7 +64,8 @@ union ibs_op_ctl {
opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */
reserved0:5, /* 27-31: reserved */
opcurcnt:27, /* 32-58: periodic op counter current count */
- reserved1:5; /* 59-63: reserved */
+ ldlat_thrsh:4, /* 59-62: Load Latency threshold */
+ ldlat_en:1; /* 63: Load Latency enabled */
};
};
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h
index 5c37944c8a5e..adfa0854cf2d 100644
--- a/arch/x86/include/asm/amd_nb.h
+++ b/arch/x86/include/asm/amd_nb.h
@@ -4,7 +4,7 @@
#include <linux/ioport.h>
#include <linux/pci.h>
-#include <linux/refcount.h>
+#include <asm/amd_node.h>
struct amd_nb_bus_dev_range {
u8 bus;
@@ -21,49 +21,15 @@ extern int amd_numa_init(void);
extern int amd_get_subcaches(int);
extern int amd_set_subcaches(int, unsigned long);
-extern int amd_smn_read(u16 node, u32 address, u32 *value);
-extern int amd_smn_write(u16 node, u32 address, u32 value);
-
struct amd_l3_cache {
unsigned indices;
u8 subcaches[4];
};
-struct threshold_block {
- unsigned int block; /* Number within bank */
- unsigned int bank; /* MCA bank the block belongs to */
- unsigned int cpu; /* CPU which controls MCA bank */
- u32 address; /* MSR address for the block */
- u16 interrupt_enable; /* Enable/Disable APIC interrupt */
- bool interrupt_capable; /* Bank can generate an interrupt. */
-
- u16 threshold_limit; /*
- * Value upon which threshold
- * interrupt is generated.
- */
-
- struct kobject kobj; /* sysfs object */
- struct list_head miscj; /*
- * List of threshold blocks
- * within a bank.
- */
-};
-
-struct threshold_bank {
- struct kobject *kobj;
- struct threshold_block *blocks;
-
- /* initialized to the number of CPUs on the node sharing this bank */
- refcount_t cpus;
- unsigned int shared;
-};
-
struct amd_northbridge {
- struct pci_dev *root;
struct pci_dev *misc;
struct pci_dev *link;
struct amd_l3_cache l3_cache;
- struct threshold_bank *bank4;
};
struct amd_northbridge_info {
@@ -82,23 +48,6 @@ u16 amd_nb_num(void);
bool amd_nb_has_feature(unsigned int feature);
struct amd_northbridge *node_to_amd_nb(int node);
-static inline u16 amd_pci_dev_to_node_id(struct pci_dev *pdev)
-{
- struct pci_dev *misc;
- int i;
-
- for (i = 0; i != amd_nb_num(); i++) {
- misc = node_to_amd_nb(i)->misc;
-
- if (pci_domain_nr(misc->bus) == pci_domain_nr(pdev->bus) &&
- PCI_SLOT(misc->devfn) == PCI_SLOT(pdev->devfn))
- return i;
- }
-
- WARN(1, "Unable to find AMD Northbridge id for %s\n", pci_name(pdev));
- return 0;
-}
-
static inline bool amd_gart_present(void)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
@@ -116,7 +65,10 @@ static inline bool amd_gart_present(void)
#define amd_nb_num(x) 0
#define amd_nb_has_feature(x) false
-#define node_to_amd_nb(x) NULL
+static inline struct amd_northbridge *node_to_amd_nb(int node)
+{
+ return NULL;
+}
#define amd_gart_present(x) false
#endif
diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd_node.h
new file mode 100644
index 000000000000..23fe617898a8
--- /dev/null
+++ b/arch/x86/include/asm/amd_node.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Node helper functions and common defines
+ *
+ * Copyright (c) 2024, Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Author: Yazen Ghannam <Yazen.Ghannam@amd.com>
+ *
+ * Note:
+ * Items in this file may only be used in a single place.
+ * However, it's prudent to keep all AMD Node functionality
+ * in a unified place rather than spreading throughout the
+ * kernel.
+ */
+
+#ifndef _ASM_X86_AMD_NODE_H_
+#define _ASM_X86_AMD_NODE_H_
+
+#include <linux/pci.h>
+
+#define MAX_AMD_NUM_NODES 8
+#define AMD_NODE0_PCI_SLOT 0x18
+
+struct pci_dev *amd_node_get_func(u16 node, u8 func);
+struct pci_dev *amd_node_get_root(u16 node);
+
+static inline u16 amd_num_nodes(void)
+{
+ return topology_amd_nodes_per_pkg() * topology_max_packages();
+}
+
+#ifdef CONFIG_AMD_NODE
+int __must_check amd_smn_read(u16 node, u32 address, u32 *value);
+int __must_check amd_smn_write(u16 node, u32 address, u32 value);
+
+/* Should only be used by the HSMP driver. */
+int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write);
+#else
+static inline int __must_check amd_smn_read(u16 node, u32 address, u32 *value) { return -ENODEV; }
+static inline int __must_check amd_smn_write(u16 node, u32 address, u32 value) { return -ENODEV; }
+
+static inline int __must_check amd_smn_hsmp_rdwr(u16 node, u32 address, u32 *value, bool write)
+{
+ return -ENODEV;
+}
+#endif /* CONFIG_AMD_NODE */
+
+/* helper for use with read_poll_timeout */
+static inline int smn_read_register(u32 reg)
+{
+ int data, rc;
+
+ rc = amd_smn_read(0, reg, &data);
+ if (rc)
+ return rc;
+
+ return data;
+}
+#endif /*_ASM_X86_AMD_NODE_H_*/
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index e6ab0cf15ed5..c903d358405d 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -14,9 +14,15 @@
#include <asm/msr.h>
#include <asm/hardirq.h>
#include <asm/io.h>
+#include <asm/posted_intr.h>
#define ARCH_APICTIMER_STOPS_ON_C3 1
+/* Macros for apic_extnmi which controls external NMI masking */
+#define APIC_EXTNMI_BSP 0 /* Default */
+#define APIC_EXTNMI_ALL 1
+#define APIC_EXTNMI_NONE 2
+
/*
* Debugging macros
*/
@@ -24,22 +30,22 @@
#define APIC_VERBOSE 1
#define APIC_DEBUG 2
-/* Macros for apic_extnmi which controls external NMI masking */
-#define APIC_EXTNMI_BSP 0 /* Default */
-#define APIC_EXTNMI_ALL 1
-#define APIC_EXTNMI_NONE 2
-
/*
- * Define the default level of output to be very little
- * This can be turned up by using apic=verbose for more
- * information and apic=debug for _lots_ of information.
- * apic_verbosity is defined in apic.c
+ * Define the default level of output to be very little This can be turned
+ * up by using apic=verbose for more information and apic=debug for _lots_
+ * of information. apic_verbosity is defined in apic.c
*/
-#define apic_printk(v, s, a...) do { \
- if ((v) <= apic_verbosity) \
- printk(s, ##a); \
- } while (0)
-
+#define apic_printk(v, s, a...) \
+do { \
+ if ((v) <= apic_verbosity) \
+ printk(s, ##a); \
+} while (0)
+
+#define apic_pr_verbose(s, a...) apic_printk(APIC_VERBOSE, KERN_INFO s, ##a)
+#define apic_pr_debug(s, a...) apic_printk(APIC_DEBUG, KERN_DEBUG s, ##a)
+#define apic_pr_debug_cont(s, a...) apic_printk(APIC_DEBUG, KERN_CONT s, ##a)
+/* Unconditional debug prints for code which is guarded by apic_verbosity already */
+#define apic_dbg(s, a...) printk(KERN_DEBUG s, ##a)
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
extern void x86_32_probe_apic(void);
@@ -92,9 +98,9 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
- ASM_OUTPUT2("=r" (v), "=m" (*addr)),
- ASM_OUTPUT2("0" (v), "m" (*addr)));
+ alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+ ASM_OUTPUT("=r" (v), "=m" (*addr)),
+ ASM_INPUT("0" (v), "m" (*addr)));
}
static inline u32 native_apic_mem_read(u32 reg)
@@ -121,8 +127,6 @@ static inline bool apic_is_x2apic_enabled(void)
extern void enable_IR_x2apic(void);
-extern int get_physical_broadcast(void);
-
extern int lapic_get_maxlvt(void);
extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
@@ -344,20 +348,12 @@ extern struct apic *apic;
* APIC drivers are probed based on how they are listed in the .apicdrivers
* section. So the order is important and enforced by the ordering
* of different apic driver files in the Makefile.
- *
- * For the files having two apic drivers, we use apic_drivers()
- * to enforce the order with in them.
*/
#define apic_driver(sym) \
static const struct apic *__apicdrivers_##sym __used \
__aligned(sizeof(struct apic *)) \
__section(".apicdrivers") = { &sym }
-#define apic_drivers(sym1, sym2) \
- static struct apic *__apicdrivers_##sym1##sym2[2] __used \
- __aligned(sizeof(struct apic *)) \
- __section(".apicdrivers") = { &sym1, &sym2 }
-
extern struct apic *__apicdrivers[], *__apicdrivers_end[];
/*
@@ -483,7 +479,6 @@ static inline u64 apic_icr_read(void) { return 0; }
static inline void apic_icr_write(u32 low, u32 high) { }
static inline void apic_wait_icr_idle(void) { }
static inline u32 safe_apic_wait_icr_idle(void) { return 0; }
-static inline void apic_set_eoi_cb(void (*eoi)(void)) {}
static inline void apic_native_eoi(void) { WARN_ON_ONCE(1); }
static inline void apic_setup_apic_calls(void) { }
@@ -500,14 +495,17 @@ static inline bool lapic_vector_set_in_irr(unsigned int vector)
return !!(irr & (1U << (vector % 32)));
}
+static inline bool is_vector_pending(unsigned int vector)
+{
+ return lapic_vector_set_in_irr(vector) || pi_pending_this_cpu(vector);
+}
+
/*
* Warm reset vector position:
*/
#define TRAMPOLINE_PHYS_LOW 0x467
#define TRAMPOLINE_PHYS_HIGH 0x469
-extern void generic_bigsmp_probe(void);
-
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
@@ -530,8 +528,6 @@ static inline int default_acpi_madt_oem_check(char *a, char *b) { return 0; }
static inline void x86_64_probe_apic(void) { }
#endif
-extern int default_apic_id_valid(u32 apicid);
-
extern u32 apic_default_calc_apicid(unsigned int cpu);
extern u32 apic_flat_calc_apicid(unsigned int cpu);
diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h
index ba88edd0d58b..cbc6157f0b4b 100644
--- a/arch/x86/include/asm/arch_hweight.h
+++ b/arch/x86/include/asm/arch_hweight.h
@@ -16,9 +16,11 @@ static __always_inline unsigned int __arch_hweight32(unsigned int w)
{
unsigned int res;
- asm (ALTERNATIVE("call __sw_hweight32", "popcntl %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ "call __sw_hweight32",
+ "popcntl %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
@@ -44,9 +46,11 @@ static __always_inline unsigned long __arch_hweight64(__u64 w)
{
unsigned long res;
- asm (ALTERNATIVE("call __sw_hweight64", "popcntq %1, %0", X86_FEATURE_POPCNT)
- : "="REG_OUT (res)
- : REG_IN (w));
+ asm_inline (ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ "call __sw_hweight64",
+ "popcntq %[val], %[cnt]", X86_FEATURE_POPCNT)
+ : [cnt] "=" REG_OUT (res), ASM_CALL_CONSTRAINT
+ : [val] REG_IN (w));
return res;
}
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 25466c4d2134..11c6fecc3ad7 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -16,7 +16,10 @@
#include <asm/gsseg.h>
#include <asm/nospec-branch.h>
-#ifndef CONFIG_X86_CMPXCHG64
+#ifndef CONFIG_X86_CX8
extern void cmpxchg8b_emu(void);
#endif
+#ifdef CONFIG_STACKPROTECTOR
+extern unsigned long __ref_stack_chk_guard;
+#endif
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index ca8eed1d496a..cc2881576c2c 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_ASM_H
#define _ASM_X86_ASM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define __ASM_FORM(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_RAW(x, ...) x,## __VA_ARGS__
# define __ASM_FORM_COMMA(x, ...) x,## __VA_ARGS__,
@@ -113,7 +113,7 @@
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef __pic__
static __always_inline __pure void *rip_rel_ptr(void *p)
{
@@ -144,7 +144,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
# include <asm/extable_fixup_types.h>
/* Exception table entry */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
# define _ASM_EXTABLE_TYPE(from, to, type) \
.pushsection "__ex_table","a" ; \
@@ -164,7 +164,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
# define _ASM_NOKPROBE(entry)
# endif
-#else /* ! __ASSEMBLY__ */
+#else /* ! __ASSEMBLER__ */
# define DEFINE_EXTABLE_TYPE_REG \
".macro extable_type_reg type:req reg:req\n" \
@@ -213,6 +213,17 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
/* For C file, we already have NOKPROBE_SYMBOL macro */
+/* Insert a comma if args are non-empty */
+#define COMMA(x...) __COMMA(x)
+#define __COMMA(...) , ##__VA_ARGS__
+
+/*
+ * Combine multiple asm inline constraint args into a single arg for passing to
+ * another macro.
+ */
+#define ASM_OUTPUT(x...) x
+#define ASM_INPUT(x...) x
+
/*
* This output constraint should be used for any inline asm which has a "call"
* instruction. Otherwise the asm may be inserted before the frame pointer
@@ -221,7 +232,7 @@ static __always_inline __pure void *rip_rel_ptr(void *p)
*/
register unsigned long current_stack_pointer asm(_ASM_SP);
#define ASM_CALL_CONSTRAINT "+r" (current_stack_pointer)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define _ASM_EXTABLE(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_DEFAULT)
@@ -229,9 +240,6 @@ register unsigned long current_stack_pointer asm(_ASM_SP);
#define _ASM_EXTABLE_UA(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_UACCESS)
-#define _ASM_EXTABLE_CPY(from, to) \
- _ASM_EXTABLE_TYPE(from, to, EX_TYPE_COPY)
-
#define _ASM_EXTABLE_FAULT(from, to) \
_ASM_EXTABLE_TYPE(from, to, EX_TYPE_FAULT)
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 55a55ec04350..75743f1dfd4e 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -30,14 +30,14 @@ static __always_inline void arch_atomic_set(atomic_t *v, int i)
static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "addl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "addl %1, %0"
: "+m" (v->counter)
: "ir" (i) : "memory");
}
static __always_inline void arch_atomic_sub(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "subl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subl %1, %0"
: "+m" (v->counter)
: "ir" (i) : "memory");
}
@@ -50,14 +50,14 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
static __always_inline void arch_atomic_inc(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "incl %0"
+ asm_inline volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter) :: "memory");
}
#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_dec(atomic_t *v)
{
- asm volatile(LOCK_PREFIX "decl %0"
+ asm_inline volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter) :: "memory");
}
#define arch_atomic_dec arch_atomic_dec
@@ -86,11 +86,7 @@ static __always_inline int arch_atomic_add_return(int i, atomic_t *v)
}
#define arch_atomic_add_return arch_atomic_add_return
-static __always_inline int arch_atomic_sub_return(int i, atomic_t *v)
-{
- return arch_atomic_add_return(-i, v);
-}
-#define arch_atomic_sub_return arch_atomic_sub_return
+#define arch_atomic_sub_return(i, v) arch_atomic_add_return(-(i), v)
static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
{
@@ -98,11 +94,7 @@ static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v)
}
#define arch_atomic_fetch_add arch_atomic_fetch_add
-static __always_inline int arch_atomic_fetch_sub(int i, atomic_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic_fetch_sub arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub(i, v) arch_atomic_fetch_add(-(i), v)
static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new)
{
@@ -124,7 +116,7 @@ static __always_inline int arch_atomic_xchg(atomic_t *v, int new)
static __always_inline void arch_atomic_and(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "andl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
@@ -142,7 +134,7 @@ static __always_inline int arch_atomic_fetch_and(int i, atomic_t *v)
static __always_inline void arch_atomic_or(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "orl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
@@ -160,7 +152,7 @@ static __always_inline int arch_atomic_fetch_or(int i, atomic_t *v)
static __always_inline void arch_atomic_xor(int i, atomic_t *v)
{
- asm volatile(LOCK_PREFIX "xorl %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorl %1, %0"
: "+m" (v->counter)
: "ir" (i)
: "memory");
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index 3486d91b8595..ab838205c1c6 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -14,6 +14,32 @@ typedef struct {
#define ATOMIC64_INIT(val) { (val) }
+/*
+ * Read an atomic64_t non-atomically.
+ *
+ * This is intended to be used in cases where a subsequent atomic operation
+ * will handle the torn value, and can be used to prime the first iteration
+ * of unconditional try_cmpxchg() loops, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do { } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ *
+ * This is NOT safe to use where the value is not always checked by a
+ * subsequent atomic operation, such as in conditional try_cmpxchg() loops
+ * that can break before the atomic operation, e.g.:
+ *
+ * s64 val = arch_atomic64_read_nonatomic(v);
+ * do {
+ * if (condition(val))
+ * break;
+ * } while (!arch_atomic64_try_cmpxchg(v, &val, val OP i);
+ */
+static __always_inline s64 arch_atomic64_read_nonatomic(const atomic64_t *v)
+{
+ /* See comment in arch_atomic_read(). */
+ return __READ_ONCE(v->counter);
+}
+
#define __ATOMIC64_DECL(sym) void atomic64_##sym(atomic64_t *, ...)
#ifndef ATOMIC64_EXPORT
#define ATOMIC64_DECL_ONE __ATOMIC64_DECL
@@ -22,16 +48,20 @@ typedef struct {
ATOMIC64_EXPORT(atomic64_##sym)
#endif
-#ifdef CONFIG_X86_CMPXCHG64
-#define __alternative_atomic64(f, g, out, in...) \
- asm volatile("call %P[func]" \
- : out : [func] "i" (atomic64_##g##_cx8), ## in)
+#ifdef CONFIG_X86_CX8
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ asm volatile("call %c[func]" \
+ : ALT_OUTPUT_SP(out) \
+ : [func] "i" (atomic64_##g##_cx8) \
+ COMMA(in) \
+ : clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8)
#else
-#define __alternative_atomic64(f, g, out, in...) \
- alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
- X86_FEATURE_CX8, ASM_OUTPUT2(out), ## in)
+#define __alternative_atomic64(f, g, out, in, clobbers...) \
+ alternative_call(atomic64_##f##_386, atomic64_##g##_cx8, \
+ X86_FEATURE_CX8, ASM_OUTPUT(out), \
+ ASM_INPUT(in), clobbers)
#define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8); \
ATOMIC64_DECL_ONE(sym##_386)
@@ -42,8 +72,8 @@ ATOMIC64_DECL_ONE(inc_386);
ATOMIC64_DECL_ONE(dec_386);
#endif
-#define alternative_atomic64(f, out, in...) \
- __alternative_atomic64(f, f, ASM_OUTPUT2(out), ## in)
+#define alternative_atomic64(f, out, in, clobbers...) \
+ __alternative_atomic64(f, f, ASM_OUTPUT(out), ASM_INPUT(in), clobbers)
ATOMIC64_DECL(read);
ATOMIC64_DECL(set);
@@ -61,20 +91,27 @@ ATOMIC64_DECL(add_unless);
#undef __ATOMIC64_DECL
#undef ATOMIC64_EXPORT
-static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 o, s64 n)
+static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
- return arch_cmpxchg64(&v->counter, o, n);
+ return arch_cmpxchg64(&v->counter, old, new);
}
#define arch_atomic64_cmpxchg arch_atomic64_cmpxchg
+static __always_inline bool arch_atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new)
+{
+ return arch_try_cmpxchg64(&v->counter, old, new);
+}
+#define arch_atomic64_try_cmpxchg arch_atomic64_try_cmpxchg
+
static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 n)
{
s64 o;
unsigned high = (unsigned)(n >> 32);
unsigned low = (unsigned)n;
- alternative_atomic64(xchg, "=&A" (o),
- "S" (v), "b" (low), "c" (high)
- : "memory");
+ alternative_atomic64(xchg,
+ "=&A" (o),
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "memory");
return o;
}
#define arch_atomic64_xchg arch_atomic64_xchg
@@ -83,23 +120,25 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
{
unsigned high = (unsigned)(i >> 32);
unsigned low = (unsigned)i;
- alternative_atomic64(set, /* no output */,
- "S" (v), "b" (low), "c" (high)
- : "eax", "edx", "memory");
+ alternative_atomic64(set,
+ /* no output */,
+ ASM_INPUT("S" (v), "b" (low), "c" (high)),
+ "eax", "edx", "memory");
}
static __always_inline s64 arch_atomic64_read(const atomic64_t *v)
{
s64 r;
- alternative_atomic64(read, "=&A" (r), "c" (v) : "memory");
+ alternative_atomic64(read, "=&A" (r), "c" (v), "memory");
return r;
}
static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
{
alternative_atomic64(add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
#define arch_atomic64_add_return arch_atomic64_add_return
@@ -107,8 +146,9 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
{
alternative_atomic64(sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
return i;
}
#define arch_atomic64_sub_return arch_atomic64_sub_return
@@ -116,8 +156,10 @@ static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
{
s64 a;
- alternative_atomic64(inc_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(inc_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_inc_return arch_atomic64_inc_return
@@ -125,39 +167,45 @@ static __always_inline s64 arch_atomic64_inc_return(atomic64_t *v)
static __always_inline s64 arch_atomic64_dec_return(atomic64_t *v)
{
s64 a;
- alternative_atomic64(dec_return, "=&A" (a),
- "S" (v) : "memory", "ecx");
+ alternative_atomic64(dec_return,
+ "=&A" (a),
+ "S" (v),
+ "memory", "ecx");
return a;
}
#define arch_atomic64_dec_return arch_atomic64_dec_return
-static __always_inline s64 arch_atomic64_add(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
__alternative_atomic64(add, add_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
- return i;
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
-static __always_inline s64 arch_atomic64_sub(s64 i, atomic64_t *v)
+static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
__alternative_atomic64(sub, sub_return,
- ASM_OUTPUT2("+A" (i), "+c" (v)),
- ASM_NO_INPUT_CLOBBER("memory"));
- return i;
+ ASM_OUTPUT("+A" (i), "+c" (v)),
+ /* no input */,
+ "memory");
}
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- __alternative_atomic64(inc, inc_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(inc, inc_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- __alternative_atomic64(dec, dec_return, /* no output */,
- "S" (v) : "memory", "eax", "ecx", "edx");
+ __alternative_atomic64(dec, dec_return,
+ /* no output */,
+ "S" (v),
+ "memory", "eax", "ecx", "edx");
}
#define arch_atomic64_dec arch_atomic64_dec
@@ -166,8 +214,9 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
unsigned low = (unsigned)u;
unsigned high = (unsigned)(u >> 32);
alternative_atomic64(add_unless,
- ASM_OUTPUT2("+A" (a), "+c" (low), "+D" (high)),
- "S" (v) : "memory");
+ ASM_OUTPUT("+A" (a), "+c" (low), "+D" (high)),
+ "S" (v),
+ "memory");
return (int)a;
}
#define arch_atomic64_add_unless arch_atomic64_add_unless
@@ -175,8 +224,10 @@ static __always_inline int arch_atomic64_add_unless(atomic64_t *v, s64 a, s64 u)
static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
- alternative_atomic64(inc_not_zero, "=&a" (r),
- "S" (v) : "ecx", "edx", "memory");
+ alternative_atomic64(inc_not_zero,
+ "=&a" (r),
+ "S" (v),
+ "ecx", "edx", "memory");
return r;
}
#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
@@ -184,8 +235,10 @@ static __always_inline int arch_atomic64_inc_not_zero(atomic64_t *v)
static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
{
s64 r;
- alternative_atomic64(dec_if_positive, "=&A" (r),
- "S" (v) : "ecx", "memory");
+ alternative_atomic64(dec_if_positive,
+ "=&A" (r),
+ "S" (v),
+ "ecx", "memory");
return r;
}
#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
@@ -195,69 +248,62 @@ static __always_inline s64 arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
}
static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c & i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val & i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_and arch_atomic64_fetch_and
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
}
static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c | i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val | i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_or arch_atomic64_fetch_or
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
}
static __always_inline s64 arch_atomic64_fetch_xor(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c ^ i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val ^ i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
- s64 old, c = 0;
+ s64 val = arch_atomic64_read_nonatomic(v);
- while ((old = arch_atomic64_cmpxchg(v, c, c + i)) != c)
- c = old;
+ do { } while (!arch_atomic64_try_cmpxchg(v, &val, val + i));
- return old;
+ return val;
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 3165c0feedf7..87b496325b5b 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -22,14 +22,14 @@ static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i)
static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "addq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "addq %1, %0"
: "=m" (v->counter)
: "er" (i), "m" (v->counter) : "memory");
}
static __always_inline void arch_atomic64_sub(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "subq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "subq %1, %0"
: "=m" (v->counter)
: "er" (i), "m" (v->counter) : "memory");
}
@@ -42,7 +42,7 @@ static __always_inline bool arch_atomic64_sub_and_test(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "incq %0"
+ asm_inline volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
: "m" (v->counter) : "memory");
}
@@ -50,7 +50,7 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "decq %0"
+ asm_inline volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
: "m" (v->counter) : "memory");
}
@@ -80,11 +80,7 @@ static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v)
}
#define arch_atomic64_add_return arch_atomic64_add_return
-static __always_inline s64 arch_atomic64_sub_return(s64 i, atomic64_t *v)
-{
- return arch_atomic64_add_return(-i, v);
-}
-#define arch_atomic64_sub_return arch_atomic64_sub_return
+#define arch_atomic64_sub_return(i, v) arch_atomic64_add_return(-(i), v)
static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
{
@@ -92,11 +88,7 @@ static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v)
}
#define arch_atomic64_fetch_add arch_atomic64_fetch_add
-static __always_inline s64 arch_atomic64_fetch_sub(s64 i, atomic64_t *v)
-{
- return xadd(&v->counter, -i);
-}
-#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub(i, v) arch_atomic64_fetch_add(-(i), v)
static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new)
{
@@ -118,7 +110,7 @@ static __always_inline s64 arch_atomic64_xchg(atomic64_t *v, s64 new)
static __always_inline void arch_atomic64_and(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "andq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "andq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
@@ -136,7 +128,7 @@ static __always_inline s64 arch_atomic64_fetch_and(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_or(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "orq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "orq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
@@ -154,7 +146,7 @@ static __always_inline s64 arch_atomic64_fetch_or(s64 i, atomic64_t *v)
static __always_inline void arch_atomic64_xor(s64 i, atomic64_t *v)
{
- asm volatile(LOCK_PREFIX "xorq %1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorq %1, %0"
: "+m" (v->counter)
: "er" (i)
: "memory");
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 63bdc6b85219..db70832232d4 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -12,11 +12,11 @@
*/
#ifdef CONFIG_X86_32
-#define mb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "mfence", \
+#define mb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "mfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define rmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "lfence", \
+#define rmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "lfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
-#define wmb() asm volatile(ALTERNATIVE("lock; addl $0,-4(%%esp)", "sfence", \
+#define wmb() asm volatile(ALTERNATIVE("lock addl $0,-4(%%esp)", "sfence", \
X86_FEATURE_XMM2) ::: "memory", "cc")
#else
#define __mb() asm volatile("mfence":::"memory")
@@ -33,20 +33,16 @@
* Returns:
* 0 - (index < size)
*/
-static __always_inline unsigned long array_index_mask_nospec(unsigned long index,
- unsigned long size)
-{
- unsigned long mask;
-
- asm volatile ("cmp %1,%2; sbb %0,%0;"
- :"=r" (mask)
- :"g"(size),"r" (index)
- :"cc");
- return mask;
-}
-
-/* Override the default implementation from linux/nospec.h. */
-#define array_index_mask_nospec array_index_mask_nospec
+#define array_index_mask_nospec(idx,sz) ({ \
+ typeof((idx)+(sz)) __idx = (idx); \
+ typeof(__idx) __sz = (sz); \
+ unsigned long __mask; \
+ asm volatile ("cmp %1,%2; sbb %0,%0" \
+ :"=r" (__mask) \
+ :ASM_INPUT_G (__sz), \
+ "r" (__idx) \
+ :"cc"); \
+ __mask; })
/* Prevent speculative execution past this barrier. */
#define barrier_nospec() alternative("", "lfence", X86_FEATURE_LFENCE_RDTSC)
@@ -54,7 +50,7 @@ static __always_inline unsigned long array_index_mask_nospec(unsigned long index
#define __dma_rmb() barrier()
#define __dma_wmb() barrier()
-#define __smp_mb() asm volatile("lock; addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
+#define __smp_mb() asm volatile("lock addl $0,-4(%%" _ASM_SP ")" ::: "memory", "cc")
#define __smp_rmb() dma_rmb()
#define __smp_wmb() barrier()
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 990eb686ca67..100413aff640 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -52,12 +52,12 @@ static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "orb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "orb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (CONST_MASK(nr))
: "memory");
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(bts) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -72,11 +72,11 @@ static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "andb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "andb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (~CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btr) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -98,7 +98,7 @@ static __always_inline bool arch_xor_unlock_is_negative_byte(unsigned long mask,
volatile unsigned long *addr)
{
bool negative;
- asm volatile(LOCK_PREFIX "xorb %2,%1"
+ asm_inline volatile(LOCK_PREFIX "xorb %2,%1"
CC_SET(s)
: CC_OUT(s) (negative), WBYTE_ADDR(addr)
: "iq" ((char)mask) : "memory");
@@ -122,11 +122,11 @@ static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr)
{
if (__builtin_constant_p(nr)) {
- asm volatile(LOCK_PREFIX "xorb %b1,%0"
+ asm_inline volatile(LOCK_PREFIX "xorb %b1,%0"
: CONST_MASK_ADDR(nr, addr)
: "iq" (CONST_MASK(nr)));
} else {
- asm volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
+ asm_inline volatile(LOCK_PREFIX __ASM_SIZE(btc) " %1,%0"
: : RLONG_ADDR(addr), "Ir" (nr) : "memory");
}
}
@@ -250,7 +250,7 @@ static __always_inline unsigned long variable__ffs(unsigned long word)
{
asm("rep; bsf %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -297,7 +297,7 @@ static __always_inline unsigned long __fls(unsigned long word)
asm("bsr %1,%0"
: "=r" (word)
- : "rm" (word));
+ : ASM_INPUT_RM (word));
return word;
}
@@ -320,7 +320,7 @@ static __always_inline int variable_ffs(int x)
*/
asm("bsfl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsfl %1,%0\n\t"
"cmovzl %2,%0"
@@ -377,7 +377,7 @@ static __always_inline int fls(unsigned int x)
*/
asm("bsrl %1,%0"
: "=r" (r)
- : "rm" (x), "0" (-1));
+ : ASM_INPUT_RM (x), "0" (-1));
#elif defined(CONFIG_X86_CMOV)
asm("bsrl %1,%0\n\t"
"cmovzl %2,%0"
@@ -416,7 +416,7 @@ static __always_inline int fls64(__u64 x)
*/
asm("bsrq %1,%q0"
: "+r" (bitpos)
- : "rm" (x));
+ : ASM_INPUT_RM (x));
return bitpos + 1;
}
#else
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index a3e0be0470a4..3f02ff6d333d 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -6,11 +6,6 @@
#include <asm/pgtable_types.h>
#include <uapi/asm/boot.h>
-/* Physical address where kernel should be loaded. */
-#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
- + (CONFIG_PHYSICAL_ALIGN - 1)) \
- & ~(CONFIG_PHYSICAL_ALIGN - 1))
-
/* Minimum kernel alignment, as a power of two */
#ifdef CONFIG_X86_64
# define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
@@ -79,7 +74,7 @@
# define BOOT_STACK_SIZE 0x1000
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern unsigned int output_len;
extern const unsigned long kernel_text_size;
extern const unsigned long kernel_total_size;
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
index a3ec87d198ac..f0e9acf72547 100644
--- a/arch/x86/include/asm/bug.h
+++ b/arch/x86/include/asm/bug.h
@@ -13,6 +13,22 @@
#define INSN_UD2 0x0b0f
#define LEN_UD2 2
+/*
+ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
+ */
+#define INSN_ASOP 0x67
+#define INSN_LOCK 0xf0
+#define OPCODE_ESCAPE 0x0f
+#define SECOND_BYTE_OPCODE_UD1 0xb9
+#define SECOND_BYTE_OPCODE_UD2 0x0b
+
+#define BUG_NONE 0xffff
+#define BUG_UD2 0xfffe
+#define BUG_UD1 0xfffd
+#define BUG_UD1_UBSAN 0xfffc
+#define BUG_EA 0xffea
+#define BUG_LOCK 0xfff0
+
#ifdef CONFIG_GENERIC_BUG
#ifdef CONFIG_X86_32
@@ -80,7 +96,7 @@ do { \
do { \
__auto_type __flags = BUGFLAG_WARNING|(flags); \
instrumentation_begin(); \
- _BUG_FLAGS(ASM_UD2, __flags, ASM_REACHABLE); \
+ _BUG_FLAGS(ASM_UD2, __flags, ANNOTATE_REACHABLE(1b)); \
instrumentation_end(); \
} while (0)
diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 7cd752557905..3e51ba459154 100644
--- a/arch/x86/include/asm/cfi.h
+++ b/arch/x86/include/asm/cfi.h
@@ -93,7 +93,7 @@
*
*/
enum cfi_mode {
- CFI_DEFAULT, /* FineIBT if hardware has IBT, otherwise kCFI */
+ CFI_AUTO, /* FineIBT if hardware has IBT, otherwise kCFI */
CFI_OFF, /* Taditional / IBT depending on .config */
CFI_KCFI, /* Optionally CALL_PADDING, IBT, RETPOLINE */
CFI_FINEIBT, /* see arch/x86/kernel/alternative.c */
@@ -101,6 +101,16 @@ enum cfi_mode {
extern enum cfi_mode cfi_mode;
+#ifdef CONFIG_FINEIBT_BHI
+extern bool cfi_bhi;
+#else
+#define cfi_bhi (0)
+#endif
+
+typedef u8 bhi_thunk[32];
+extern bhi_thunk __bhi_args[];
+extern bhi_thunk __bhi_args_end[];
+
struct pt_regs;
#ifdef CONFIG_CFI_CLANG
@@ -125,6 +135,18 @@ static inline int cfi_get_offset(void)
#define cfi_get_offset cfi_get_offset
extern u32 cfi_get_func_hash(void *func);
+extern int cfi_get_func_arity(void *func);
+
+#ifdef CONFIG_FINEIBT
+extern bool decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type);
+#else
+static inline bool
+decode_fineibt_insn(struct pt_regs *regs, unsigned long *target, u32 *type)
+{
+ return false;
+}
+
+#endif
#else
static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
@@ -137,6 +159,10 @@ static inline u32 cfi_get_func_hash(void *func)
{
return 0;
}
+static inline int cfi_get_func_arity(void *func)
+{
+ return 0;
+}
#endif /* CONFIG_CFI_CLANG */
#if HAS_KERNEL_IBT == 1
diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h
index 6faaf27e8899..6cbd9ae58b21 100644
--- a/arch/x86/include/asm/cmdline.h
+++ b/arch/x86/include/asm/cmdline.h
@@ -2,6 +2,10 @@
#ifndef _ASM_X86_CMDLINE_H
#define _ASM_X86_CMDLINE_H
+#include <asm/setup.h>
+
+extern char builtin_cmdline[COMMAND_LINE_SIZE];
+
int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
int cmdline_find_option(const char *cmdline_ptr, const char *option,
char *buffer, int bufsize);
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 5612648b0202..b61f32c3459f 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -44,22 +44,22 @@ extern void __add_wrong_size(void)
__typeof__ (*(ptr)) __ret = (arg); \
switch (sizeof(*(ptr))) { \
case __X86_CASE_B: \
- asm volatile (lock #op "b %b0, %1\n" \
+ asm_inline volatile (lock #op "b %b0, %1" \
: "+q" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_W: \
- asm volatile (lock #op "w %w0, %1\n" \
+ asm_inline volatile (lock #op "w %w0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_L: \
- asm volatile (lock #op "l %0, %1\n" \
+ asm_inline volatile (lock #op "l %0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
case __X86_CASE_Q: \
- asm volatile (lock #op "q %q0, %1\n" \
+ asm_inline volatile (lock #op "q %q0, %1" \
: "+r" (__ret), "+m" (*(ptr)) \
: : "memory", "cc"); \
break; \
@@ -91,7 +91,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(ptr); \
- asm volatile(lock "cmpxchgb %2,%1" \
+ asm_inline volatile(lock "cmpxchgb %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "q" (__new), "0" (__old) \
: "memory"); \
@@ -100,7 +100,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(ptr); \
- asm volatile(lock "cmpxchgw %2,%1" \
+ asm_inline volatile(lock "cmpxchgw %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -109,7 +109,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(ptr); \
- asm volatile(lock "cmpxchgl %2,%1" \
+ asm_inline volatile(lock "cmpxchgl %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -118,7 +118,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(ptr); \
- asm volatile(lock "cmpxchgq %2,%1" \
+ asm_inline volatile(lock "cmpxchgq %2, %1" \
: "=a" (__ret), "+m" (*__ptr) \
: "r" (__new), "0" (__old) \
: "memory"); \
@@ -134,7 +134,7 @@ extern void __add_wrong_size(void)
__raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
#define __sync_cmpxchg(ptr, old, new, size) \
- __raw_cmpxchg((ptr), (old), (new), (size), "lock; ")
+ __raw_cmpxchg((ptr), (old), (new), (size), "lock ")
#define __cmpxchg_local(ptr, old, new, size) \
__raw_cmpxchg((ptr), (old), (new), (size), "")
@@ -165,7 +165,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_B: \
{ \
volatile u8 *__ptr = (volatile u8 *)(_ptr); \
- asm volatile(lock "cmpxchgb %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgb %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -177,7 +177,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_W: \
{ \
volatile u16 *__ptr = (volatile u16 *)(_ptr); \
- asm volatile(lock "cmpxchgw %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgw %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -189,7 +189,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_L: \
{ \
volatile u32 *__ptr = (volatile u32 *)(_ptr); \
- asm volatile(lock "cmpxchgl %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgl %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -201,7 +201,7 @@ extern void __add_wrong_size(void)
case __X86_CASE_Q: \
{ \
volatile u64 *__ptr = (volatile u64 *)(_ptr); \
- asm volatile(lock "cmpxchgq %[new], %[ptr]" \
+ asm_inline volatile(lock "cmpxchgq %[new], %[ptr]" \
CC_SET(z) \
: CC_OUT(z) (success), \
[ptr] "+m" (*__ptr), \
@@ -222,7 +222,7 @@ extern void __add_wrong_size(void)
__raw_try_cmpxchg((ptr), (pold), (new), (size), LOCK_PREFIX)
#define __sync_try_cmpxchg(ptr, pold, new, size) \
- __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock; ")
+ __raw_try_cmpxchg((ptr), (pold), (new), (size), "lock ")
#define __try_cmpxchg_local(ptr, pold, new, size) \
__raw_try_cmpxchg((ptr), (pold), (new), (size), "")
diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index b5731c51f0f4..371f7906019e 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -3,103 +3,152 @@
#define _ASM_X86_CMPXCHG_32_H
/*
- * Note: if you use set64_bit(), __cmpxchg64(), or their variants,
+ * Note: if you use __cmpxchg64(), or their variants,
* you need to test for the feature in boot_cpu_data.
*/
-#ifdef CONFIG_X86_CMPXCHG64
-#define arch_cmpxchg64(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#define arch_cmpxchg64_local(ptr, o, n) \
- ((__typeof__(*(ptr)))__cmpxchg64_local((ptr), (unsigned long long)(o), \
- (unsigned long long)(n)))
-#define arch_try_cmpxchg64(ptr, po, n) \
- __try_cmpxchg64((ptr), (unsigned long long *)(po), \
- (unsigned long long)(n))
-#endif
+union __u64_halves {
+ u64 full;
+ struct {
+ u32 low, high;
+ };
+};
+
+#define __arch_cmpxchg64(_ptr, _old, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
+ : [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ o.full; \
+})
-static inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+
+static __always_inline u64 __cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
{
- u64 prev;
- asm volatile(LOCK_PREFIX "cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_cmpxchg64(ptr, old, new, LOCK_PREFIX);
}
-static inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+static __always_inline u64 __cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64(ptr, old, new,);
+}
+
+#define __arch_try_cmpxchg64(_ptr, _oldp, _new, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm_inline volatile(_lock "cmpxchg8b %[ptr]" \
+ CC_SET(e) \
+ : CC_OUT(e) (ret), \
+ [ptr] "+m" (*(_ptr)), \
+ "+a" (o.low), "+d" (o.high) \
+ : "b" (n.low), "c" (n.high) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
{
- u64 prev;
- asm volatile("cmpxchg8b %1"
- : "=A" (prev),
- "+m" (*ptr)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32)),
- "0" (old)
- : "memory");
- return prev;
+ return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
}
-static inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *pold, u64 new)
+static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
{
- bool success;
- u64 old = *pold;
- asm volatile(LOCK_PREFIX "cmpxchg8b %[ptr]"
- CC_SET(z)
- : CC_OUT(z) (success),
- [ptr] "+m" (*ptr),
- "+A" (old)
- : "b" ((u32)new),
- "c" ((u32)(new >> 32))
- : "memory");
-
- if (unlikely(!success))
- *pold = old;
- return success;
+ return __arch_try_cmpxchg64(ptr, oldp, new,);
}
-#ifndef CONFIG_X86_CMPXCHG64
+#ifdef CONFIG_X86_CX8
+
+#define arch_cmpxchg64 __cmpxchg64
+
+#define arch_cmpxchg64_local __cmpxchg64_local
+
+#define arch_try_cmpxchg64 __try_cmpxchg64
+
+#define arch_try_cmpxchg64_local __try_cmpxchg64_local
+
+#else
+
/*
* Building a kernel capable running on 80386 and 80486. It may be necessary
* to simulate the cmpxchg8b on the 80386 and 80486 CPU.
*/
-#define arch_cmpxchg64(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io(LOCK_PREFIX_HERE \
- "call cmpxchg8b_emu", \
- "lock; cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
-
-
-#define arch_cmpxchg64_local(ptr, o, n) \
-({ \
- __typeof__(*(ptr)) __ret; \
- __typeof__(*(ptr)) __old = (o); \
- __typeof__(*(ptr)) __new = (n); \
- alternative_io("call cmpxchg8b_emu", \
- "cmpxchg8b (%%esi)" , \
- X86_FEATURE_CX8, \
- "=A" (__ret), \
- "S" ((ptr)), "0" (__old), \
- "b" ((unsigned int)__new), \
- "c" ((unsigned int)(__new>>32)) \
- : "memory"); \
- __ret; })
+#define __arch_cmpxchg64_emu(_ptr, _old, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = (_old), }, \
+ n = { .full = (_new), }; \
+ \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP("+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ o.full; \
+})
+
+static __always_inline u64 arch_cmpxchg64(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, LOCK_PREFIX_HERE, "lock ");
+}
+#define arch_cmpxchg64 arch_cmpxchg64
+
+static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 new)
+{
+ return __arch_cmpxchg64_emu(ptr, old, new, ,);
+}
+#define arch_cmpxchg64_local arch_cmpxchg64_local
+
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new, _lock_loc, _lock) \
+({ \
+ union __u64_halves o = { .full = *(_oldp), }, \
+ n = { .full = (_new), }; \
+ bool ret; \
+ \
+ asm_inline volatile( \
+ ALTERNATIVE(_lock_loc \
+ "call cmpxchg8b_emu", \
+ _lock "cmpxchg8b %a[ptr]", X86_FEATURE_CX8) \
+ CC_SET(e) \
+ : ALT_OUTPUT_SP(CC_OUT(e) (ret), \
+ "+a" (o.low), "+d" (o.high)) \
+ : "b" (n.low), "c" (n.high), \
+ [ptr] "S" (_ptr) \
+ : "memory"); \
+ \
+ if (unlikely(!ret)) \
+ *(_oldp) = o.full; \
+ \
+ likely(ret); \
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, LOCK_PREFIX_HERE, "lock ");
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
+static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+ return __arch_try_cmpxchg64_emu(ptr, oldp, new, ,);
+}
+#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local
#endif
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 44b08b53ab32..71d1e72ed879 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -20,6 +20,12 @@
arch_try_cmpxchg((ptr), (po), (n)); \
})
+#define arch_try_cmpxchg64_local(ptr, po, n) \
+({ \
+ BUILD_BUG_ON(sizeof(*(ptr)) != 8); \
+ arch_try_cmpxchg_local((ptr), (po), (n)); \
+})
+
union __u128_halves {
u128 full;
struct {
@@ -32,7 +38,7 @@ union __u128_halves {
union __u128_halves o = { .full = (_old), }, \
n = { .full = (_new), }; \
\
- asm volatile(_lock "cmpxchg16b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
: [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
@@ -59,10 +65,10 @@ static __always_inline u128 arch_cmpxchg128_local(volatile u128 *ptr, u128 old,
n = { .full = (_new), }; \
bool ret; \
\
- asm volatile(_lock "cmpxchg16b %[ptr]" \
+ asm_inline volatile(_lock "cmpxchg16b %[ptr]" \
CC_SET(e) \
: CC_OUT(e) (ret), \
- [ptr] "+m" (*ptr), \
+ [ptr] "+m" (*(_ptr)), \
"+a" (o.low), "+d" (o.high) \
: "b" (n.low), "c" (n.high) \
: "memory"); \
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index c086699b0d0c..e7225452963f 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -15,6 +15,11 @@ enum cc_vendor {
extern enum cc_vendor cc_vendor;
extern u64 cc_mask;
+static inline u64 cc_get_mask(void)
+{
+ return cc_mask;
+}
+
static inline void cc_set_mask(u64 mask)
{
RIP_REL_REF(cc_mask) = mask;
@@ -25,6 +30,10 @@ u64 cc_mkdec(u64 val);
void cc_random_init(void);
#else
#define cc_vendor (CC_VENDOR_NONE)
+static inline u64 cc_get_mask(void)
+{
+ return 0;
+}
static inline u64 cc_mkenc(u64 val)
{
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index aa30fd8cad7f..ad235dda1ded 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -12,7 +12,6 @@
#ifndef CONFIG_SMP
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
#define cpu_acpi_id(cpu) 0
-#define safe_smp_processor_id() 0
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
@@ -26,12 +25,13 @@ int mwait_usable(const struct cpuinfo_x86 *);
unsigned int x86_family(unsigned int sig);
unsigned int x86_model(unsigned int sig);
unsigned int x86_stepping(unsigned int sig);
-#ifdef CONFIG_CPU_SUP_INTEL
+#ifdef CONFIG_X86_BUS_LOCK_DETECT
extern void __init sld_setup(struct cpuinfo_x86 *c);
extern bool handle_user_split_lock(struct pt_regs *regs, long error_code);
extern bool handle_guest_split_lock(unsigned long ip);
extern void handle_bus_lock(struct pt_regs *regs);
-u8 get_this_hybrid_cpu_type(void);
+void split_lock_init(void);
+void bus_lock_init(void);
#else
static inline void __init sld_setup(struct cpuinfo_x86 *c) {}
static inline bool handle_user_split_lock(struct pt_regs *regs, long error_code)
@@ -45,12 +45,10 @@ static inline bool handle_guest_split_lock(unsigned long ip)
}
static inline void handle_bus_lock(struct pt_regs *regs) {}
-
-static inline u8 get_this_hybrid_cpu_type(void)
-{
- return 0;
-}
+static inline void split_lock_init(void) {}
+static inline void bus_lock_init(void) {}
#endif
+
#ifdef CONFIG_IA32_FEAT_CTL
void init_ia32_feat_ctl(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h
index eb8fcede9e3b..6be777a06944 100644
--- a/arch/x86/include/asm/cpu_device_id.h
+++ b/arch/x86/include/asm/cpu_device_id.h
@@ -3,6 +3,39 @@
#define _ASM_X86_CPU_DEVICE_ID
/*
+ * Can't use <linux/bitfield.h> because it generates expressions that
+ * cannot be used in structure initializers. Bitfield construction
+ * here must match the union in struct cpuinfo_86:
+ * union {
+ * struct {
+ * __u8 x86_model;
+ * __u8 x86;
+ * __u8 x86_vendor;
+ * __u8 x86_reserved;
+ * };
+ * __u32 x86_vfm;
+ * };
+ */
+#define VFM_MODEL_BIT 0
+#define VFM_FAMILY_BIT 8
+#define VFM_VENDOR_BIT 16
+#define VFM_RSVD_BIT 24
+
+#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT)
+#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT)
+#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT)
+
+#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT)
+#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT)
+#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT)
+
+#define VFM_MAKE(_vendor, _family, _model) ( \
+ ((_model) << VFM_MODEL_BIT) | \
+ ((_family) << VFM_FAMILY_BIT) | \
+ ((_vendor) << VFM_VENDOR_BIT) \
+)
+
+/*
* Declare drivers belonging to specific x86 CPUs
* Similar in spirit to pci_device_id and related PCI functions
*
@@ -20,9 +53,11 @@
#define X86_CENTAUR_FAM6_C7_D 0xd
#define X86_CENTAUR_FAM6_NANO 0xf
-#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/* x86_cpu_id::flags */
+#define X86_CPU_ID_FLAG_ENTRY_VALID BIT(0)
+
/**
- * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching
+ * X86_MATCH_CPU - Base macro for CPU matching
* @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@_vendor
* @_family: The family number or X86_FAMILY_ANY
@@ -39,35 +74,18 @@
* into another macro at the usage site for good reasons, then please
* start this local macro with X86_MATCH to allow easy grepping.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \
- _steppings, _feature, _data) { \
- .vendor = X86_VENDOR_##_vendor, \
+#define X86_MATCH_CPU(_vendor, _family, _model, _steppings, _feature, _type, _data) { \
+ .vendor = _vendor, \
.family = _family, \
.model = _model, \
.steppings = _steppings, \
.feature = _feature, \
+ .flags = X86_CPU_ID_FLAG_ENTRY_VALID, \
+ .type = _type, \
.driver_data = (unsigned long) _data \
}
/**
- * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching
- * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
- * The name is expanded to X86_VENDOR_@_vendor
- * @_family: The family number or X86_FAMILY_ANY
- * @_model: The model number, model constant or X86_MODEL_ANY
- * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY
- * @_data: Driver specific data or NULL. The internal storage
- * format is unsigned long. The supplied value, pointer
- * etc. is casted to unsigned long internally.
- *
- * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is
- * set to wildcards.
- */
-#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \
- X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \
- X86_STEPPING_ANY, feature, data)
-
-/**
* X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature
* @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY
* The name is expanded to X86_VENDOR_@vendor
@@ -76,13 +94,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, \
- X86_MODEL_ANY, feature, data)
+#define X86_MATCH_VENDOR_FAM_FEATURE(vendor, family, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FEATURE - Macro for matching vendor and CPU feature
@@ -92,12 +107,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
- X86_MATCH_VENDOR_FAM_FEATURE(vendor, X86_FAMILY_ANY, feature, data)
+#define X86_MATCH_VENDOR_FEATURE(vendor, feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_FEATURE - Macro for matching a CPU feature
@@ -105,12 +118,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_FEATURE(feature, data) \
- X86_MATCH_VENDOR_FEATURE(ANY, feature, data)
+#define X86_MATCH_FEATURE(feature, data) \
+ X86_MATCH_CPU(X86_VENDOR_ANY, X86_FAMILY_ANY, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FAM_MODEL - Match vendor, family and model
@@ -121,13 +132,10 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set to wildcards.
*/
-#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
- X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, \
- X86_FEATURE_ANY, data)
+#define X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, model, X86_STEPPING_ANY, \
+ X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
/**
* X86_MATCH_VENDOR_FAM - Match vendor and family
@@ -137,60 +145,63 @@
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
* etc. is casted to unsigned long internally.
- *
- * All other missing arguments to X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are
- * set of wildcards.
*/
-#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
- X86_MATCH_VENDOR_FAM_MODEL(vendor, family, X86_MODEL_ANY, data)
+#define X86_MATCH_VENDOR_FAM(vendor, family, data) \
+ X86_MATCH_CPU(X86_VENDOR_##vendor, family, X86_MODEL_ANY, \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
/**
- * X86_MATCH_INTEL_FAM6_MODEL - Match vendor INTEL, family 6 and model
- * @model: The model name without the INTEL_FAM6_ prefix or ANY
- * The model name is expanded to INTEL_FAM6_@model internally
+ * X86_MATCH_VFM - Match encoded vendor/family/model
+ * @vfm: Encoded 8-bits each for vendor, family, model
* @data: Driver specific data or NULL. The internal storage
* format is unsigned long. The supplied value, pointer
- * etc. is casted to unsigned long internally.
- *
- * The vendor is set to INTEL, the family to 6 and all other missing
- * arguments of X86_MATCH_VENDOR_FAM_MODEL_FEATURE() are set to wildcards.
- *
- * See X86_MATCH_VENDOR_FAM_MODEL_FEATURE() for further information.
+ * etc. is cast to unsigned long internally.
*/
-#define X86_MATCH_INTEL_FAM6_MODEL(model, data) \
- X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data)
+#define X86_MATCH_VFM(vfm, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, X86_CPU_TYPE_ANY, data)
-#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \
- X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \
- steppings, X86_FEATURE_ANY, data)
+#define __X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins)
+/**
+ * X86_MATCH_VFM_STEPS - Match encoded vendor/family/model and steppings
+ * range.
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @min_step: Lowest stepping number to match
+ * @max_step: Highest stepping number to match
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_STEPS(vfm, min_step, max_step, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ __X86_STEPPINGS(min_step, max_step), X86_FEATURE_ANY, \
+ X86_CPU_TYPE_ANY, data)
-/*
- * Match specific microcode revisions.
- *
- * vendor/family/model/stepping must be all set.
- *
- * Only checks against the boot CPU. When mixed-stepping configs are
- * valid for a CPU model, add a quirk for every valid stepping and
- * do the fine-tuning in the quirk handler.
+/**
+ * X86_MATCH_VFM_FEATURE - Match encoded vendor/family/model/feature
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @feature: A X86_FEATURE bit
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
*/
+#define X86_MATCH_VFM_FEATURE(vfm, feature, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, feature, X86_CPU_TYPE_ANY, data)
-struct x86_cpu_desc {
- u8 x86_family;
- u8 x86_vendor;
- u8 x86_model;
- u8 x86_stepping;
- u32 x86_microcode_rev;
-};
-
-#define INTEL_CPU_DESC(model, stepping, revision) { \
- .x86_family = 6, \
- .x86_vendor = X86_VENDOR_INTEL, \
- .x86_model = (model), \
- .x86_stepping = (stepping), \
- .x86_microcode_rev = (revision), \
-}
+/**
+ * X86_MATCH_VFM_CPU_TYPE - Match encoded vendor/family/model/type
+ * @vfm: Encoded 8-bits each for vendor, family, model
+ * @type: CPU type e.g. P-core, E-core
+ * @data: Driver specific data or NULL. The internal storage
+ * format is unsigned long. The supplied value, pointer
+ * etc. is cast to unsigned long internally.
+ */
+#define X86_MATCH_VFM_CPU_TYPE(vfm, type, data) \
+ X86_MATCH_CPU(VFM_VENDOR(vfm), VFM_FAMILY(vfm), VFM_MODEL(vfm), \
+ X86_STEPPING_ANY, X86_FEATURE_ANY, type, data)
extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match);
-extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table);
+extern bool x86_match_min_microcode_rev(const struct x86_cpu_id *table);
#endif /* _ASM_X86_CPU_DEVICE_ID */
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 686e92d2663e..893cbca37fe9 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,11 +4,12 @@
#include <asm/processor.h>
-#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+#if defined(__KERNEL__) && !defined(__ASSEMBLER__)
#include <asm/asm.h>
#include <linux/bitops.h>
#include <asm/alternative.h>
+#include <asm/cpufeaturemasks.h>
enum cpuid_leafs
{
@@ -37,107 +38,34 @@ enum cpuid_leafs
NR_CPUID_WORDS,
};
-#define X86_CAP_FMT_NUM "%d:%d"
-#define x86_cap_flag_num(flag) ((flag) >> 5), ((flag) & 31)
-
extern const char * const x86_cap_flags[NCAPINTS*32];
extern const char * const x86_power_flags[32];
-#define X86_CAP_FMT "%s"
-#define x86_cap_flag(flag) x86_cap_flags[flag]
/*
* In order to save room, we index into this array by doing
* X86_BUG_<name> - NCAPINTS*32.
*/
extern const char * const x86_bug_flags[NBUGINTS*32];
+#define x86_bug_flag(flag) x86_bug_flags[flag]
#define test_cpu_cap(c, bit) \
arch_test_bit(bit, (unsigned long *)((c)->x86_capability))
-/*
- * There are 32 bits/features in each mask word. The high bits
- * (selected with (bit>>5) give us the word number and the low 5
- * bits give us the bit/feature number inside the word.
- * (1UL<<((bit)&31) gives us a mask for the feature_bit so we can
- * see if it is set in the mask word.
- */
-#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
- (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
-
-/*
- * {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
- * following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
- * header macros which use NCAPINTS need to be changed. The duplicated macro
- * use causes the compiler to issue errors for all headers so that all usage
- * sites can be corrected.
- */
-#define REQUIRED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
- REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 22))
-
-#define DISABLED_MASK_BIT_SET(feature_bit) \
- ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 1, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 2, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 3, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 4, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 5, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 6, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 7, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 8, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 9, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 10, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 11, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 12, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 13, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 14, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 15, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 16, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 17, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
- CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
- DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 22))
-
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
- x86_this_cpu_test_bit(bit, \
- (unsigned long __percpu *)&cpu_info.x86_capability))
+ x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
/*
- * This macro is for detection of features which need kernel
- * infrastructure to be used. It may *not* directly test the CPU
- * itself. Use the cpu_has() family if you want true runtime
- * testing of CPU features, like in hypervisor code where you are
- * supporting a possible guest feature where host support for it
+ * This is the default CPU features testing macro to use in code.
+ *
+ * It is for detection of features which need kernel infrastructure to be
+ * used. It may *not* directly test the CPU itself. Use the cpu_has() family
+ * if you want true runtime testing of CPU features, like in hypervisor code
+ * where you are supporting a possible guest feature where host support for it
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
@@ -149,22 +77,20 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
extern void setup_clear_cpu_cap(unsigned int bit);
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
-
-#define setup_force_cpu_cap(bit) do { \
- set_cpu_cap(&boot_cpu_data, bit); \
+void check_cpufeature_deps(struct cpuinfo_x86 *c);
+
+#define setup_force_cpu_cap(bit) do { \
+ \
+ if (!boot_cpu_has(bit)) \
+ WARN_ON(alternatives_patched); \
+ \
+ set_cpu_cap(&boot_cpu_data, bit); \
set_bit(bit, (unsigned long *)cpu_caps_set); \
} while (0)
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
/*
- * Static testing of CPU features. Used the same as boot_cpu_has(). It
- * statically patches the target code for additional performance. Use
- * static_cpu_has() only in fast paths, where every cycle counts. Which
- * means that the boot_cpu_has() variant is already fast enough for the
- * majority of cases and you should stick to using it as it is generally
- * only two instructions: a RIP-relative MOV and a TEST.
- *
* Do not use an "m" constraint for [cap_byte] here: gcc doesn't know
* that this is only used on a fallback path and will sometimes cause
* it to manifest the address of boot_cpu_data in a register, fouling
@@ -172,11 +98,10 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm goto(
- ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
+ asm goto(ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
- " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n"
+ " testb %[bitnum], %a[cap_byte]\n"
" jnz %l[t_yes]\n"
" jmp %l[t_no]\n"
".popsection\n"
@@ -212,5 +137,5 @@ t_no:
#define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \
boot_cpu_data.x86_model
-#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */
+#endif /* defined(__KERNEL__) && !defined(__ASSEMBLER__) */
#endif /* _ASM_X86_CPUFEATURE_H */
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 3c7434329661..6c2c152d8a67 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -2,14 +2,6 @@
#ifndef _ASM_X86_CPUFEATURES_H
#define _ASM_X86_CPUFEATURES_H
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#include <asm/required-features.h>
-#endif
-
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#include <asm/disabled-features.h>
-#endif
-
/*
* Defines x86 CPU feature bits
*/
@@ -18,170 +10,170 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature
+ * bit is not displayed in /proc/cpuinfo at all.
*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c as well.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
+/* Free ( 3*32+ 6) */
+/* Free ( 3*32+ 7) */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -189,93 +181,92 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
-#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
-#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
-#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
-#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
-#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
-#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
-#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
-#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */
+#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier without a guaranteed RSB flush */
+#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
-#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
-#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
-#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
-#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */
+#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -283,181 +274,197 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
-#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
-#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
-#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
-#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
-#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
-#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
-#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
-#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
-#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
-#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
-#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
-#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
-#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
-#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
-#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
-#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
-#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
-#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
-#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
-#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
-#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
-#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
-#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
-#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
-#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */
+#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
-#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
-#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
-#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
-#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
-#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
-#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
-#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
-#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */
-#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
-#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
-#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
+#define X86_FEATURE_SHA512 (12*32+ 0) /* SHA512 instructions */
+#define X86_FEATURE_SM3 (12*32+ 1) /* SM3 instructions */
+#define X86_FEATURE_SM4 (12*32+ 2) /* SM4 instructions */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
+#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
+#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
-#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
-#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
-#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
-#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
-#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
-#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
-#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
-#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
-#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
-#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
+#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
+#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_AMD_IBPB_RET (13*32+30) /* IBPB clears return address predictor */
+#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */
+#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */
+#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
-#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
-#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
-#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
-#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
+#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
-#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
-#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
-#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
-#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
-#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */
+#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
-#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
-#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
-#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
-#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
-#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
-#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
-#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
-#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
-#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */
+#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */
+#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */
+#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
-#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
-#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
+#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */
+#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */
+#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
+#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
-#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
-#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
-#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
-#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
-#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
-#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
-#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
-#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
-#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
+#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */
+#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /*
+ * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs.
+ * (SRSO_MSR_FIX in the official doc).
+ */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -465,59 +472,65 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
-#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
-#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
-#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
+#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
+#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
-#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
-#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
-#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
-#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
-#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
-#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
-#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
-#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
-#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
-#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
-#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
-#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
-#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
+#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
+#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */
+#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h
index 6b122a31da06..d5749b25fa10 100644
--- a/arch/x86/include/asm/cpuid.h
+++ b/arch/x86/include/asm/cpuid.h
@@ -1,207 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * CPUID-related helpers/definitions
- */
#ifndef _ASM_X86_CPUID_H
#define _ASM_X86_CPUID_H
-#include <asm/string.h>
-
-struct cpuid_regs {
- u32 eax, ebx, ecx, edx;
-};
-
-enum cpuid_regs_idx {
- CPUID_EAX = 0,
- CPUID_EBX,
- CPUID_ECX,
- CPUID_EDX,
-};
-
-#ifdef CONFIG_X86_32
-extern int have_cpuid_p(void);
-#else
-static inline int have_cpuid_p(void)
-{
- return 1;
-}
-#endif
-static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile("cpuid"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx)
- : "memory");
-}
-
-#define native_cpuid_reg(reg) \
-static inline unsigned int native_cpuid_##reg(unsigned int op) \
-{ \
- unsigned int eax = op, ebx, ecx = 0, edx; \
- \
- native_cpuid(&eax, &ebx, &ecx, &edx); \
- \
- return reg; \
-}
-
-/*
- * Native CPUID functions returning a single datum.
- */
-native_cpuid_reg(eax)
-native_cpuid_reg(ebx)
-native_cpuid_reg(ecx)
-native_cpuid_reg(edx)
-
-#ifdef CONFIG_PARAVIRT_XXL
-#include <asm/paravirt.h>
-#else
-#define __cpuid native_cpuid
-#endif
-
-/*
- * Generic CPUID function
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
- * resulting in stale register contents being returned.
- */
-static inline void cpuid(unsigned int op,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = 0;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(unsigned int op, int count,
- unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- *eax = op;
- *ecx = count;
- __cpuid(eax, ebx, ecx, edx);
-}
-
-/*
- * CPUID functions returning a single datum
- */
-static inline unsigned int cpuid_eax(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return eax;
-}
-
-static inline unsigned int cpuid_ebx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ebx;
-}
-
-static inline unsigned int cpuid_ecx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return ecx;
-}
-
-static inline unsigned int cpuid_edx(unsigned int op)
-{
- unsigned int eax, ebx, ecx, edx;
-
- cpuid(op, &eax, &ebx, &ecx, &edx);
-
- return edx;
-}
-
-static inline void __cpuid_read(unsigned int leaf, unsigned int subleaf, u32 *regs)
-{
- regs[CPUID_EAX] = leaf;
- regs[CPUID_ECX] = subleaf;
- __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
-}
-
-#define cpuid_subleaf(leaf, subleaf, regs) { \
- static_assert(sizeof(*(regs)) == 16); \
- __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
-}
-
-#define cpuid_leaf(leaf, regs) { \
- static_assert(sizeof(*(regs)) == 16); \
- __cpuid_read(leaf, 0, (u32 *)(regs)); \
-}
-
-static inline void __cpuid_read_reg(unsigned int leaf, unsigned int subleaf,
- enum cpuid_regs_idx regidx, u32 *reg)
-{
- u32 regs[4];
-
- __cpuid_read(leaf, subleaf, regs);
- *reg = regs[regidx];
-}
-
-#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
- static_assert(sizeof(*(reg)) == 4); \
- __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
-}
-
-#define cpuid_leaf_reg(leaf, regidx, reg) { \
- static_assert(sizeof(*(reg)) == 4); \
- __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
-}
-
-static __always_inline bool cpuid_function_is_indexed(u32 function)
-{
- switch (function) {
- case 4:
- case 7:
- case 0xb:
- case 0xd:
- case 0xf:
- case 0x10:
- case 0x12:
- case 0x14:
- case 0x17:
- case 0x18:
- case 0x1d:
- case 0x1e:
- case 0x1f:
- case 0x8000001d:
- return true;
- }
-
- return false;
-}
-
-#define for_each_possible_hypervisor_cpuid_base(function) \
- for (function = 0x40000000; function < 0x40010000; function += 0x100)
-
-static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
-{
- uint32_t base, eax, signature[3];
-
- for_each_possible_hypervisor_cpuid_base(base) {
- cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
-
- if (!memcmp(sig, signature, 12) &&
- (leaves == 0 || ((eax - base) >= leaves)))
- return base;
- }
-
- return 0;
-}
+#include <asm/cpuid/api.h>
#endif /* _ASM_X86_CPUID_H */
diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h
new file mode 100644
index 000000000000..9c180c9cc58e
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/api.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_API_H
+#define _ASM_X86_CPUID_API_H
+
+#include <asm/cpuid/types.h>
+
+#include <linux/build_bug.h>
+#include <linux/types.h>
+
+#include <asm/string.h>
+
+/*
+ * Raw CPUID accessors:
+ */
+
+#ifdef CONFIG_X86_32
+bool have_cpuid_p(void);
+#else
+static inline bool have_cpuid_p(void)
+{
+ return true;
+}
+#endif
+
+static inline void native_cpuid(u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+#define NATIVE_CPUID_REG(reg) \
+static inline u32 native_cpuid_##reg(u32 op) \
+{ \
+ u32 eax = op, ebx, ecx = 0, edx; \
+ \
+ native_cpuid(&eax, &ebx, &ecx, &edx); \
+ \
+ return reg; \
+}
+
+/*
+ * Native CPUID functions returning a single datum:
+ */
+NATIVE_CPUID_REG(eax)
+NATIVE_CPUID_REG(ebx)
+NATIVE_CPUID_REG(ecx)
+NATIVE_CPUID_REG(edx)
+
+#ifdef CONFIG_PARAVIRT_XXL
+# include <asm/paravirt.h>
+#else
+# define __cpuid native_cpuid
+#endif
+
+/*
+ * Generic CPUID function
+ *
+ * Clear ECX since some CPUs (Cyrix MII) do not set or clear ECX
+ * resulting in stale register contents being returned.
+ */
+static inline void cpuid(u32 op,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = 0;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/* Some CPUID calls want 'count' to be placed in ECX */
+static inline void cpuid_count(u32 op, int count,
+ u32 *eax, u32 *ebx,
+ u32 *ecx, u32 *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+/*
+ * CPUID functions returning a single datum:
+ */
+
+static inline u32 cpuid_eax(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return eax;
+}
+
+static inline u32 cpuid_ebx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ebx;
+}
+
+static inline u32 cpuid_ecx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return ecx;
+}
+
+static inline u32 cpuid_edx(u32 op)
+{
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(op, &eax, &ebx, &ecx, &edx);
+
+ return edx;
+}
+
+static inline void __cpuid_read(u32 leaf, u32 subleaf, u32 *regs)
+{
+ regs[CPUID_EAX] = leaf;
+ regs[CPUID_ECX] = subleaf;
+ __cpuid(regs + CPUID_EAX, regs + CPUID_EBX, regs + CPUID_ECX, regs + CPUID_EDX);
+}
+
+#define cpuid_subleaf(leaf, subleaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, subleaf, (u32 *)(regs)); \
+}
+
+#define cpuid_leaf(leaf, regs) { \
+ static_assert(sizeof(*(regs)) == 16); \
+ __cpuid_read(leaf, 0, (u32 *)(regs)); \
+}
+
+static inline void __cpuid_read_reg(u32 leaf, u32 subleaf,
+ enum cpuid_regs_idx regidx, u32 *reg)
+{
+ u32 regs[4];
+
+ __cpuid_read(leaf, subleaf, regs);
+ *reg = regs[regidx];
+}
+
+#define cpuid_subleaf_reg(leaf, subleaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, subleaf, regidx, (u32 *)(reg)); \
+}
+
+#define cpuid_leaf_reg(leaf, regidx, reg) { \
+ static_assert(sizeof(*(reg)) == 4); \
+ __cpuid_read_reg(leaf, 0, regidx, (u32 *)(reg)); \
+}
+
+static __always_inline bool cpuid_function_is_indexed(u32 function)
+{
+ switch (function) {
+ case 4:
+ case 7:
+ case 0xb:
+ case 0xd:
+ case 0xf:
+ case 0x10:
+ case 0x12:
+ case 0x14:
+ case 0x17:
+ case 0x18:
+ case 0x1d:
+ case 0x1e:
+ case 0x1f:
+ case 0x24:
+ case 0x8000001d:
+ return true;
+ }
+
+ return false;
+}
+
+#define for_each_possible_hypervisor_cpuid_base(function) \
+ for (function = 0x40000000; function < 0x40010000; function += 0x100)
+
+static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves)
+{
+ u32 base, eax, signature[3];
+
+ for_each_possible_hypervisor_cpuid_base(base) {
+ cpuid(base, &eax, &signature[0], &signature[1], &signature[2]);
+
+ /*
+ * This must not compile to "call memcmp" because it's called
+ * from PVH early boot code before instrumentation is set up
+ * and memcmp() itself may be instrumented.
+ */
+ if (!__builtin_memcmp(sig, signature, 12) &&
+ (leaves == 0 || ((eax - base) >= leaves)))
+ return base;
+ }
+
+ return 0;
+}
+
+#endif /* _ASM_X86_CPUID_API_H */
diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
new file mode 100644
index 000000000000..8582e27e836d
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_TYPES_H
+#define _ASM_X86_CPUID_TYPES_H
+
+#include <linux/types.h>
+
+/*
+ * Types for raw CPUID access:
+ */
+
+struct cpuid_regs {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+};
+
+enum cpuid_regs_idx {
+ CPUID_EAX = 0,
+ CPUID_EBX,
+ CPUID_ECX,
+ CPUID_EDX,
+};
+
+#define CPUID_LEAF_MWAIT 0x05
+#define CPUID_LEAF_DCA 0x09
+#define CPUID_LEAF_XSTATE 0x0d
+#define CPUID_LEAF_TSC 0x15
+#define CPUID_LEAF_FREQ 0x16
+#define CPUID_LEAF_TILE 0x1d
+
+#endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 4acfd57de8f1..70f6b60ad67b 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CPUMASK_H
#define _ASM_X86_CPUMASK_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
extern void setup_cpu_local_masks(void);
@@ -34,5 +34,5 @@ static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp
#define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu))
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index bf5953883ec3..cc4a3f725b37 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -5,52 +5,28 @@
#include <linux/build_bug.h>
#include <linux/compiler.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cache.h>
#include <asm/percpu.h>
struct task_struct;
-struct pcpu_hot {
- union {
- struct {
- struct task_struct *current_task;
- int preempt_count;
- int cpu_number;
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
- u64 call_depth;
-#endif
- unsigned long top_of_stack;
- void *hardirq_stack_ptr;
- u16 softirq_pending;
-#ifdef CONFIG_X86_64
- bool hardirq_stack_inuse;
-#else
- void *softirq_stack_ptr;
-#endif
- };
- u8 pad[64];
- };
-};
-static_assert(sizeof(struct pcpu_hot) == 64);
-
-DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
-
-/* const-qualified alias to pcpu_hot, aliased by linker. */
-DECLARE_PER_CPU_ALIGNED(const struct pcpu_hot __percpu_seg_override,
- const_pcpu_hot);
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct *, current_task);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(struct task_struct * const __percpu_seg_override,
+ const_current_task);
static __always_inline struct task_struct *get_current(void)
{
if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
- return this_cpu_read_const(const_pcpu_hot.current_task);
+ return this_cpu_read_const(const_current_task);
- return this_cpu_read_stable(pcpu_hot.current_task);
+ return this_cpu_read_stable(current_task);
}
#define current get_current()
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 62dc9f59ea76..ec95fe44fa3a 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -46,7 +46,6 @@ struct gdt_page {
} __attribute__((aligned(PAGE_SIZE)));
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-DECLARE_INIT_PER_CPU(gdt_page);
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h
index d440a65af8f3..7e6b9314758a 100644
--- a/arch/x86/include/asm/desc_defs.h
+++ b/arch/x86/include/asm/desc_defs.h
@@ -58,7 +58,7 @@
#define DESC_USER (_DESC_DPL(3))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -166,7 +166,7 @@ struct desc_ptr {
unsigned long address;
} __attribute__((packed)) ;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Boot IDT definitions */
#define BOOT_IDT_ENTRIES 32
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
deleted file mode 100644
index c492bdc97b05..000000000000
--- a/arch/x86/include/asm/disabled-features.h
+++ /dev/null
@@ -1,161 +0,0 @@
-#ifndef _ASM_X86_DISABLED_FEATURES_H
-#define _ASM_X86_DISABLED_FEATURES_H
-
-/* These features, although they might be available in a CPU
- * will not be used because the compile options to support
- * them are not present.
- *
- * This code allows them to be checked and disabled at
- * compile time without an explicit #ifdef. Use
- * cpu_feature_enabled().
- */
-
-#ifdef CONFIG_X86_UMIP
-# define DISABLE_UMIP 0
-#else
-# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
-#endif
-
-#ifdef CONFIG_X86_64
-# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
-# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
-# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
-# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
-# define DISABLE_PCID 0
-#else
-# define DISABLE_VME 0
-# define DISABLE_K6_MTRR 0
-# define DISABLE_CYRIX_ARR 0
-# define DISABLE_CENTAUR_MCR 0
-# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
-#endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-# define DISABLE_PKU 0
-# define DISABLE_OSPKE 0
-#else
-# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
-# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
-#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
-
-#ifdef CONFIG_X86_5LEVEL
-# define DISABLE_LA57 0
-#else
-# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-# define DISABLE_PTI 0
-#else
-# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETPOLINE
-# define DISABLE_RETPOLINE 0
-#else
-# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
- (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
-#endif
-
-#ifdef CONFIG_MITIGATION_RETHUNK
-# define DISABLE_RETHUNK 0
-#else
-# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_UNRET_ENTRY
-# define DISABLE_UNRET 0
-#else
-# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
-#endif
-
-#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
-# define DISABLE_CALL_DEPTH_TRACKING 0
-#else
-# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
-#endif
-
-#ifdef CONFIG_ADDRESS_MASKING
-# define DISABLE_LAM 0
-#else
-# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
-#endif
-
-#ifdef CONFIG_INTEL_IOMMU_SVM
-# define DISABLE_ENQCMD 0
-#else
-# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
-#endif
-
-#ifdef CONFIG_X86_SGX
-# define DISABLE_SGX 0
-#else
-# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
-#endif
-
-#ifdef CONFIG_XEN_PV
-# define DISABLE_XENPV 0
-#else
-# define DISABLE_XENPV (1 << (X86_FEATURE_XENPV & 31))
-#endif
-
-#ifdef CONFIG_INTEL_TDX_GUEST
-# define DISABLE_TDX_GUEST 0
-#else
-# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
-#endif
-
-#ifdef CONFIG_X86_USER_SHADOW_STACK
-#define DISABLE_USER_SHSTK 0
-#else
-#define DISABLE_USER_SHSTK (1 << (X86_FEATURE_USER_SHSTK & 31))
-#endif
-
-#ifdef CONFIG_X86_KERNEL_IBT
-#define DISABLE_IBT 0
-#else
-#define DISABLE_IBT (1 << (X86_FEATURE_IBT & 31))
-#endif
-
-#ifdef CONFIG_X86_FRED
-# define DISABLE_FRED 0
-#else
-# define DISABLE_FRED (1 << (X86_FEATURE_FRED & 31))
-#endif
-
-#ifdef CONFIG_KVM_AMD_SEV
-#define DISABLE_SEV_SNP 0
-#else
-#define DISABLE_SEV_SNP (1 << (X86_FEATURE_SEV_SNP & 31))
-#endif
-
-/*
- * Make sure to add features to the correct mask
- */
-#define DISABLED_MASK0 (DISABLE_VME)
-#define DISABLED_MASK1 0
-#define DISABLED_MASK2 0
-#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
-#define DISABLED_MASK4 (DISABLE_PCID)
-#define DISABLED_MASK5 0
-#define DISABLED_MASK6 0
-#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST)
-#define DISABLED_MASK9 (DISABLE_SGX)
-#define DISABLED_MASK10 0
-#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
- DISABLE_CALL_DEPTH_TRACKING|DISABLE_USER_SHSTK)
-#define DISABLED_MASK12 (DISABLE_FRED|DISABLE_LAM)
-#define DISABLED_MASK13 0
-#define DISABLED_MASK14 0
-#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
- DISABLE_ENQCMD)
-#define DISABLED_MASK17 0
-#define DISABLED_MASK18 (DISABLE_IBT)
-#define DISABLED_MASK19 (DISABLE_SEV_SNP)
-#define DISABLED_MASK20 0
-#define DISABLED_MASK21 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index 430fca13bb56..302e11b15da8 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_DWARF2_H
#define _ASM_X86_DWARF2_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#warning "asm/dwarf2.h should be only included in pure assembly files"
#endif
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index e8f58ddd06d9..c83645d5b2a8 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -17,6 +17,7 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
+extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
extern void e820__print_table(char *who);
extern int e820__update_table(struct e820_table *table);
@@ -28,7 +29,6 @@ extern unsigned long e820__end_of_low_ram_pfn(void);
extern u64 e820__memblock_alloc_reserved(u64 size, u64 align);
extern void e820__memblock_setup(void);
-extern void e820__reserve_setup_data(void);
extern void e820__finish_early_params(void);
extern void e820__reserve_resources(void);
extern void e820__reserve_resources_late(void);
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index 314f75d886d0..80c4a7266629 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -35,15 +35,6 @@ enum e820_type {
* marking it with the IORES_DESC_SOFT_RESERVED designation.
*/
E820_TYPE_SOFT_RESERVED = 0xefffffff,
-
- /*
- * Reserved RAM used by the kernel itself if
- * CONFIG_INTEL_TXT=y is enabled, memory of this type
- * will be included in the S3 integrity calculation
- * and so should not include any memory that the BIOS
- * might alter over the S3 transition:
- */
- E820_TYPE_RESERVED_KERN = 128,
};
/*
diff --git a/arch/x86/include/asm/edac.h b/arch/x86/include/asm/edac.h
index 426fc53ff803..dfbd1ebb9f10 100644
--- a/arch/x86/include/asm/edac.h
+++ b/arch/x86/include/asm/edac.h
@@ -13,7 +13,7 @@ static inline void edac_atomic_scrub(void *va, u32 size)
* are interrupt, DMA and SMP safe.
*/
for (i = 0; i < size / 4; i++, virt_addr++)
- asm volatile("lock; addl $0, %0"::"m" (*virt_addr));
+ asm volatile("lock addl $0, %0"::"m" (*virt_addr));
}
#endif /* _ASM_X86_EDAC_H */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 1dc600fa3ba5..f227a70ac91f 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -229,7 +229,8 @@ static inline bool efi_is_native(void)
static inline void *efi64_zero_upper(void *p)
{
- ((u32 *)p)[1] = 0;
+ if (p)
+ ((u32 *)p)[1] = 0;
return p;
}
@@ -249,6 +250,9 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_allocate_pool(type, size, buffer) \
((type), (size), efi64_zero_upper(buffer))
+#define __efi64_argmap_locate_handle_buffer(type, proto, key, num, buf) \
+ ((type), (proto), (key), efi64_zero_upper(num), efi64_zero_upper(buf))
+
#define __efi64_argmap_create_event(type, tpl, f, c, event) \
((type), (tpl), (f), (c), efi64_zero_upper(event))
@@ -315,6 +319,10 @@ static inline u32 efi64_convert_status(efi_status_t status)
#define __efi64_argmap_clear_memory_attributes(protocol, phys, size, flags) \
((protocol), __efi64_split(phys), __efi64_split(size), __efi64_split(flags))
+/* EFI SMBIOS protocol */
+#define __efi64_argmap_get_next(protocol, smbioshandle, type, record, phandle) \
+ ((protocol), (smbioshandle), (type), efi64_zero_upper(record), \
+ efi64_zero_upper(phandle))
/*
* The macros below handle the plumbing for the argument mapping. To add a
* mapping for a specific EFI method, simply define a macro
@@ -384,24 +392,8 @@ static inline void efi_reserve_boot_services(void)
}
#endif /* CONFIG_EFI */
-#ifdef CONFIG_EFI_FAKE_MEMMAP
-extern void __init efi_fake_memmap_early(void);
-extern void __init efi_fake_memmap(void);
-#else
-static inline void efi_fake_memmap_early(void)
-{
-}
-
-static inline void efi_fake_memmap(void)
-{
-}
-#endif
-
extern int __init efi_memmap_alloc(unsigned int num_entries,
struct efi_memory_map_data *data);
-extern void __efi_memmap_free(u64 phys, unsigned long size,
- unsigned long flags);
-#define __efi_memmap_free __efi_memmap_free
extern int __init efi_memmap_install(struct efi_memory_map_data *data);
extern int __init efi_memmap_split_count(efi_memory_desc_t *md,
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1fb83d47711f..128602612eca 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -54,8 +54,9 @@ typedef struct user_i387_struct elf_fpregset_t;
#define R_X86_64_GLOB_DAT 6 /* Create GOT entry */
#define R_X86_64_JUMP_SLOT 7 /* Create PLT entry */
#define R_X86_64_RELATIVE 8 /* Adjust by program base */
-#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative
- offset to GOT */
+#define R_X86_64_GOTPCREL 9 /* 32 bit signed pc relative offset to GOT */
+#define R_X86_64_GOTPCRELX 41
+#define R_X86_64_REX_GOTPCRELX 42
#define R_X86_64_32 10 /* Direct 32 bit zero extended */
#define R_X86_64_32S 11 /* Direct 32 bit sign extended */
#define R_X86_64_16 12 /* Direct 16 bit zero extended */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7e523bb3d2d3..77d20555e04d 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -8,6 +8,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/fpu/api.h>
+#include <asm/fred.h>
/* Check that the stack and regs on entry from user mode are sane. */
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
}
#define arch_enter_from_user_mode arch_enter_from_user_mode
-static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
- unsigned long ti_work)
+static inline void arch_exit_work(unsigned long ti_work)
{
if (ti_work & _TIF_USER_RETURN_NOTIFY)
fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
fpregs_assert_state_consistent();
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
+}
+
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
+ arch_exit_work(ti_work);
+
+ fred_update_rsp0();
#ifdef CONFIG_COMPAT
/*
@@ -73,19 +82,16 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
#endif
/*
- * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(),
- * but not enough for x86 stack utilization comfort. To keep
- * reasonable stack head room, reduce the maximum offset to 8 bits.
- *
- * The actual entropy will be further reduced by the compiler when
- * applying stack alignment constraints (see cc_stack_align4/8 in
+ * This value will get limited by KSTACK_OFFSET_MAX(), which is 10
+ * bits. The actual entropy will be further reduced by the compiler
+ * when applying stack alignment constraints (see cc_stack_align4/8 in
* arch/x86/Makefile), which will remove the 3 (x86_64) or 2 (ia32)
* low bits from any entropy chosen here.
*
- * Therefore, final stack offset entropy will be 5 (x86_64) or
- * 6 (ia32) bits.
+ * Therefore, final stack offset entropy will be 7 (x86_64) or
+ * 8 (ia32) bits.
*/
- choose_random_kstack_offset(rdtsc() & 0xFF);
+ choose_random_kstack_offset(rdtsc());
}
#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
diff --git a/arch/x86/include/asm/extable.h b/arch/x86/include/asm/extable.h
index eeed395c3177..a0e0c6b50155 100644
--- a/arch/x86/include/asm/extable.h
+++ b/arch/x86/include/asm/extable.h
@@ -37,7 +37,6 @@ struct pt_regs;
extern int fixup_exception(struct pt_regs *regs, int trapnr,
unsigned long error_code, unsigned long fault_addr);
-extern int fixup_bug(struct pt_regs *regs, int trapnr);
extern int ex_get_fixup_type(unsigned long ip);
extern void early_fixup_exception(struct pt_regs *regs, int trapnr);
diff --git a/arch/x86/include/asm/extable_fixup_types.h b/arch/x86/include/asm/extable_fixup_types.h
index 7acf0383be80..906b0d5541e8 100644
--- a/arch/x86/include/asm/extable_fixup_types.h
+++ b/arch/x86/include/asm/extable_fixup_types.h
@@ -36,7 +36,7 @@
#define EX_TYPE_DEFAULT 1
#define EX_TYPE_FAULT 2
#define EX_TYPE_UACCESS 3
-#define EX_TYPE_COPY 4
+/* unused, was: #define EX_TYPE_COPY 4 */
#define EX_TYPE_CLEAR_FS 5
#define EX_TYPE_FPU_RESTORE 6
#define EX_TYPE_BPF 7
diff --git a/arch/x86/include/asm/fb.h b/arch/x86/include/asm/fb.h
deleted file mode 100644
index c3b9582de7ef..000000000000
--- a/arch/x86/include/asm/fb.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_FB_H
-#define _ASM_X86_FB_H
-
-#include <asm/page.h>
-
-struct fb_info;
-
-pgprot_t pgprot_framebuffer(pgprot_t prot,
- unsigned long vm_start, unsigned long vm_end,
- unsigned long offset);
-#define pgprot_framebuffer pgprot_framebuffer
-
-int fb_is_primary_device(struct fb_info *info);
-#define fb_is_primary_device fb_is_primary_device
-
-#include <asm-generic/fb.h>
-
-#endif /* _ASM_X86_FB_H */
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index d0dcefb5cc59..4519c9f35ba0 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -31,7 +31,7 @@
/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
#define FIXMAP_PMD_TOP 507
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/kernel.h>
#include <asm/apicdef.h>
#include <asm/page.h>
@@ -196,5 +196,5 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
void __early_set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FIXMAP_H */
diff --git a/arch/x86/include/asm/fpu.h b/arch/x86/include/asm/fpu.h
new file mode 100644
index 000000000000..b2743fe19339
--- /dev/null
+++ b/arch/x86/include/asm/fpu.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023 SiFive
+ */
+
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+#include <asm/fpu/api.h>
+
+#define kernel_fpu_available() true
+
+#endif /* ! _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index a2be3aefff9f..f42de5f05e7e 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -16,10 +16,9 @@
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
- * disables preemption so be careful if you intend to use it for long periods
- * of time.
- * If you intend to use the FPU in irq/softirq you need to check first with
- * irq_fpu_usable() if it is possible.
+ * disables preemption and softirq processing, so be careful if you intend to
+ * use it for long periods of time. Kernel-mode FPU cannot be used in all
+ * contexts -- see irq_fpu_usable() for details.
*/
/* Kernel FPU states to initialize in kernel_fpu_begin_mask() */
@@ -50,10 +49,10 @@ static inline void kernel_fpu_begin(void)
}
/*
- * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate.
- * A context switch will (and softirq might) save CPU's FPU registers to
- * fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving CPU's FPU registers in
- * a random state.
+ * Use fpregs_lock() while editing CPU's FPU registers or fpu->fpstate, or while
+ * using the FPU in kernel mode. A context switch will (and softirq might) save
+ * CPU's FPU registers to fpu->fpstate.regs and set TIF_NEED_FPU_LOAD leaving
+ * CPU's FPU registers in a random state.
*
* local_bh_disable() protects against both preemption and soft interrupts
* on !RT kernels.
@@ -63,8 +62,6 @@ static inline void kernel_fpu_begin(void)
* preemptible. Disabling preemption is the right choice here as bottom
* half processing is always in thread context on RT kernels so it
* implicitly prevents bottom half processing as well.
- *
- * Disabling preemption also serializes against kernel_fpu_begin().
*/
static inline void fpregs_lock(void)
{
@@ -143,6 +140,9 @@ extern void fpstate_clear_xstate_component(struct fpstate *fps, unsigned int xfe
extern u64 xstate_get_guest_group_perm(void);
+extern void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+
+
/* KVM specific functions */
extern bool fpu_alloc_guest_fpstate(struct fpu_guest *gfpu);
extern void fpu_free_guest_fpstate(struct fpu_guest *gfpu);
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 611fa41711af..eccc75bc9c4f 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -29,7 +29,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
unsigned long fpu__get_fpstate_size(void);
-extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+extern bool copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size, u32 pkru);
extern void fpu__clear_user_states(struct fpu *fpu);
extern bool fpu__restore_sig(void __user *buf, int ia32_frame);
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index ace9aa3b78a3..de16862bf230 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -2,8 +2,8 @@
/*
* FPU data structures:
*/
-#ifndef _ASM_X86_FPU_H
-#define _ASM_X86_FPU_H
+#ifndef _ASM_X86_FPU_TYPES_H
+#define _ASM_X86_FPU_TYPES_H
#include <asm/page_types.h>
@@ -591,9 +591,16 @@ struct fpu_state_config {
* even without XSAVE support, i.e. legacy features FP + SSE
*/
u64 legacy_features;
+ /*
+ * @independent_features:
+ *
+ * Features that are supported by XSAVES, but not managed as part of
+ * the FPU core, such as LBR
+ */
+ u64 independent_features;
};
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
-#endif /* _ASM_X86_FPU_H */
+#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index d4427b88ee12..7f39fe7980c5 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -12,10 +12,6 @@
/* Bit 63 of XCR0 is reserved for future expansion */
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
-#define XSTATE_CPUID 0x0000000d
-
-#define TILE_CPUID 0x0000001d
-
#define FXSAVE_SIZE 512
#define XSAVE_HDR_SIZE 64
diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h
index fb42659f6e98..0ab65073c1cc 100644
--- a/arch/x86/include/asm/frame.h
+++ b/arch/x86/include/asm/frame.h
@@ -11,7 +11,7 @@
#ifdef CONFIG_FRAME_POINTER
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro FRAME_BEGIN
push %_ASM_BP
@@ -51,7 +51,7 @@
.endm
#endif /* CONFIG_X86_64 */
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#define FRAME_BEGIN \
"push %" _ASM_BP "\n" \
@@ -82,18 +82,18 @@ static inline unsigned long encode_frame_pointer(struct pt_regs *regs)
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define FRAME_OFFSET __ASM_SEL(4, 8)
#else /* !CONFIG_FRAME_POINTER */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro ENCODE_FRAME_POINTER ptregs_offset=0
.endm
-#else /* !__ASSEMBLY */
+#else /* !__ASSEMBLER__ */
#define ENCODE_FRAME_POINTER
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index e86c7ba32435..2a29e5216881 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -32,10 +32,11 @@
#define FRED_CONFIG_INT_STKLVL(l) (_AT(unsigned long, l) << 9)
#define FRED_CONFIG_ENTRYPOINT(p) _AT(unsigned long, (p))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_FRED
#include <linux/kernel.h>
+#include <linux/sched/task_stack.h>
#include <asm/ptrace.h>
@@ -84,14 +85,34 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
}
void cpu_init_fred_exceptions(void);
+void cpu_init_fred_rsps(void);
void fred_complete_exception_setup(void);
+DECLARE_PER_CPU(unsigned long, fred_rsp0);
+
+static __always_inline void fred_sync_rsp0(unsigned long rsp0)
+{
+ __this_cpu_write(fred_rsp0, rsp0);
+}
+
+static __always_inline void fred_update_rsp0(void)
+{
+ unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
+
+ if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
+ wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
+ __this_cpu_write(fred_rsp0, rsp0);
+ }
+}
#else /* CONFIG_X86_FRED */
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
static inline void cpu_init_fred_exceptions(void) { }
+static inline void cpu_init_fred_rsps(void) { }
static inline void fred_complete_exception_setup(void) { }
-static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
+static inline void fred_sync_rsp0(unsigned long rsp0) { }
+static inline void fred_update_rsp0(void) { }
#endif /* CONFIG_X86_FRED */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* ASM_X86_FRED_H */
diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
index 9e7e8ca8e299..02f239569b93 100644
--- a/arch/x86/include/asm/fsgsbase.h
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -2,7 +2,7 @@
#ifndef _ASM_FSGSBASE_H
#define _ASM_FSGSBASE_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_64
@@ -80,6 +80,6 @@ extern unsigned long x86_fsgsbase_read_task(struct task_struct *task,
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_FSGSBASE_H */
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 897cf02c20b1..93156ac4ffe0 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -2,6 +2,8 @@
#ifndef _ASM_X86_FTRACE_H
#define _ASM_X86_FTRACE_H
+#include <asm/ptrace.h>
+
#ifdef CONFIG_FUNCTION_TRACER
#ifndef CC_USING_FENTRY
# error Compiler does not support fentry?
@@ -20,9 +22,7 @@
#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif
-#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern void __fentry__(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
@@ -34,38 +34,44 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+static inline unsigned long arch_ftrace_get_symaddr(unsigned long fentry_ip)
+{
+ if (is_endbr((void*)(fentry_ip - ENDBR_INSN_SIZE)))
+ fentry_ip -= ENDBR_INSN_SIZE;
+
+ return fentry_ip;
+}
+#define ftrace_get_symaddr(fentry_ip) arch_ftrace_get_symaddr(fentry_ip)
+
#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
-struct ftrace_regs {
- struct pt_regs regs;
-};
+
+#include <linux/ftrace_regs.h>
static __always_inline struct pt_regs *
arch_ftrace_get_regs(struct ftrace_regs *fregs)
{
/* Only when FL_SAVE_REGS is set, cs will be non zero */
- if (!fregs->regs.cs)
+ if (!arch_ftrace_regs(fregs)->regs.cs)
return NULL;
- return &fregs->regs;
+ return &arch_ftrace_regs(fregs)->regs;
}
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do { \
+ (_regs)->ip = arch_ftrace_regs(fregs)->regs.ip; \
+ (_regs)->sp = arch_ftrace_regs(fregs)->regs.sp; \
+ (_regs)->cs = __KERNEL_CS; \
+ (_regs)->flags = 0; \
+ } while (0)
+
#define ftrace_regs_set_instruction_pointer(fregs, _ip) \
- do { (fregs)->regs.ip = (_ip); } while (0)
-
-#define ftrace_regs_get_instruction_pointer(fregs) \
- ((fregs)->regs.ip)
-
-#define ftrace_regs_get_argument(fregs, n) \
- regs_get_kernel_argument(&(fregs)->regs, n)
-#define ftrace_regs_get_stack_pointer(fregs) \
- kernel_stack_pointer(&(fregs)->regs)
-#define ftrace_regs_return_value(fregs) \
- regs_return_value(&(fregs)->regs)
-#define ftrace_regs_set_return_value(fregs, ret) \
- regs_set_return_value(&(fregs)->regs, ret)
-#define ftrace_override_function_with_return(fregs) \
- override_function_with_return(&(fregs)->regs)
-#define ftrace_regs_query_register_offset(name) \
- regs_query_register_offset(name)
+ do { arch_ftrace_regs(fregs)->regs.ip = (_ip); } while (0)
+
+
+static __always_inline unsigned long
+ftrace_regs_get_return_address(struct ftrace_regs *fregs)
+{
+ return *(unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+}
struct ftrace_ops;
#define ftrace_graph_func ftrace_graph_func
@@ -90,7 +96,7 @@ __arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
regs->orig_ax = addr;
}
#define arch_ftrace_set_direct_caller(fregs, addr) \
- __arch_ftrace_set_direct_caller(&(fregs)->regs, addr)
+ __arch_ftrace_set_direct_caller(&arch_ftrace_regs(fregs)->regs, addr)
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -100,11 +106,11 @@ struct dyn_arch_ftrace {
};
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_FUNCTION_TRACER */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
void prepare_ftrace_return(unsigned long ip, unsigned long *parent,
unsigned long frame_pointer);
@@ -148,26 +154,6 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs)
}
#endif /* CONFIG_FTRACE_SYSCALLS && CONFIG_IA32_EMULATION */
#endif /* !COMPILE_OFFSETS */
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-struct fgraph_ret_regs {
- unsigned long ax;
- unsigned long dx;
- unsigned long bp;
-};
-
-static inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->ax;
-}
-
-static inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs)
-{
- return ret_regs->bp;
-}
-#endif /* ifdef CONFIG_FUNCTION_GRAPH_TRACER */
-#endif
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_FTRACE_H */
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 99d345b686fa..6e2458088800 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -48,7 +48,9 @@ do { \
static __always_inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
switch (op) {
@@ -84,7 +86,9 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
{
int ret = 0;
- if (!user_access_begin(uaddr, sizeof(u32)))
+ if (can_do_masked_user_access())
+ uaddr = masked_user_access_begin(uaddr);
+ else if (!user_access_begin(uaddr, sizeof(u32)))
return -EFAULT;
asm volatile("\n"
"1:\t" LOCK_PREFIX "cmpxchgl %3, %2\n"
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index fbc7722b87d1..f00c09ffe6a9 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -3,7 +3,6 @@
#define _ASM_X86_HARDIRQ_H
#include <linux/threads.h>
-#include <asm/current.h>
typedef struct {
#if IS_ENABLED(CONFIG_KVM_INTEL)
@@ -44,10 +43,16 @@ typedef struct {
unsigned int irq_hv_reenlightenment_count;
unsigned int hyperv_stimer0_count;
#endif
+#ifdef CONFIG_X86_POSTED_MSI
+ unsigned int posted_msi_notification_count;
+#endif
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+#ifdef CONFIG_X86_POSTED_MSI
+DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
+#endif
#define __ARCH_IRQ_STAT
#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
@@ -60,10 +65,15 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
extern u64 arch_irq_stat(void);
#define arch_irq_stat arch_irq_stat
-#define local_softirq_pending_ref pcpu_hot.softirq_pending
+DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending);
+#define local_softirq_pending_ref __softirq_pending
#if IS_ENABLED(CONFIG_KVM_INTEL)
-static inline void kvm_set_cpu_l1tf_flush_l1d(void)
+/*
+ * This function is called from noinstr interrupt contexts
+ * and must be inlined to not get instrumentation.
+ */
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void)
{
__this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1);
}
@@ -78,7 +88,7 @@ static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void)
return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d);
}
#else /* !IS_ENABLED(CONFIG_KVM_INTEL) */
-static inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
+static __always_inline void kvm_set_cpu_l1tf_flush_l1d(void) { }
#endif /* IS_ENABLED(CONFIG_KVM_INTEL) */
#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h
index 731ee7cc40a5..585bdadba47d 100644
--- a/arch/x86/include/asm/highmem.h
+++ b/arch/x86/include/asm/highmem.h
@@ -69,9 +69,6 @@ extern unsigned long highstart_pfn, highend_pfn;
arch_flush_lazy_mmu_mode(); \
} while (0)
-extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn);
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_HIGHMEM_H */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index ab9f3dd87c80..ab0c78855ecb 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -84,7 +84,6 @@ extern int hpet_set_rtc_irq_bit(unsigned long bit_mask);
extern int hpet_set_alarm_time(unsigned char hrs, unsigned char min,
unsigned char sec);
extern int hpet_set_periodic_freq(unsigned long freq);
-extern int hpet_rtc_dropped_irq(void);
extern int hpet_rtc_timer_init(void);
extern irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id);
extern int hpet_register_irq_handler(rtc_irq_handler handler);
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index edebf1020e04..162ebd73a698 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -16,7 +16,7 @@
#include <asm/irq_vectors.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/percpu.h>
#include <linux/profile.h>
@@ -128,6 +128,6 @@ extern char spurious_entries_start[];
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
DECLARE_PER_CPU(vector_irq_t, vector_irq);
-#endif /* !ASSEMBLY_ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_HW_IRQ_H */
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
deleted file mode 100644
index 3787d26810c1..000000000000
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ /dev/null
@@ -1,811 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-/*
- * This file contains definitions from Hyper-V Hypervisor Top-Level Functional
- * Specification (TLFS):
- * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs
- */
-
-#ifndef _ASM_X86_HYPERV_TLFS_H
-#define _ASM_X86_HYPERV_TLFS_H
-
-#include <linux/types.h>
-#include <asm/page.h>
-/*
- * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent
- * is set by CPUID(HvCpuIdFunctionVersionAndFeatures).
- */
-#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
-#define HYPERV_CPUID_INTERFACE 0x40000001
-#define HYPERV_CPUID_VERSION 0x40000002
-#define HYPERV_CPUID_FEATURES 0x40000003
-#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
-#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
-#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
-#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
-#define HYPERV_CPUID_ISOLATION_CONFIG 0x4000000C
-
-#define HYPERV_CPUID_VIRT_STACK_INTERFACE 0x40000081
-#define HYPERV_VS_INTERFACE_EAX_SIGNATURE 0x31235356 /* "VS#1" */
-
-#define HYPERV_CPUID_VIRT_STACK_PROPERTIES 0x40000082
-/* Support for the extended IOAPIC RTE format */
-#define HYPERV_VS_PROPERTIES_EAX_EXTENDED_IOAPIC_RTE BIT(2)
-
-#define HYPERV_HYPERVISOR_PRESENT_BIT 0x80000000
-#define HYPERV_CPUID_MIN 0x40000005
-#define HYPERV_CPUID_MAX 0x4000ffff
-
-/*
- * Group D Features. The bit assignments are custom to each architecture.
- * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits.
- */
-/* The MWAIT instruction is available (per section MONITOR / MWAIT) */
-#define HV_X64_MWAIT_AVAILABLE BIT(0)
-/* Guest debugging support is available */
-#define HV_X64_GUEST_DEBUGGING_AVAILABLE BIT(1)
-/* Performance Monitor support is available*/
-#define HV_X64_PERF_MONITOR_AVAILABLE BIT(2)
-/* Support for physical CPU dynamic partitioning events is available*/
-#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE BIT(3)
-/*
- * Support for passing hypercall input parameter block via XMM
- * registers is available
- */
-#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE BIT(4)
-/* Support for a virtual guest idle state is available */
-#define HV_X64_GUEST_IDLE_STATE_AVAILABLE BIT(5)
-/* Frequency MSRs available */
-#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8)
-/* Crash MSR available */
-#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10)
-/* Support for debug MSRs available */
-#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11)
-/* Support for extended gva ranges for flush hypercalls available */
-#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14)
-/*
- * Support for returning hypercall output block via XMM
- * registers is available
- */
-#define HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE BIT(15)
-/* stimer Direct Mode is available */
-#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19)
-
-/*
- * Implementation recommendations. Indicates which behaviors the hypervisor
- * recommends the OS implement for optimal performance.
- * These are HYPERV_CPUID_ENLIGHTMENT_INFO.EAX bits.
- */
-/*
- * Recommend using hypercall for address space switches rather
- * than MOV to CR3 instruction
- */
-#define HV_X64_AS_SWITCH_RECOMMENDED BIT(0)
-/* Recommend using hypercall for local TLB flushes rather
- * than INVLPG or MOV to CR3 instructions */
-#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED BIT(1)
-/*
- * Recommend using hypercall for remote TLB flushes rather
- * than inter-processor interrupts
- */
-#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED BIT(2)
-/*
- * Recommend using MSRs for accessing APIC registers
- * EOI, ICR and TPR rather than their memory-mapped counterparts
- */
-#define HV_X64_APIC_ACCESS_RECOMMENDED BIT(3)
-/* Recommend using the hypervisor-provided MSR to initiate a system RESET */
-#define HV_X64_SYSTEM_RESET_RECOMMENDED BIT(4)
-/*
- * Recommend using relaxed timing for this partition. If used,
- * the VM should disable any watchdog timeouts that rely on the
- * timely delivery of external interrupts
- */
-#define HV_X64_RELAXED_TIMING_RECOMMENDED BIT(5)
-
-/*
- * Recommend not using Auto End-Of-Interrupt feature
- */
-#define HV_DEPRECATING_AEOI_RECOMMENDED BIT(9)
-
-/*
- * Recommend using cluster IPI hypercalls.
- */
-#define HV_X64_CLUSTER_IPI_RECOMMENDED BIT(10)
-
-/* Recommend using the newer ExProcessorMasks interface */
-#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
-
-/* Indicates that the hypervisor is nested within a Hyper-V partition. */
-#define HV_X64_HYPERV_NESTED BIT(12)
-
-/* Recommend using enlightened VMCS */
-#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
-
-/* Use hypercalls for MMIO config space access */
-#define HV_X64_USE_MMIO_HYPERCALLS BIT(21)
-
-/*
- * CPU management features identification.
- * These are HYPERV_CPUID_CPU_MANAGEMENT_FEATURES.EAX bits.
- */
-#define HV_X64_START_LOGICAL_PROCESSOR BIT(0)
-#define HV_X64_CREATE_ROOT_VIRTUAL_PROCESSOR BIT(1)
-#define HV_X64_PERFORMANCE_COUNTER_SYNC BIT(2)
-#define HV_X64_RESERVED_IDENTITY_BIT BIT(31)
-
-/*
- * Virtual processor will never share a physical core with another virtual
- * processor, except for virtual processors that are reported as sibling SMT
- * threads.
- */
-#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
-
-/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
-#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
-#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
-#define HV_X64_NESTED_MSR_BITMAP BIT(19)
-
-/* Nested features #2. These are HYPERV_CPUID_NESTED_FEATURES.EBX bits. */
-#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL BIT(0)
-
-/*
- * This is specific to AMD and specifies that enlightened TLB flush is
- * supported. If guest opts in to this feature, ASID invalidations only
- * flushes gva -> hpa mapping entries. To flush the TLB entries derived
- * from NPT, hypercalls should be used (HvFlushGuestPhysicalAddressSpace
- * or HvFlushGuestPhysicalAddressList).
- */
-#define HV_X64_NESTED_ENLIGHTENED_TLB BIT(22)
-
-/* HYPERV_CPUID_ISOLATION_CONFIG.EAX bits. */
-#define HV_PARAVISOR_PRESENT BIT(0)
-
-/* HYPERV_CPUID_ISOLATION_CONFIG.EBX bits. */
-#define HV_ISOLATION_TYPE GENMASK(3, 0)
-#define HV_SHARED_GPA_BOUNDARY_ACTIVE BIT(5)
-#define HV_SHARED_GPA_BOUNDARY_BITS GENMASK(11, 6)
-
-enum hv_isolation_type {
- HV_ISOLATION_TYPE_NONE = 0,
- HV_ISOLATION_TYPE_VBS = 1,
- HV_ISOLATION_TYPE_SNP = 2,
- HV_ISOLATION_TYPE_TDX = 3
-};
-
-/* Hyper-V specific model specific registers (MSRs) */
-
-/* MSR used to identify the guest OS. */
-#define HV_X64_MSR_GUEST_OS_ID 0x40000000
-
-/* MSR used to setup pages used to communicate with the hypervisor. */
-#define HV_X64_MSR_HYPERCALL 0x40000001
-
-/* MSR used to provide vcpu index */
-#define HV_X64_MSR_VP_INDEX 0x40000002
-
-/* MSR used to reset the guest OS. */
-#define HV_X64_MSR_RESET 0x40000003
-
-/* MSR used to provide vcpu runtime in 100ns units */
-#define HV_X64_MSR_VP_RUNTIME 0x40000010
-
-/* MSR used to read the per-partition time reference counter */
-#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
-
-/* A partition's reference time stamp counter (TSC) page */
-#define HV_X64_MSR_REFERENCE_TSC 0x40000021
-
-/* MSR used to retrieve the TSC frequency */
-#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
-
-/* MSR used to retrieve the local APIC timer frequency */
-#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
-
-/* Define the virtual APIC registers */
-#define HV_X64_MSR_EOI 0x40000070
-#define HV_X64_MSR_ICR 0x40000071
-#define HV_X64_MSR_TPR 0x40000072
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-
-/* Define synthetic interrupt controller model specific registers. */
-#define HV_X64_MSR_SCONTROL 0x40000080
-#define HV_X64_MSR_SVERSION 0x40000081
-#define HV_X64_MSR_SIEFP 0x40000082
-#define HV_X64_MSR_SIMP 0x40000083
-#define HV_X64_MSR_EOM 0x40000084
-#define HV_X64_MSR_SINT0 0x40000090
-#define HV_X64_MSR_SINT1 0x40000091
-#define HV_X64_MSR_SINT2 0x40000092
-#define HV_X64_MSR_SINT3 0x40000093
-#define HV_X64_MSR_SINT4 0x40000094
-#define HV_X64_MSR_SINT5 0x40000095
-#define HV_X64_MSR_SINT6 0x40000096
-#define HV_X64_MSR_SINT7 0x40000097
-#define HV_X64_MSR_SINT8 0x40000098
-#define HV_X64_MSR_SINT9 0x40000099
-#define HV_X64_MSR_SINT10 0x4000009A
-#define HV_X64_MSR_SINT11 0x4000009B
-#define HV_X64_MSR_SINT12 0x4000009C
-#define HV_X64_MSR_SINT13 0x4000009D
-#define HV_X64_MSR_SINT14 0x4000009E
-#define HV_X64_MSR_SINT15 0x4000009F
-
-/*
- * Define synthetic interrupt controller model specific registers for
- * nested hypervisor.
- */
-#define HV_X64_MSR_NESTED_SCONTROL 0x40001080
-#define HV_X64_MSR_NESTED_SVERSION 0x40001081
-#define HV_X64_MSR_NESTED_SIEFP 0x40001082
-#define HV_X64_MSR_NESTED_SIMP 0x40001083
-#define HV_X64_MSR_NESTED_EOM 0x40001084
-#define HV_X64_MSR_NESTED_SINT0 0x40001090
-
-/*
- * Synthetic Timer MSRs. Four timers per vcpu.
- */
-#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
-#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
-#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
-#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
-#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
-#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
-#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
-#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
-
-/* Hyper-V guest idle MSR */
-#define HV_X64_MSR_GUEST_IDLE 0x400000F0
-
-/* Hyper-V guest crash notification MSR's */
-#define HV_X64_MSR_CRASH_P0 0x40000100
-#define HV_X64_MSR_CRASH_P1 0x40000101
-#define HV_X64_MSR_CRASH_P2 0x40000102
-#define HV_X64_MSR_CRASH_P3 0x40000103
-#define HV_X64_MSR_CRASH_P4 0x40000104
-#define HV_X64_MSR_CRASH_CTL 0x40000105
-
-/* TSC emulation after migration */
-#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
-#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
-#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
-
-/* TSC invariant control */
-#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
-
-/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
-#define HV_EXPOSE_INVARIANT_TSC BIT_ULL(0)
-
-/*
- * To support arch-generic code calling hv_set/get_register:
- * - On x86, HV_MSR_ indicates an MSR accessed via rdmsrl/wrmsrl
- * - On ARM, HV_MSR_ indicates a VP register accessed via hypercall
- */
-#define HV_MSR_CRASH_P0 (HV_X64_MSR_CRASH_P0)
-#define HV_MSR_CRASH_P1 (HV_X64_MSR_CRASH_P1)
-#define HV_MSR_CRASH_P2 (HV_X64_MSR_CRASH_P2)
-#define HV_MSR_CRASH_P3 (HV_X64_MSR_CRASH_P3)
-#define HV_MSR_CRASH_P4 (HV_X64_MSR_CRASH_P4)
-#define HV_MSR_CRASH_CTL (HV_X64_MSR_CRASH_CTL)
-
-#define HV_MSR_VP_INDEX (HV_X64_MSR_VP_INDEX)
-#define HV_MSR_TIME_REF_COUNT (HV_X64_MSR_TIME_REF_COUNT)
-#define HV_MSR_REFERENCE_TSC (HV_X64_MSR_REFERENCE_TSC)
-
-#define HV_MSR_SINT0 (HV_X64_MSR_SINT0)
-#define HV_MSR_SVERSION (HV_X64_MSR_SVERSION)
-#define HV_MSR_SCONTROL (HV_X64_MSR_SCONTROL)
-#define HV_MSR_SIEFP (HV_X64_MSR_SIEFP)
-#define HV_MSR_SIMP (HV_X64_MSR_SIMP)
-#define HV_MSR_EOM (HV_X64_MSR_EOM)
-
-#define HV_MSR_NESTED_SCONTROL (HV_X64_MSR_NESTED_SCONTROL)
-#define HV_MSR_NESTED_SVERSION (HV_X64_MSR_NESTED_SVERSION)
-#define HV_MSR_NESTED_SIEFP (HV_X64_MSR_NESTED_SIEFP)
-#define HV_MSR_NESTED_SIMP (HV_X64_MSR_NESTED_SIMP)
-#define HV_MSR_NESTED_EOM (HV_X64_MSR_NESTED_EOM)
-#define HV_MSR_NESTED_SINT0 (HV_X64_MSR_NESTED_SINT0)
-
-#define HV_MSR_STIMER0_CONFIG (HV_X64_MSR_STIMER0_CONFIG)
-#define HV_MSR_STIMER0_COUNT (HV_X64_MSR_STIMER0_COUNT)
-
-/*
- * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and
- * there is not associated MSR address.
- */
-#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003
-#define HV_X64_VTL_MASK GENMASK(3, 0)
-
-/* Hyper-V memory host visibility */
-enum hv_mem_host_visibility {
- VMBUS_PAGE_NOT_VISIBLE = 0,
- VMBUS_PAGE_VISIBLE_READ_ONLY = 1,
- VMBUS_PAGE_VISIBLE_READ_WRITE = 3
-};
-
-/* HvCallModifySparseGpaPageHostVisibility hypercall */
-#define HV_MAX_MODIFY_GPA_REP_COUNT ((PAGE_SIZE / sizeof(u64)) - 2)
-struct hv_gpa_range_for_visibility {
- u64 partition_id;
- u32 host_visibility:2;
- u32 reserved0:30;
- u32 reserved1;
- u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT];
-} __packed;
-
-/*
- * Declare the MSR used to setup pages used to communicate with the hypervisor.
- */
-union hv_x64_msr_hypercall_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 guest_physical_address:52;
- } __packed;
-};
-
-union hv_vp_assist_msr_contents {
- u64 as_uint64;
- struct {
- u64 enable:1;
- u64 reserved:11;
- u64 pfn:52;
- } __packed;
-};
-
-struct hv_reenlightenment_control {
- __u64 vector:8;
- __u64 reserved1:8;
- __u64 enabled:1;
- __u64 reserved2:15;
- __u64 target_vp:32;
-} __packed;
-
-struct hv_tsc_emulation_control {
- __u64 enabled:1;
- __u64 reserved:63;
-} __packed;
-
-struct hv_tsc_emulation_status {
- __u64 inprogress:1;
- __u64 reserved:63;
-} __packed;
-
-#define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1))
-
-#define HV_X64_MSR_CRASH_PARAMS \
- (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0))
-
-#define HV_IPI_LOW_VECTOR 0x10
-#define HV_IPI_HIGH_VECTOR 0xff
-
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
-/* Hyper-V Enlightened VMCS version mask in nested features CPUID */
-#define HV_X64_ENLIGHTENED_VMCS_VERSION 0xff
-
-#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001
-#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12
-
-/* Number of XMM registers used in hypercall input/output */
-#define HV_HYPERCALL_MAX_XMM_REGISTERS 6
-
-struct hv_nested_enlightenments_control {
- struct {
- __u32 directhypercall:1;
- __u32 reserved:31;
- } features;
- struct {
- __u32 inter_partition_comm:1;
- __u32 reserved:31;
- } hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved1;
- __u32 vtl_entry_reason;
- __u32 vtl_reserved;
- __u64 vtl_ret_x64rax;
- __u64 vtl_ret_x64rcx;
- struct hv_nested_enlightenments_control nested_control;
- __u8 enlighten_vmentry;
- __u8 reserved2[7];
- __u64 current_nested_vmcs;
- __u8 synthetic_time_unhalted_timer_expired;
- __u8 reserved3[7];
- __u8 virtualization_fault_information[40];
- __u8 reserved4[8];
- __u8 intercept_message[256];
- __u8 vtl_ret_actions[256];
-} __packed;
-
-struct hv_enlightened_vmcs {
- u32 revision_id;
- u32 abort;
-
- u16 host_es_selector;
- u16 host_cs_selector;
- u16 host_ss_selector;
- u16 host_ds_selector;
- u16 host_fs_selector;
- u16 host_gs_selector;
- u16 host_tr_selector;
-
- u16 padding16_1;
-
- u64 host_ia32_pat;
- u64 host_ia32_efer;
-
- u64 host_cr0;
- u64 host_cr3;
- u64 host_cr4;
-
- u64 host_ia32_sysenter_esp;
- u64 host_ia32_sysenter_eip;
- u64 host_rip;
- u32 host_ia32_sysenter_cs;
-
- u32 pin_based_vm_exec_control;
- u32 vm_exit_controls;
- u32 secondary_vm_exec_control;
-
- u64 io_bitmap_a;
- u64 io_bitmap_b;
- u64 msr_bitmap;
-
- u16 guest_es_selector;
- u16 guest_cs_selector;
- u16 guest_ss_selector;
- u16 guest_ds_selector;
- u16 guest_fs_selector;
- u16 guest_gs_selector;
- u16 guest_ldtr_selector;
- u16 guest_tr_selector;
-
- u32 guest_es_limit;
- u32 guest_cs_limit;
- u32 guest_ss_limit;
- u32 guest_ds_limit;
- u32 guest_fs_limit;
- u32 guest_gs_limit;
- u32 guest_ldtr_limit;
- u32 guest_tr_limit;
- u32 guest_gdtr_limit;
- u32 guest_idtr_limit;
-
- u32 guest_es_ar_bytes;
- u32 guest_cs_ar_bytes;
- u32 guest_ss_ar_bytes;
- u32 guest_ds_ar_bytes;
- u32 guest_fs_ar_bytes;
- u32 guest_gs_ar_bytes;
- u32 guest_ldtr_ar_bytes;
- u32 guest_tr_ar_bytes;
-
- u64 guest_es_base;
- u64 guest_cs_base;
- u64 guest_ss_base;
- u64 guest_ds_base;
- u64 guest_fs_base;
- u64 guest_gs_base;
- u64 guest_ldtr_base;
- u64 guest_tr_base;
- u64 guest_gdtr_base;
- u64 guest_idtr_base;
-
- u64 padding64_1[3];
-
- u64 vm_exit_msr_store_addr;
- u64 vm_exit_msr_load_addr;
- u64 vm_entry_msr_load_addr;
-
- u64 cr3_target_value0;
- u64 cr3_target_value1;
- u64 cr3_target_value2;
- u64 cr3_target_value3;
-
- u32 page_fault_error_code_mask;
- u32 page_fault_error_code_match;
-
- u32 cr3_target_count;
- u32 vm_exit_msr_store_count;
- u32 vm_exit_msr_load_count;
- u32 vm_entry_msr_load_count;
-
- u64 tsc_offset;
- u64 virtual_apic_page_addr;
- u64 vmcs_link_pointer;
-
- u64 guest_ia32_debugctl;
- u64 guest_ia32_pat;
- u64 guest_ia32_efer;
-
- u64 guest_pdptr0;
- u64 guest_pdptr1;
- u64 guest_pdptr2;
- u64 guest_pdptr3;
-
- u64 guest_pending_dbg_exceptions;
- u64 guest_sysenter_esp;
- u64 guest_sysenter_eip;
-
- u32 guest_activity_state;
- u32 guest_sysenter_cs;
-
- u64 cr0_guest_host_mask;
- u64 cr4_guest_host_mask;
- u64 cr0_read_shadow;
- u64 cr4_read_shadow;
- u64 guest_cr0;
- u64 guest_cr3;
- u64 guest_cr4;
- u64 guest_dr7;
-
- u64 host_fs_base;
- u64 host_gs_base;
- u64 host_tr_base;
- u64 host_gdtr_base;
- u64 host_idtr_base;
- u64 host_rsp;
-
- u64 ept_pointer;
-
- u16 virtual_processor_id;
- u16 padding16_2[3];
-
- u64 padding64_2[5];
- u64 guest_physical_address;
-
- u32 vm_instruction_error;
- u32 vm_exit_reason;
- u32 vm_exit_intr_info;
- u32 vm_exit_intr_error_code;
- u32 idt_vectoring_info_field;
- u32 idt_vectoring_error_code;
- u32 vm_exit_instruction_len;
- u32 vmx_instruction_info;
-
- u64 exit_qualification;
- u64 exit_io_instruction_ecx;
- u64 exit_io_instruction_esi;
- u64 exit_io_instruction_edi;
- u64 exit_io_instruction_eip;
-
- u64 guest_linear_address;
- u64 guest_rsp;
- u64 guest_rflags;
-
- u32 guest_interruptibility_info;
- u32 cpu_based_vm_exec_control;
- u32 exception_bitmap;
- u32 vm_entry_controls;
- u32 vm_entry_intr_info_field;
- u32 vm_entry_exception_error_code;
- u32 vm_entry_instruction_len;
- u32 tpr_threshold;
-
- u64 guest_rip;
-
- u32 hv_clean_fields;
- u32 padding32_1;
- u32 hv_synthetic_controls;
- struct {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 reserved:30;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u32 padding32_2;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 padding64_4[4];
- u64 guest_bndcfgs;
- u64 guest_ia32_perf_global_ctrl;
- u64 guest_ia32_s_cet;
- u64 guest_ssp;
- u64 guest_ia32_int_ssp_table_addr;
- u64 guest_ia32_lbr_ctl;
- u64 padding64_5[2];
- u64 xss_exit_bitmap;
- u64 encls_exiting_bitmap;
- u64 host_ia32_perf_global_ctrl;
- u64 tsc_multiplier;
- u64 host_ia32_s_cet;
- u64 host_ssp;
- u64 host_ia32_int_ssp_table_addr;
- u64 padding64_6;
-} __packed;
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
-
-#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
-
-/*
- * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by
- * pairing it with architecturally impossible exit reasons. Bit 28 is set only
- * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit
- * is pending. I.e. it will never be set by hardware for non-SMI exits (there
- * are only three), nor will it ever be set unless the VMM is an STM.
- */
-#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
-
-/*
- * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose
- * SVM enlightenments to guests.
- */
-struct hv_vmcb_enlightenments {
- struct __packed hv_enlightenments_control {
- u32 nested_flush_hypercall:1;
- u32 msr_bitmap:1;
- u32 enlightened_npt_tlb: 1;
- u32 reserved:29;
- } __packed hv_enlightenments_control;
- u32 hv_vp_id;
- u64 hv_vm_id;
- u64 partition_assist_page;
- u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB.
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS 31
-
-/* Synthetic VM-Exit */
-#define HV_SVM_EXITCODE_ENL 0xf0000000
-#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
-
-struct hv_partition_assist_pg {
- u32 tlb_lock_count;
-};
-
-enum hv_interrupt_type {
- HV_X64_INTERRUPT_TYPE_FIXED = 0x0000,
- HV_X64_INTERRUPT_TYPE_LOWESTPRIORITY = 0x0001,
- HV_X64_INTERRUPT_TYPE_SMI = 0x0002,
- HV_X64_INTERRUPT_TYPE_REMOTEREAD = 0x0003,
- HV_X64_INTERRUPT_TYPE_NMI = 0x0004,
- HV_X64_INTERRUPT_TYPE_INIT = 0x0005,
- HV_X64_INTERRUPT_TYPE_SIPI = 0x0006,
- HV_X64_INTERRUPT_TYPE_EXTINT = 0x0007,
- HV_X64_INTERRUPT_TYPE_LOCALINT0 = 0x0008,
- HV_X64_INTERRUPT_TYPE_LOCALINT1 = 0x0009,
- HV_X64_INTERRUPT_TYPE_MAXIMUM = 0x000A,
-};
-
-union hv_msi_address_register {
- u32 as_uint32;
- struct {
- u32 reserved1:2;
- u32 destination_mode:1;
- u32 redirection_hint:1;
- u32 reserved2:8;
- u32 destination_id:8;
- u32 msi_base:12;
- };
-} __packed;
-
-union hv_msi_data_register {
- u32 as_uint32;
- struct {
- u32 vector:8;
- u32 delivery_mode:3;
- u32 reserved1:3;
- u32 level_assert:1;
- u32 trigger_mode:1;
- u32 reserved2:16;
- };
-} __packed;
-
-/* HvRetargetDeviceInterrupt hypercall */
-union hv_msi_entry {
- u64 as_uint64;
- struct {
- union hv_msi_address_register address;
- union hv_msi_data_register data;
- } __packed;
-};
-
-struct hv_x64_segment_register {
- u64 base;
- u32 limit;
- u16 selector;
- union {
- struct {
- u16 segment_type : 4;
- u16 non_system_segment : 1;
- u16 descriptor_privilege_level : 2;
- u16 present : 1;
- u16 reserved : 4;
- u16 available : 1;
- u16 _long : 1;
- u16 _default : 1;
- u16 granularity : 1;
- } __packed;
- u16 attributes;
- };
-} __packed;
-
-struct hv_x64_table_register {
- u16 pad[3];
- u16 limit;
- u64 base;
-} __packed;
-
-struct hv_init_vp_context {
- u64 rip;
- u64 rsp;
- u64 rflags;
-
- struct hv_x64_segment_register cs;
- struct hv_x64_segment_register ds;
- struct hv_x64_segment_register es;
- struct hv_x64_segment_register fs;
- struct hv_x64_segment_register gs;
- struct hv_x64_segment_register ss;
- struct hv_x64_segment_register tr;
- struct hv_x64_segment_register ldtr;
-
- struct hv_x64_table_register idtr;
- struct hv_x64_table_register gdtr;
-
- u64 efer;
- u64 cr0;
- u64 cr3;
- u64 cr4;
- u64 msr_cr_pat;
-} __packed;
-
-union hv_input_vtl {
- u8 as_uint8;
- struct {
- u8 target_vtl: 4;
- u8 use_target_vtl: 1;
- u8 reserved_z: 3;
- };
-} __packed;
-
-struct hv_enable_vp_vtl {
- u64 partition_id;
- u32 vp_index;
- union hv_input_vtl target_vtl;
- u8 mbz0;
- u16 mbz1;
- struct hv_init_vp_context vp_context;
-} __packed;
-
-struct hv_get_vp_from_apic_id_in {
- u64 partition_id;
- union hv_input_vtl target_vtl;
- u8 res[7];
- u32 apic_ids[];
-} __packed;
-
-#include <asm-generic/hyperv-tlfs.h>
-
-#endif
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index 4212c00c9708..9d69f3f8dbab 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -56,17 +56,6 @@ struct stat64 {
unsigned long long st_ino;
} __attribute__((packed));
-#define IA32_STACK_TOP IA32_PAGE_OFFSET
-
-#ifdef __KERNEL__
-struct linux_binprm;
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
- unsigned long stack_top, int exec_stack);
-struct mm_struct;
-extern void ia32_pick_mmap_layout(struct mm_struct *mm);
-
-#endif
-
extern bool __ia32_enabled;
static __always_inline bool ia32_enabled(void)
diff --git a/arch/x86/include/asm/ia32_unistd.h b/arch/x86/include/asm/ia32_unistd.h
deleted file mode 100644
index aa065c94ccf5..000000000000
--- a/arch/x86/include/asm/ia32_unistd.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_IA32_UNISTD_H
-#define _ASM_X86_IA32_UNISTD_H
-
-/*
- * This file contains the system call numbers of the ia32 compat ABI,
- * this is for the kernel only.
- */
-#define __SYSCALL_ia32_NR(x) (x)
-#include <asm/unistd_32_ia32.h>
-
-#endif /* _ASM_X86_IA32_UNISTD_H */
diff --git a/arch/x86/include/asm/ibt.h b/arch/x86/include/asm/ibt.h
index 1e59581d500c..28d845257303 100644
--- a/arch/x86/include/asm/ibt.h
+++ b/arch/x86/include/asm/ibt.h
@@ -21,7 +21,7 @@
#define HAS_KERNEL_IBT 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_X86_64
#define ASM_ENDBR "endbr64\n\t"
@@ -41,7 +41,7 @@
_ASM_PTR fname "\n\t" \
".popsection\n\t"
-static inline __attribute_const__ u32 gen_endbr(void)
+static __always_inline __attribute_const__ u32 gen_endbr(void)
{
u32 endbr;
@@ -56,7 +56,7 @@ static inline __attribute_const__ u32 gen_endbr(void)
return endbr;
}
-static inline __attribute_const__ u32 gen_endbr_poison(void)
+static __always_inline __attribute_const__ u32 gen_endbr_poison(void)
{
/*
* 4 byte NOP that isn't NOP4 (in fact it is OSP NOP3), such that it
@@ -65,19 +65,24 @@ static inline __attribute_const__ u32 gen_endbr_poison(void)
return 0x001f0f66; /* osp nopl (%rax) */
}
-static inline bool is_endbr(u32 val)
+static inline bool __is_endbr(u32 val)
{
if (val == gen_endbr_poison())
return true;
+ /* See cfi_fineibt_bhi_preamble() */
+ if (IS_ENABLED(CONFIG_FINEIBT_BHI) && val == 0x001f0ff5)
+ return true;
+
val &= ~0x01000000U; /* ENDBR32 -> ENDBR64 */
return val == gen_endbr();
}
+extern __noendbr bool is_endbr(u32 *val);
extern __noendbr u64 ibt_save(bool disable);
extern __noendbr void ibt_restore(u64 save);
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_X86_64
#define ENDBR endbr64
@@ -85,29 +90,29 @@ extern __noendbr void ibt_restore(u64 save);
#define ENDBR endbr32
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#else /* !IBT */
#define HAS_KERNEL_IBT 0
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define ASM_ENDBR
#define IBT_NOSEAL(name)
#define __noendbr
-static inline bool is_endbr(u32 val) { return false; }
+static inline bool is_endbr(u32 *val) { return false; }
static inline u64 ibt_save(bool disable) { return 0; }
static inline void ibt_restore(u64 save) { }
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define ENDBR
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_X86_KERNEL_IBT */
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 749c7411d2f1..a4ec27c67988 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -7,7 +7,7 @@
#define IDT_ALIGN (8 * (1 + HAS_KERNEL_IBT))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/entry-common.h>
#include <linux/hardirq.h>
@@ -212,8 +212,8 @@ __visible noinstr void func(struct pt_regs *regs, \
irqentry_state_t state = irqentry_enter(regs); \
u32 vector = (u32)(u8)error_code; \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_irq_on_irqstack_cond(__##func, regs, vector); \
instrumentation_end(); \
irqentry_exit(regs, state); \
@@ -250,7 +250,6 @@ static void __##func(struct pt_regs *regs); \
\
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
- kvm_set_cpu_l1tf_flush_l1d(); \
run_sysvec_on_irqstack_cond(__##func, regs); \
} \
\
@@ -258,6 +257,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -288,7 +288,6 @@ static __always_inline void __##func(struct pt_regs *regs); \
static __always_inline void instr_##func(struct pt_regs *regs) \
{ \
__irq_enter_raw(); \
- kvm_set_cpu_l1tf_flush_l1d(); \
__##func (regs); \
__irq_exit_raw(); \
} \
@@ -297,6 +296,7 @@ __visible noinstr void func(struct pt_regs *regs) \
{ \
irqentry_state_t state = irqentry_enter(regs); \
\
+ kvm_set_cpu_l1tf_flush_l1d(); \
instrumentation_begin(); \
instr_##func (regs); \
instrumentation_end(); \
@@ -474,7 +474,7 @@ static inline void fred_install_sysvec(unsigned int vector, const idtentry_t fun
idt_install_sysvec(vector, asm_##function); \
}
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
/*
* The ASM variants for DECLARE_IDTENTRY*() which emit the ASM entry stubs.
@@ -579,7 +579,7 @@ SYM_CODE_START(spurious_entries_start)
SYM_CODE_END(spurious_entries_start)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* The actual entry points. Note that DECLARE_IDTENTRY*() serves two
@@ -751,6 +751,12 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
# define fred_sysvec_kvm_posted_intr_nested_ipi NULL
#endif
+# ifdef CONFIG_X86_POSTED_MSI
+DECLARE_IDTENTRY_SYSVEC(POSTED_MSI_NOTIFICATION_VECTOR, sysvec_posted_msi_notification);
+#else
+# define fred_sysvec_posted_msi_notification NULL
+# endif
+
#if IS_ENABLED(CONFIG_HYPERV)
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index b56c5741581a..53e4015242b4 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -35,6 +35,8 @@
#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
#define INAT_PFX_EVEX 15 /* EVEX prefix */
+/* x86-64 REX2 prefix */
+#define INAT_PFX_REX2 16 /* 0xD5 */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -50,7 +52,7 @@
/* Legacy prefix */
#define INAT_PFX_OFFS 0
-#define INAT_PFX_BITS 4
+#define INAT_PFX_BITS 5
#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
/* Escape opcodes */
@@ -77,6 +79,9 @@
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
+#define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9))
+#define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10))
/* Attribute making macros for attribute tables */
#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
@@ -128,6 +133,11 @@ static inline int inat_is_rex_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
}
+static inline int inat_is_rex2_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX2;
+}
+
static inline int inat_last_prefix_id(insn_attr_t attr)
{
if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
@@ -227,4 +237,9 @@ static inline int inat_must_evex(insn_attr_t attr)
{
return attr & INAT_EVEXONLY;
}
+
+static inline int inat_evex_scalable(insn_attr_t attr)
+{
+ return attr & INAT_EVEX_SCALABLE;
+}
#endif
diff --git a/arch/x86/include/asm/init.h b/arch/x86/include/asm/init.h
index cc9ccf61b6bd..8b1b1abcef15 100644
--- a/arch/x86/include/asm/init.h
+++ b/arch/x86/include/asm/init.h
@@ -2,10 +2,15 @@
#ifndef _ASM_X86_INIT_H
#define _ASM_X86_INIT_H
-#define __head __section(".head.text")
+#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 170000
+#define __head __section(".head.text") __no_sanitize_undefined __no_stack_protector
+#else
+#define __head __section(".head.text") __no_sanitize_undefined
+#endif
struct x86_mapping_info {
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
+ void (*free_pgt_page)(void *, void *); /* free buf for page table */
void *context; /* context for alloc_pgt_page */
unsigned long page_flag; /* page flag for PMD or PUD entry */
unsigned long offset; /* ident mapping offset */
@@ -16,4 +21,6 @@ struct x86_mapping_info {
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend);
+void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
+
#endif /* _ASM_X86_INIT_H */
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 1b29f58f730f..7152ea809e6a 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -112,10 +112,15 @@ struct insn {
#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
#define X86_SIB_BASE(sib) ((sib) & 0x07)
-#define X86_REX_W(rex) ((rex) & 8)
-#define X86_REX_R(rex) ((rex) & 4)
-#define X86_REX_X(rex) ((rex) & 2)
-#define X86_REX_B(rex) ((rex) & 1)
+#define X86_REX2_M(rex) ((rex) & 0x80) /* REX2 M0 */
+#define X86_REX2_R(rex) ((rex) & 0x40) /* REX2 R4 */
+#define X86_REX2_X(rex) ((rex) & 0x20) /* REX2 X4 */
+#define X86_REX2_B(rex) ((rex) & 0x10) /* REX2 B4 */
+
+#define X86_REX_W(rex) ((rex) & 8) /* REX or REX2 W */
+#define X86_REX_R(rex) ((rex) & 4) /* REX or REX2 R3 */
+#define X86_REX_X(rex) ((rex) & 2) /* REX or REX2 X3 */
+#define X86_REX_B(rex) ((rex) & 1) /* REX or REX2 B3 */
/* VEX bit flags */
#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
@@ -161,6 +166,18 @@ static inline void insn_get_attribute(struct insn *insn)
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
+static inline int insn_is_rex2(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return insn->rex_prefix.nbytes == 2;
+}
+
+static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
+{
+ return X86_REX2_M(insn->rex_prefix.bytes[1]);
+}
+
static inline int insn_is_avx(struct insn *insn)
{
if (!insn->prefixes.got)
@@ -198,6 +215,13 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes < 3)
+ return 0;
+ return X86_VEX_W(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
diff --git a/arch/x86/include/asm/inst.h b/arch/x86/include/asm/inst.h
index 438ccd4f3cc4..e48a00b3311d 100644
--- a/arch/x86/include/asm/inst.h
+++ b/arch/x86/include/asm/inst.h
@@ -6,7 +6,7 @@
#ifndef X86_ASM_INST_H
#define X86_ASM_INST_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define REG_NUM_INVALID 100
diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index d0941f4c2724..be10c188614f 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -10,7 +10,7 @@
* that group keep the CPUID for the variants sorted by model number.
*
* The defined symbol names have the following form:
- * INTEL_FAM6{OPTFAMILY}_{MICROARCH}{OPTDIFF}
+ * INTEL_{OPTFAMILY}_{MICROARCH}{OPTDIFF}
* where:
* OPTFAMILY Describes the family of CPUs that this belongs to. Default
* is assumed to be "_CORE" (and should be omitted). Other values
@@ -40,137 +40,184 @@
* their own names :-(
*/
-/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
-#define INTEL_FAM6_ANY X86_MODEL_ANY
+#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model)
-#define INTEL_FAM6_CORE_YONAH 0x0E
+/* Wildcard match so X86_MATCH_VFM(ANY) works */
+#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
-#define INTEL_FAM6_CORE2_MEROM 0x0F
-#define INTEL_FAM6_CORE2_MEROM_L 0x16
-#define INTEL_FAM6_CORE2_PENRYN 0x17
-#define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
-
-#define INTEL_FAM6_NEHALEM 0x1E
-#define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
-#define INTEL_FAM6_NEHALEM_EP 0x1A
-#define INTEL_FAM6_NEHALEM_EX 0x2E
-
-#define INTEL_FAM6_WESTMERE 0x25
-#define INTEL_FAM6_WESTMERE_EP 0x2C
-#define INTEL_FAM6_WESTMERE_EX 0x2F
-
-#define INTEL_FAM6_SANDYBRIDGE 0x2A
-#define INTEL_FAM6_SANDYBRIDGE_X 0x2D
-#define INTEL_FAM6_IVYBRIDGE 0x3A
-#define INTEL_FAM6_IVYBRIDGE_X 0x3E
-
-#define INTEL_FAM6_HASWELL 0x3C
-#define INTEL_FAM6_HASWELL_X 0x3F
-#define INTEL_FAM6_HASWELL_L 0x45
-#define INTEL_FAM6_HASWELL_G 0x46
-
-#define INTEL_FAM6_BROADWELL 0x3D
-#define INTEL_FAM6_BROADWELL_G 0x47
-#define INTEL_FAM6_BROADWELL_X 0x4F
-#define INTEL_FAM6_BROADWELL_D 0x56
-
-#define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */
-#define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */
+/* Family 5 */
+#define INTEL_FAM5_START IFM(5, 0x00) /* Notational marker, also P5 A-step */
+#define INTEL_PENTIUM_75 IFM(5, 0x02) /* P54C */
+#define INTEL_PENTIUM_MMX IFM(5, 0x04) /* P55C */
+#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+
+/* Family 6 */
+#define INTEL_PENTIUM_PRO IFM(6, 0x01)
+#define INTEL_PENTIUM_II_KLAMATH IFM(6, 0x03)
+#define INTEL_PENTIUM_III_DESCHUTES IFM(6, 0x05)
+#define INTEL_PENTIUM_III_TUALATIN IFM(6, 0x0B)
+#define INTEL_PENTIUM_M_DOTHAN IFM(6, 0x0D)
+
+#define INTEL_CORE_YONAH IFM(6, 0x0E)
+
+#define INTEL_CORE2_MEROM IFM(6, 0x0F)
+#define INTEL_CORE2_MEROM_L IFM(6, 0x16)
+#define INTEL_CORE2_PENRYN IFM(6, 0x17)
+#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D)
+
+#define INTEL_NEHALEM IFM(6, 0x1E)
+#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */
+#define INTEL_NEHALEM_EP IFM(6, 0x1A)
+#define INTEL_NEHALEM_EX IFM(6, 0x2E)
+
+#define INTEL_WESTMERE IFM(6, 0x25)
+#define INTEL_WESTMERE_EP IFM(6, 0x2C)
+#define INTEL_WESTMERE_EX IFM(6, 0x2F)
+
+#define INTEL_SANDYBRIDGE IFM(6, 0x2A)
+#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D)
+#define INTEL_IVYBRIDGE IFM(6, 0x3A)
+#define INTEL_IVYBRIDGE_X IFM(6, 0x3E)
+
+#define INTEL_HASWELL IFM(6, 0x3C)
+#define INTEL_HASWELL_X IFM(6, 0x3F)
+#define INTEL_HASWELL_L IFM(6, 0x45)
+#define INTEL_HASWELL_G IFM(6, 0x46)
+
+#define INTEL_BROADWELL IFM(6, 0x3D)
+#define INTEL_BROADWELL_G IFM(6, 0x47)
+#define INTEL_BROADWELL_X IFM(6, 0x4F)
+#define INTEL_BROADWELL_D IFM(6, 0x56)
+
+#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */
+#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */
+#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */
/* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
/* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
-#define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */
+#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */
/* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
/* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
/* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
-#define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */
+#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */
/* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
-#define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */
-#define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */
+#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */
+#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */
+
+#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */
-#define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */
+#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */
+#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */
+#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */
+#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */
+#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
-#define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
+#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */
-#define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
+#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */
+#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */
-#define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
-#define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
+#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
-#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
+#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF) /* Raptor Cove */
-#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
+#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD) /* Redwood Cove */
+#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
-#define INTEL_FAM6_GRANITERAPIDS_X 0xAD
-#define INTEL_FAM6_GRANITERAPIDS_D 0xAE
+#define INTEL_BARTLETTLAKE IFM(6, 0xD7) /* Raptor Cove */
/* "Hybrid" Processors (P-Core/E-Core) */
-#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
+#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
-#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
-#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
+#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */
+#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */
-#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
-#define INTEL_FAM6_RAPTORLAKE_P 0xBA
-#define INTEL_FAM6_RAPTORLAKE_S 0xBF
+#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */
+#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
+#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
-#define INTEL_FAM6_METEORLAKE 0xAC
-#define INTEL_FAM6_METEORLAKE_L 0xAA
+#define INTEL_METEORLAKE IFM(6, 0xAC) /* Redwood Cove / Crestmont */
+#define INTEL_METEORLAKE_L IFM(6, 0xAA)
-#define INTEL_FAM6_ARROWLAKE_H 0xC5
-#define INTEL_FAM6_ARROWLAKE 0xC6
-#define INTEL_FAM6_ARROWLAKE_U 0xB5
+#define INTEL_ARROWLAKE_H IFM(6, 0xC5) /* Lion Cove / Skymont */
+#define INTEL_ARROWLAKE IFM(6, 0xC6)
+#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
-#define INTEL_FAM6_LUNARLAKE_M 0xBD
+#define INTEL_LUNARLAKE_M IFM(6, 0xBD) /* Lion Cove / Skymont */
+
+#define INTEL_PANTHERLAKE_L IFM(6, 0xCC) /* Cougar Cove / Crestmont */
/* "Small Core" Processors (Atom/E-Core) */
-#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
-#define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
+#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
+#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */
-#define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
-#define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
-#define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
+#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */
+#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */
+#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */
-#define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
-#define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
-#define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
+#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
+#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
+#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
+#define INTEL_ATOM_SILVERMONT_MID2 IFM(6, 0x5A) /* Anniedale */
-#define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
-#define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
-#define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
+#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
+#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
-#define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
-#define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
+#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
+#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */
/* Note: the micro-architecture is "Goldmont Plus" */
-#define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
+#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */
-#define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
-#define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
-#define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
+#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */
+#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */
+#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */
-#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
+#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */
-#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
-#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
+#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */
+#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */
-#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
+#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */
/* Xeon Phi */
-#define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
-#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
+#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
+#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
-/* Family 5 */
-#define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
+/* Notational marker denoting the last Family 6 model */
+#define INTEL_FAM6_LAST IFM(6, 0xFF)
+
+/* Family 15 - NetBurst */
+#define INTEL_P4_WILLAMETTE IFM(15, 0x01) /* Also Xeon Foster */
+#define INTEL_P4_PRESCOTT IFM(15, 0x03)
+#define INTEL_P4_PRESCOTT_2M IFM(15, 0x04)
+#define INTEL_P4_CEDARMILL IFM(15, 0x06) /* Also Xeon Dempsey */
+
+/* Family 19 */
+#define INTEL_PANTHERCOVE_X IFM(19, 0x01) /* Diamond Rapids */
+
+/*
+ * Intel CPU core types
+ *
+ * CPUID.1AH.EAX[31:0] uniquely identifies the microarchitecture
+ * of the core. Bits 31-24 indicates its core type (Core or Atom)
+ * and Bits [23:0] indicates the native model ID of the core.
+ * Core type and native model ID are defined in below enumerations.
+ */
+enum intel_cpu_type {
+ INTEL_CPU_TYPE_UNKNOWN,
+ INTEL_CPU_TYPE_ATOM = 0x20,
+ INTEL_CPU_TYPE_CORE = 0x40,
+};
+
+enum intel_native_id {
+ INTEL_ATOM_CMT_NATIVE_ID = 0x2, /* Crestmont */
+ INTEL_ATOM_SKT_NATIVE_ID = 0x3, /* Skymont */
+};
#endif /* _ASM_X86_INTEL_FAMILY_H */
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index 2f9eeb5c3069..5dbeac48a5b9 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -9,6 +9,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS_FMT4 8
#define MAX_PEBS_EVENTS 32
+#define MAX_PEBS_EVENTS_MASK GENMASK_ULL(MAX_PEBS_EVENTS - 1, 0)
#define MAX_FIXED_PEBS_EVENTS 16
/*
diff --git a/arch/x86/include/asm/intel_pconfig.h b/arch/x86/include/asm/intel_pconfig.h
deleted file mode 100644
index 994638ef171b..000000000000
--- a/arch/x86/include/asm/intel_pconfig.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_X86_INTEL_PCONFIG_H
-#define _ASM_X86_INTEL_PCONFIG_H
-
-#include <asm/asm.h>
-#include <asm/processor.h>
-
-enum pconfig_target {
- INVALID_TARGET = 0,
- MKTME_TARGET = 1,
- PCONFIG_TARGET_NR
-};
-
-int pconfig_target_supported(enum pconfig_target target);
-
-enum pconfig_leaf {
- MKTME_KEY_PROGRAM = 0,
- PCONFIG_LEAF_INVALID,
-};
-
-#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
-
-/* Defines and structure for MKTME_KEY_PROGRAM of PCONFIG instruction */
-
-/* mktme_key_program::keyid_ctrl COMMAND, bits [7:0] */
-#define MKTME_KEYID_SET_KEY_DIRECT 0
-#define MKTME_KEYID_SET_KEY_RANDOM 1
-#define MKTME_KEYID_CLEAR_KEY 2
-#define MKTME_KEYID_NO_ENCRYPT 3
-
-/* mktme_key_program::keyid_ctrl ENC_ALG, bits [23:8] */
-#define MKTME_AES_XTS_128 (1 << 8)
-
-/* Return codes from the PCONFIG MKTME_KEY_PROGRAM */
-#define MKTME_PROG_SUCCESS 0
-#define MKTME_INVALID_PROG_CMD 1
-#define MKTME_ENTROPY_ERROR 2
-#define MKTME_INVALID_KEYID 3
-#define MKTME_INVALID_ENC_ALG 4
-#define MKTME_DEVICE_BUSY 5
-
-/* Hardware requires the structure to be 256 byte aligned. Otherwise #GP(0). */
-struct mktme_key_program {
- u16 keyid;
- u32 keyid_ctrl;
- u8 __rsvd[58];
- u8 key_field_1[64];
- u8 key_field_2[64];
-} __packed __aligned(256);
-
-static inline int mktme_key_program(struct mktme_key_program *key_program)
-{
- unsigned long rax = MKTME_KEY_PROGRAM;
-
- if (!pconfig_target_supported(MKTME_TARGET))
- return -ENXIO;
-
- asm volatile(PCONFIG
- : "=a" (rax), "=b" (key_program)
- : "0" (rax), "1" (key_program)
- : "memory", "cc");
-
- return rax;
-}
-
-#endif /* _ASM_X86_INTEL_PCONFIG_H */
diff --git a/arch/x86/include/asm/intel_punit_ipc.h b/arch/x86/include/asm/intel_punit_ipc.h
index ce16da719596..1f9b5d225912 100644
--- a/arch/x86/include/asm/intel_punit_ipc.h
+++ b/arch/x86/include/asm/intel_punit_ipc.h
@@ -80,17 +80,10 @@ typedef enum {
#if IS_ENABLED(CONFIG_INTEL_PUNIT_IPC)
-int intel_punit_ipc_simple_command(int cmd, int para1, int para2);
int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2, u32 *in, u32 *out);
#else
-static inline int intel_punit_ipc_simple_command(int cmd,
- int para1, int para2)
-{
- return -ENODEV;
-}
-
static inline int intel_punit_ipc_command(u32 cmd, u32 para1, u32 para2,
u32 *in, u32 *out)
{
diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h
deleted file mode 100644
index 8537f597d20a..000000000000
--- a/arch/x86/include/asm/intel_scu_ipc.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_SCU_IPC_H_
-#define _ASM_X86_INTEL_SCU_IPC_H_
-
-#include <linux/ioport.h>
-
-struct device;
-struct intel_scu_ipc_dev;
-
-/**
- * struct intel_scu_ipc_data - Data used to configure SCU IPC
- * @mem: Base address of SCU IPC MMIO registers
- * @irq: The IRQ number used for SCU (optional)
- */
-struct intel_scu_ipc_data {
- struct resource mem;
- int irq;
-};
-
-struct intel_scu_ipc_dev *
-__intel_scu_ipc_register(struct device *parent,
- const struct intel_scu_ipc_data *scu_data,
- struct module *owner);
-
-#define intel_scu_ipc_register(parent, scu_data) \
- __intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
-
-void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu);
-
-struct intel_scu_ipc_dev *
-__devm_intel_scu_ipc_register(struct device *parent,
- const struct intel_scu_ipc_data *scu_data,
- struct module *owner);
-
-#define devm_intel_scu_ipc_register(parent, scu_data) \
- __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE)
-
-struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void);
-void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu);
-struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev);
-
-int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 *data);
-int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 data);
-int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr,
- u8 *data, size_t len);
-int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr,
- u8 *data, size_t len);
-
-int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr,
- u8 data, u8 mask);
-
-int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd,
- int sub);
-int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd,
- int sub, const void *in, size_t inlen,
- size_t size, void *out, size_t outlen);
-
-static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd,
- int sub, const void *in, size_t inlen,
- void *out, size_t outlen)
-{
- return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen,
- inlen, out, outlen);
-}
-
-#endif
diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h
index 8046e70dfd7c..43b7657febca 100644
--- a/arch/x86/include/asm/intel_telemetry.h
+++ b/arch/x86/include/asm/intel_telemetry.h
@@ -10,7 +10,7 @@
#define TELEM_MAX_EVENTS_SRAM 28
#define TELEM_MAX_OS_ALLOCATED_EVENTS 20
-#include <asm/intel_scu_ipc.h>
+#include <linux/platform_data/x86/intel_scu_ipc.h>
enum telemetry_unit {
TELEM_PSS = 0,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 294cd2a40818..e889c3bab5a2 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -42,6 +42,7 @@
#include <asm/early_ioremap.h>
#include <asm/pgtable_types.h>
#include <asm/shared/io.h>
+#include <asm/special_insns.h>
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
@@ -151,11 +152,6 @@ static inline void *phys_to_virt(phys_addr_t address)
#define phys_to_virt phys_to_virt
/*
- * Change "struct page" to physical address.
- */
-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
-
-/*
* ISA I/O bus memory addresses are 1:1 with the physical address.
* However, we truncate the address to unsigned int to avoid undesirable
* promotions in legacy drivers.
@@ -174,11 +170,14 @@ extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size);
#define ioremap_uc ioremap_uc
extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
#define ioremap_cache ioremap_cache
-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val);
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, pgprot_t prot);
#define ioremap_prot ioremap_prot
extern void __iomem *ioremap_encrypted(resource_size_t phys_addr, unsigned long size);
#define ioremap_encrypted ioremap_encrypted
+void *arch_memremap_wb(phys_addr_t phys_addr, size_t size, unsigned long flags);
+#define arch_memremap_wb arch_memremap_wb
+
/**
* ioremap - map bus memory into CPU space
* @offset: bus address of the memory
@@ -209,6 +208,23 @@ void memset_io(volatile void __iomem *, int, size_t);
#define memcpy_toio memcpy_toio
#define memset_io memset_io
+#ifdef CONFIG_X86_64
+/*
+ * Commit 0f07496144c2 ("[PATCH] Add faster __iowrite32_copy routine for
+ * x86_64") says that circa 2006 rep movsl is noticeably faster than a copy
+ * loop.
+ */
+static inline void __iowrite32_copy(void __iomem *to, const void *from,
+ size_t count)
+{
+ asm volatile("rep ; movsl"
+ : "=&c"(count), "=&D"(to), "=&S"(from)
+ : "0"(count), "1"(to), "2"(from)
+ : "memory");
+}
+#define __iowrite32_copy __iowrite32_copy
+#endif
+
/*
* ISA space is 'always mapped' on a typical x86 system, no need to
* explicitly ioremap() it. The fact that the ISA IO space is mapped
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h
index af7541c11821..8ace6559d399 100644
--- a/arch/x86/include/asm/iosf_mbi.h
+++ b/arch/x86/include/asm/iosf_mbi.h
@@ -168,13 +168,6 @@ void iosf_mbi_unblock_punit_i2c_access(void);
int iosf_mbi_register_pmic_bus_access_notifier(struct notifier_block *nb);
/**
- * iosf_mbi_register_pmic_bus_access_notifier - Unregister PMIC bus notifier
- *
- * @nb: notifier_block to unregister
- */
-int iosf_mbi_unregister_pmic_bus_access_notifier(struct notifier_block *nb);
-
-/**
* iosf_mbi_unregister_pmic_bus_access_notifier_unlocked - Unregister PMIC bus
* notifier, unlocked
*
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h
index 7a2ed154a5e1..5036f13ab69f 100644
--- a/arch/x86/include/asm/irq_remapping.h
+++ b/arch/x86/include/asm/irq_remapping.h
@@ -50,6 +50,13 @@ static inline struct irq_domain *arch_get_ir_parent_domain(void)
return x86_vector_domain;
}
+extern bool enable_posted_msi;
+
+static inline bool posted_msi_supported(void)
+{
+ return enable_posted_msi && irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
#else /* CONFIG_IRQ_REMAP */
static inline bool irq_remapping_cap(enum irq_remap_cap cap) { return 0; }
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 798183867d78..735c3a491f60 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -100,8 +100,8 @@
}
#define ASM_CALL_ARG0 \
- "call %P[__func] \n" \
- ASM_REACHABLE
+ "1: call %c[__func] \n" \
+ ANNOTATE_REACHABLE(1b)
#define ASM_CALL_ARG1 \
"movq %[arg1], %%rdi \n" \
@@ -116,7 +116,7 @@
ASM_CALL_ARG2
#define call_on_irqstack(func, asm_call, argconstr...) \
- call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \
+ call_on_stack(__this_cpu_read(hardirq_stack_ptr), \
func, asm_call, argconstr)
/* Macros to assert type correctness for run_*_on_irqstack macros */
@@ -135,7 +135,7 @@
* User mode entry and interrupt on the irq stack do not \
* switch stacks. If from user mode the task stack is empty. \
*/ \
- if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \
+ if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \
irq_enter_rcu(); \
func(c_args); \
irq_exit_rcu(); \
@@ -146,9 +146,9 @@
* places. Invoke the stack switch macro with the call \
* sequence which matches the above direct invocation. \
*/ \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
+ __this_cpu_write(hardirq_stack_inuse, true); \
call_on_irqstack(func, asm_call, constr); \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
} \
}
@@ -212,9 +212,9 @@
*/
#define do_softirq_own_stack() \
{ \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \
+ __this_cpu_write(hardirq_stack_inuse, true); \
call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \
- __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \
+ __this_cpu_write(hardirq_stack_inuse, false); \
}
#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index d18bfb238f66..47051871b436 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... LOCAL_TIMER_VECTOR-1
- * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
+ * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts
+ * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
@@ -97,10 +97,16 @@
#define LOCAL_TIMER_VECTOR 0xec
+/*
+ * Posted interrupt notification vector for all device MSIs delivered to
+ * the host kernel.
+ */
+#define POSTED_MSI_NOTIFICATION_VECTOR 0xeb
+
#define NR_VECTORS 256
#ifdef CONFIG_X86_LOCAL_APIC
-#define FIRST_SYSTEM_VECTOR LOCAL_TIMER_VECTOR
+#define FIRST_SYSTEM_VECTOR POSTED_MSI_NOTIFICATION_VECTOR
#else
#define FIRST_SYSTEM_VECTOR NR_VECTORS
#endif
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 8c5ae649d2df..9a9b21b78905 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -4,7 +4,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/nospec-branch.h>
@@ -54,29 +54,30 @@ static __always_inline void native_halt(void)
asm volatile("hlt": : :"memory");
}
-#endif
-
-#ifdef CONFIG_PARAVIRT_XXL
-#include <asm/paravirt.h>
-#else
-#ifndef __ASSEMBLY__
-#include <linux/types.h>
-
-static __always_inline unsigned long arch_local_save_flags(void)
+static __always_inline int native_irqs_disabled_flags(unsigned long flags)
{
- return native_save_fl();
+ return !(flags & X86_EFLAGS_IF);
}
-static __always_inline void arch_local_irq_disable(void)
+static __always_inline unsigned long native_local_irq_save(void)
{
+ unsigned long flags = native_save_fl();
+
native_irq_disable();
+
+ return flags;
}
-static __always_inline void arch_local_irq_enable(void)
+static __always_inline void native_local_irq_restore(unsigned long flags)
{
- native_irq_enable();
+ if (!native_irqs_disabled_flags(flags))
+ native_irq_enable();
}
+#endif
+
+#ifndef CONFIG_PARAVIRT
+#ifndef __ASSEMBLY__
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
@@ -94,6 +95,29 @@ static __always_inline void halt(void)
{
native_halt();
}
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_PARAVIRT */
+
+#ifdef CONFIG_PARAVIRT_XXL
+#include <asm/paravirt.h>
+#else
+#ifndef __ASSEMBLER__
+#include <linux/types.h>
+
+static __always_inline unsigned long arch_local_save_flags(void)
+{
+ return native_save_fl();
+}
+
+static __always_inline void arch_local_irq_disable(void)
+{
+ native_irq_disable();
+}
+
+static __always_inline void arch_local_irq_enable(void)
+{
+ native_irq_enable();
+}
/*
* For spinlocks, etc:
@@ -113,10 +137,10 @@ static __always_inline unsigned long arch_local_irq_save(void)
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_PARAVIRT_XXL */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static __always_inline int arch_irqs_disabled_flags(unsigned long flags)
{
return !(flags & X86_EFLAGS_IF);
@@ -134,6 +158,6 @@ static __always_inline void arch_local_irq_restore(unsigned long flags)
if (!arch_irqs_disabled_flags(flags))
arch_local_irq_enable();
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index cbbef32517f0..61dd1dee7812 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -7,40 +7,33 @@
#include <asm/asm.h>
#include <asm/nops.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/stringify.h>
#include <linux/types.h>
-#define JUMP_TABLE_ENTRY \
+#define JUMP_TABLE_ENTRY(key, label) \
".pushsection __jump_table, \"aw\" \n\t" \
_ASM_ALIGN "\n\t" \
".long 1b - . \n\t" \
- ".long %l[l_yes] - . \n\t" \
- _ASM_PTR "%c0 + %c1 - .\n\t" \
+ ".long " label " - . \n\t" \
+ _ASM_PTR " " key " - . \n\t" \
".popsection \n\t"
+/* This macro is also expanded on the Rust side. */
#ifdef CONFIG_HAVE_JUMP_LABEL_HACK
-
-static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
-{
- asm goto("1:"
- "jmp %l[l_yes] # objtool NOPs this \n\t"
- JUMP_TABLE_ENTRY
- : : "i" (key), "i" (2 | branch) : : l_yes);
-
- return false;
-l_yes:
- return true;
-}
-
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: jmp " label " # objtool NOPs this \n\t" \
+ JUMP_TABLE_ENTRY(key " + 2", label)
#else /* !CONFIG_HAVE_JUMP_LABEL_HACK */
+#define ARCH_STATIC_BRANCH_ASM(key, label) \
+ "1: .byte " __stringify(BYTES_NOP5) "\n\t" \
+ JUMP_TABLE_ENTRY(key, label)
+#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm goto("1:"
- ".byte " __stringify(BYTES_NOP5) "\n\t"
- JUMP_TABLE_ENTRY
+ asm goto(ARCH_STATIC_BRANCH_ASM("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -48,13 +41,11 @@ l_yes:
return true;
}
-#endif /* CONFIG_HAVE_JUMP_LABEL_HACK */
-
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
asm goto("1:"
"jmp %l[l_yes]\n\t"
- JUMP_TABLE_ENTRY
+ JUMP_TABLE_ENTRY("%c0 + %c1", "%l[l_yes]")
: : "i" (key), "i" (branch) : : l_yes);
return false;
@@ -64,6 +55,6 @@ l_yes:
extern int arch_jump_entry_size(struct jump_entry *entry);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index de75306b932e..d7e33c7f096b 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -23,7 +23,7 @@
(1ULL << (__VIRTUAL_MASK_SHIFT - \
KASAN_SHADOW_SCALE_SHIFT)))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_KASAN
void __init kasan_early_init(void);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 91ca9a9ee3a2..5432457d2338 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -8,21 +8,17 @@
# define PA_PGD 2
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
-#else
-# define PA_CONTROL_PAGE 0
-# define VA_CONTROL_PAGE 1
-# define PA_TABLE_PAGE 2
-# define PA_SWAP_PAGE 3
-# define PAGES_NR 4
#endif
+# define KEXEC_CONTROL_PAGE_SIZE 4096
# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/string.h>
#include <linux/kernel.h>
+#include <asm/asm.h>
#include <asm/page.h>
#include <asm/ptrace.h>
@@ -43,7 +39,6 @@ struct kimage;
/* Maximum address we can use for the control code buffer */
# define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
-# define KEXEC_CONTROL_PAGE_SIZE 4096
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_386
@@ -58,11 +53,12 @@ struct kimage;
/* Maximum address we can use for the control pages */
# define KEXEC_CONTROL_MEMORY_LIMIT (MAXMEM-1)
-/* Allocate one page for the pdp and the second for the code */
-# define KEXEC_CONTROL_PAGE_SIZE (4096UL + 4096UL)
-
/* The native architecture */
# define KEXEC_ARCH KEXEC_ARCH_X86_64
+
+extern unsigned long kexec_va_control_page;
+extern unsigned long kexec_pa_table_page;
+extern unsigned long kexec_pa_swap_page;
#endif
/*
@@ -76,61 +72,52 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
if (oldregs) {
memcpy(newregs, oldregs, sizeof(*newregs));
} else {
+ asm volatile("mov %%" _ASM_BX ",%0" : "=m"(newregs->bx));
+ asm volatile("mov %%" _ASM_CX ",%0" : "=m"(newregs->cx));
+ asm volatile("mov %%" _ASM_DX ",%0" : "=m"(newregs->dx));
+ asm volatile("mov %%" _ASM_SI ",%0" : "=m"(newregs->si));
+ asm volatile("mov %%" _ASM_DI ",%0" : "=m"(newregs->di));
+ asm volatile("mov %%" _ASM_BP ",%0" : "=m"(newregs->bp));
+ asm volatile("mov %%" _ASM_AX ",%0" : "=m"(newregs->ax));
+ asm volatile("mov %%" _ASM_SP ",%0" : "=m"(newregs->sp));
+#ifdef CONFIG_X86_64
+ asm volatile("mov %%r8,%0" : "=m"(newregs->r8));
+ asm volatile("mov %%r9,%0" : "=m"(newregs->r9));
+ asm volatile("mov %%r10,%0" : "=m"(newregs->r10));
+ asm volatile("mov %%r11,%0" : "=m"(newregs->r11));
+ asm volatile("mov %%r12,%0" : "=m"(newregs->r12));
+ asm volatile("mov %%r13,%0" : "=m"(newregs->r13));
+ asm volatile("mov %%r14,%0" : "=m"(newregs->r14));
+ asm volatile("mov %%r15,%0" : "=m"(newregs->r15));
+#endif
+ asm volatile("mov %%ss,%k0" : "=a"(newregs->ss));
+ asm volatile("mov %%cs,%k0" : "=a"(newregs->cs));
#ifdef CONFIG_X86_32
- asm volatile("movl %%ebx,%0" : "=m"(newregs->bx));
- asm volatile("movl %%ecx,%0" : "=m"(newregs->cx));
- asm volatile("movl %%edx,%0" : "=m"(newregs->dx));
- asm volatile("movl %%esi,%0" : "=m"(newregs->si));
- asm volatile("movl %%edi,%0" : "=m"(newregs->di));
- asm volatile("movl %%ebp,%0" : "=m"(newregs->bp));
- asm volatile("movl %%eax,%0" : "=m"(newregs->ax));
- asm volatile("movl %%esp,%0" : "=m"(newregs->sp));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("movl %%ds, %%eax;" :"=a"(newregs->ds));
- asm volatile("movl %%es, %%eax;" :"=a"(newregs->es));
- asm volatile("pushfl; popl %0" :"=m"(newregs->flags));
-#else
- asm volatile("movq %%rbx,%0" : "=m"(newregs->bx));
- asm volatile("movq %%rcx,%0" : "=m"(newregs->cx));
- asm volatile("movq %%rdx,%0" : "=m"(newregs->dx));
- asm volatile("movq %%rsi,%0" : "=m"(newregs->si));
- asm volatile("movq %%rdi,%0" : "=m"(newregs->di));
- asm volatile("movq %%rbp,%0" : "=m"(newregs->bp));
- asm volatile("movq %%rax,%0" : "=m"(newregs->ax));
- asm volatile("movq %%rsp,%0" : "=m"(newregs->sp));
- asm volatile("movq %%r8,%0" : "=m"(newregs->r8));
- asm volatile("movq %%r9,%0" : "=m"(newregs->r9));
- asm volatile("movq %%r10,%0" : "=m"(newregs->r10));
- asm volatile("movq %%r11,%0" : "=m"(newregs->r11));
- asm volatile("movq %%r12,%0" : "=m"(newregs->r12));
- asm volatile("movq %%r13,%0" : "=m"(newregs->r13));
- asm volatile("movq %%r14,%0" : "=m"(newregs->r14));
- asm volatile("movq %%r15,%0" : "=m"(newregs->r15));
- asm volatile("movl %%ss, %%eax;" :"=a"(newregs->ss));
- asm volatile("movl %%cs, %%eax;" :"=a"(newregs->cs));
- asm volatile("pushfq; popq %0" :"=m"(newregs->flags));
+ asm volatile("mov %%ds,%k0" : "=a"(newregs->ds));
+ asm volatile("mov %%es,%k0" : "=a"(newregs->es));
#endif
+ asm volatile("pushf\n\t"
+ "pop %0" : "=m"(newregs->flags));
newregs->ip = _THIS_IP_;
}
}
#ifdef CONFIG_X86_32
-asmlinkage unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long control_page,
- unsigned long start_address,
- unsigned int has_pae,
- unsigned int preserve_context);
+typedef asmlinkage unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long control_page,
+ unsigned long start_address,
+ unsigned int has_pae,
+ unsigned int preserve_context);
#else
-unsigned long
-relocate_kernel(unsigned long indirection_page,
- unsigned long page_list,
- unsigned long start_address,
- unsigned int preserve_context,
- unsigned int host_mem_enc_active);
+typedef unsigned long
+relocate_kernel_fn(unsigned long indirection_page,
+ unsigned long pa_control_page,
+ unsigned long start_address,
+ unsigned int preserve_context,
+ unsigned int host_mem_enc_active);
#endif
-
+extern relocate_kernel_fn relocate_kernel;
#define ARCH_HAS_KIMAGE_ARCH
#ifdef CONFIG_X86_32
@@ -145,6 +132,19 @@ struct kimage_arch {
};
#else
struct kimage_arch {
+ /*
+ * This is a kimage control page, as it must not overlap with either
+ * source or destination address ranges.
+ */
+ pgd_t *pgd;
+ /*
+ * The virtual mapping of the control code page itself is used only
+ * during the transition, while the current kernel's pages are all
+ * in place. Thus the intermediate page table pages used to map it
+ * are not control pages, but instead just normal pages obtained
+ * with get_zeroed_page(). And have to be tracked (below) so that
+ * they can be freed.
+ */
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
@@ -207,23 +207,16 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image);
extern void kdump_nmi_shootdown_cpus(void);
#ifdef CONFIG_CRASH_HOTPLUG
-void arch_crash_handle_hotplug_event(struct kimage *image);
+void arch_crash_handle_hotplug_event(struct kimage *image, void *arg);
#define arch_crash_handle_hotplug_event arch_crash_handle_hotplug_event
-#ifdef CONFIG_HOTPLUG_CPU
-int arch_crash_hotplug_cpu_support(void);
-#define crash_hotplug_cpu_support arch_crash_hotplug_cpu_support
-#endif
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-int arch_crash_hotplug_memory_support(void);
-#define crash_hotplug_memory_support arch_crash_hotplug_memory_support
-#endif
+int arch_crash_hotplug_support(struct kimage *image, unsigned long kexec_flags);
+#define arch_crash_hotplug_support arch_crash_hotplug_support
unsigned int arch_crash_get_elfcorehdr_size(void);
#define crash_get_elfcorehdr_size arch_crash_get_elfcorehdr_size
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_KEXEC_H */
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 110d7f29ca9a..823c0434bbad 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -9,14 +9,13 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
+ * a NULL definition. KVM_X86_OP_OPTIONAL_RET0() can be used likewise
* to make a definition optional, but in this case the default will
* be __static_call_return0.
*/
KVM_X86_OP(check_processor_compatibility)
-KVM_X86_OP(hardware_enable)
-KVM_X86_OP(hardware_disable)
+KVM_X86_OP(enable_virtualization_cpu)
+KVM_X86_OP(disable_virtualization_cpu)
KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
@@ -35,6 +34,7 @@ KVM_X86_OP(set_msr)
KVM_X86_OP(get_segment_base)
KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
+KVM_X86_OP(get_cpl_no_cache)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
KVM_X86_OP(is_valid_cr0)
@@ -48,6 +48,7 @@ KVM_X86_OP(set_idt)
KVM_X86_OP(get_gdt)
KVM_X86_OP(set_gdt)
KVM_X86_OP(sync_dirty_debug_regs)
+KVM_X86_OP(set_dr6)
KVM_X86_OP(set_dr7)
KVM_X86_OP(cache_reg)
KVM_X86_OP(get_rflags)
@@ -83,9 +84,7 @@ KVM_X86_OP(enable_nmi_window)
KVM_X86_OP(enable_irq_window)
KVM_X86_OP_OPTIONAL(update_cr8_intercept)
KVM_X86_OP(refresh_apicv_exec_ctrl)
-KVM_X86_OP_OPTIONAL(hwapic_irr_update)
KVM_X86_OP_OPTIONAL(hwapic_isr_update)
-KVM_X86_OP_OPTIONAL_RET0(guest_apic_has_interrupt)
KVM_X86_OP_OPTIONAL(load_eoi_exitmap)
KVM_X86_OP_OPTIONAL(set_virtual_apic_mode)
KVM_X86_OP_OPTIONAL(set_apic_access_page_addr)
@@ -95,15 +94,19 @@ KVM_X86_OP_OPTIONAL_RET0(set_tss_addr)
KVM_X86_OP_OPTIONAL_RET0(set_identity_map_addr)
KVM_X86_OP_OPTIONAL_RET0(get_mt_mask)
KVM_X86_OP(load_mmu_pgd)
+KVM_X86_OP_OPTIONAL(link_external_spt)
+KVM_X86_OP_OPTIONAL(set_external_spte)
+KVM_X86_OP_OPTIONAL(free_external_spt)
+KVM_X86_OP_OPTIONAL(remove_external_spte)
KVM_X86_OP(has_wbinvd_exit)
KVM_X86_OP(get_l2_tsc_offset)
KVM_X86_OP(get_l2_tsc_multiplier)
KVM_X86_OP(write_tsc_offset)
KVM_X86_OP(write_tsc_multiplier)
KVM_X86_OP(get_exit_info)
+KVM_X86_OP(get_entry_info)
KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
-KVM_X86_OP(sched_in)
KVM_X86_OP_OPTIONAL(update_cpu_dirty_logging)
KVM_X86_OP_OPTIONAL(vcpu_blocking)
KVM_X86_OP_OPTIONAL(vcpu_unblocking)
@@ -121,13 +124,14 @@ KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
#endif
+KVM_X86_OP_OPTIONAL(dev_get_attr)
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
KVM_X86_OP_OPTIONAL(vm_copy_enc_context_from)
KVM_X86_OP_OPTIONAL(vm_move_enc_context_from)
KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
-KVM_X86_OP(get_msr_feature)
+KVM_X86_OP(get_feature_msr)
KVM_X86_OP(check_emulate_instruction)
KVM_X86_OP(apic_init_signal_blocked)
KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
@@ -138,6 +142,9 @@ KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
+KVM_X86_OP_OPTIONAL_RET0(gmem_prepare)
+KVM_X86_OP_OPTIONAL_RET0(private_max_mapping_level)
+KVM_X86_OP_OPTIONAL(gmem_invalidate)
#undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL
diff --git a/arch/x86/include/asm/kvm-x86-pmu-ops.h b/arch/x86/include/asm/kvm-x86-pmu-ops.h
index f852b13aeefe..9159bf1a4730 100644
--- a/arch/x86/include/asm/kvm-x86-pmu-ops.h
+++ b/arch/x86/include/asm/kvm-x86-pmu-ops.h
@@ -9,8 +9,7 @@ BUILD_BUG_ON(1)
* "static_call_update()" calls.
*
* KVM_X86_PMU_OP_OPTIONAL() can be used for those functions that can have
- * a NULL definition, for example if "static_call_cond()" will be used
- * at the call sites.
+ * a NULL definition.
*/
KVM_X86_PMU_OP(rdpmc_ecx_to_pmc)
KVM_X86_PMU_OP(msr_idx_to_pmc)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6efd1497b026..7bc174a1f1cb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -24,8 +24,10 @@
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
-#include <linux/hyperv.h>
#include <linux/kfifo.h>
+#include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
+#include <linux/atomic.h>
#include <asm/apic.h>
#include <asm/pvclock-abi.h>
@@ -33,9 +35,11 @@
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/asm.h>
+#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
-#include <asm/hyperv-tlfs.h>
+#include <asm/reboot.h>
+#include <hyperv/hvhdk.h>
#define __KVM_HAVE_ARCH_VCPU_DEBUGFS
@@ -121,6 +125,7 @@
KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE KVM_ARCH_REQ(34)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -159,7 +164,6 @@
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
-#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8
#define ASYNC_PF_PER_VCPU 64
@@ -211,6 +215,7 @@ enum exit_fastpath_completion {
EXIT_FASTPATH_NONE,
EXIT_FASTPATH_REENTER_GUEST,
EXIT_FASTPATH_EXIT_HANDLED,
+ EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;
@@ -254,32 +259,31 @@ enum x86_intercept_stage;
KVM_GUESTDBG_INJECT_DB | \
KVM_GUESTDBG_BLOCKIRQ)
+#define PFERR_PRESENT_MASK BIT(0)
+#define PFERR_WRITE_MASK BIT(1)
+#define PFERR_USER_MASK BIT(2)
+#define PFERR_RSVD_MASK BIT(3)
+#define PFERR_FETCH_MASK BIT(4)
+#define PFERR_PK_MASK BIT(5)
+#define PFERR_SGX_MASK BIT(15)
+#define PFERR_GUEST_RMP_MASK BIT_ULL(31)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(32)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(33)
+#define PFERR_GUEST_ENC_MASK BIT_ULL(34)
+#define PFERR_GUEST_SIZEM_MASK BIT_ULL(35)
+#define PFERR_GUEST_VMPL_MASK BIT_ULL(36)
-#define PFERR_PRESENT_BIT 0
-#define PFERR_WRITE_BIT 1
-#define PFERR_USER_BIT 2
-#define PFERR_RSVD_BIT 3
-#define PFERR_FETCH_BIT 4
-#define PFERR_PK_BIT 5
-#define PFERR_SGX_BIT 15
-#define PFERR_GUEST_FINAL_BIT 32
-#define PFERR_GUEST_PAGE_BIT 33
-#define PFERR_IMPLICIT_ACCESS_BIT 48
-
-#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
-#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
-#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
-#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
-#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
-
-#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \
- PFERR_WRITE_MASK | \
- PFERR_PRESENT_MASK)
+/*
+ * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
+ * when emulating instructions that triggers implicit access.
+ */
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(48)
+/*
+ * PRIVATE_ACCESS is a KVM-defined flag us to indicate that a fault occurred
+ * when the guest was accessing private memory.
+ */
+#define PFERR_PRIVATE_ACCESS BIT_ULL(49)
+#define PFERR_SYNTHETIC_MASK (PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)
/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC 0
@@ -311,10 +315,11 @@ struct kvm_kernel_irq_routing_entry;
* the number of unique SPs that can theoretically be created is 2^n, where n
* is the number of bits that are used to compute the role.
*
- * But, even though there are 19 bits in the mask below, not all combinations
+ * But, even though there are 20 bits in the mask below, not all combinations
* of modes and flags are possible:
*
- * - invalid shadow pages are not accounted, so the bits are effectively 18
+ * - invalid shadow pages are not accounted, mirror pages are not shadowed,
+ * so the bits are effectively 18.
*
* - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
* execonly and ad_disabled are only used for nested EPT which has
@@ -347,7 +352,8 @@ union kvm_mmu_page_role {
unsigned ad_disabled:1;
unsigned guest_mode:1;
unsigned passthrough:1;
- unsigned :5;
+ unsigned is_mirror:1;
+ unsigned :4;
/*
* This is left at the top of the word so that
@@ -401,7 +407,7 @@ union kvm_cpu_role {
};
struct kvm_rmap_head {
- unsigned long val;
+ atomic_long_t val;
};
struct kvm_pio_request {
@@ -455,6 +461,7 @@ struct kvm_mmu {
int (*sync_spte)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *sp, int i);
struct kvm_mmu_root_info root;
+ hpa_t mirror_root_hpa;
union kvm_cpu_role cpu_role;
union kvm_mmu_page_role root_role;
@@ -530,12 +537,16 @@ struct kvm_pmc {
};
/* More counters may conflict with other existing Architectural MSRs */
-#define KVM_INTEL_PMC_MAX_GENERIC 8
-#define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
-#define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
-#define KVM_PMC_MAX_FIXED 3
-#define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
-#define KVM_AMD_PMC_MAX_GENERIC 6
+#define KVM_MAX(a, b) ((a) >= (b) ? (a) : (b))
+#define KVM_MAX_NR_INTEL_GP_COUNTERS 8
+#define KVM_MAX_NR_AMD_GP_COUNTERS 6
+#define KVM_MAX_NR_GP_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
+ KVM_MAX_NR_AMD_GP_COUNTERS)
+
+#define KVM_MAX_NR_INTEL_FIXED_COUTNERS 3
+#define KVM_MAX_NR_AMD_FIXED_COUTNERS 0
+#define KVM_MAX_NR_FIXED_COUNTERS KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUTNERS, \
+ KVM_MAX_NR_AMD_FIXED_COUTNERS)
struct kvm_pmu {
u8 version;
@@ -543,16 +554,16 @@ struct kvm_pmu {
unsigned nr_arch_fixed_counters;
unsigned available_event_types;
u64 fixed_ctr_ctrl;
- u64 fixed_ctr_ctrl_mask;
+ u64 fixed_ctr_ctrl_rsvd;
u64 global_ctrl;
u64 global_status;
u64 counter_bitmask[2];
- u64 global_ctrl_mask;
- u64 global_status_mask;
+ u64 global_ctrl_rsvd;
+ u64 global_status_rsvd;
u64 reserved_bits;
u64 raw_event_mask;
- struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
- struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
+ struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
+ struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];
/*
* Overlay the bitmap with a 64-bit atomic so that all bits can be
@@ -568,9 +579,9 @@ struct kvm_pmu {
u64 ds_area;
u64 pebs_enable;
- u64 pebs_enable_mask;
+ u64 pebs_enable_rsvd;
u64 pebs_data_cfg;
- u64 pebs_data_cfg_mask;
+ u64 pebs_data_cfg_rsvd;
/*
* If a guest counter is cross-mapped to host counter with different
@@ -601,18 +612,12 @@ enum {
KVM_DEBUGREG_WONT_EXIT = 2,
};
-struct kvm_mtrr_range {
- u64 base;
- u64 mask;
- struct list_head node;
-};
-
struct kvm_mtrr {
- struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
- mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
+ u64 var[KVM_NR_VAR_MTRR * 2];
+ u64 fixed_64k;
+ u64 fixed_16k[2];
+ u64 fixed_4k[8];
u64 deftype;
-
- struct list_head head;
};
/* Hyper-V SynIC timer */
@@ -739,6 +744,23 @@ struct kvm_queued_exception {
bool has_payload;
};
+/*
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM. Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
+ */
+enum kvm_only_cpuid_leafs {
+ CPUID_12_EAX = NCAPINTS,
+ CPUID_7_1_EDX,
+ CPUID_8000_0007_EDX,
+ CPUID_8000_0022_EAX,
+ CPUID_7_2_EDX,
+ CPUID_24_0_EBX,
+ NR_KVM_CPU_CAPS,
+
+ NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
+};
+
struct kvm_vcpu_arch {
/*
* rip and regs accesses must go through
@@ -760,6 +782,7 @@ struct kvm_vcpu_arch {
u32 pkru;
u32 hflags;
u64 efer;
+ u64 host_debugctl;
u64 apic_base;
struct kvm_lapic *apic; /* kernel irqchip context */
bool load_eoi_exitmap_pending;
@@ -813,6 +836,11 @@ struct kvm_vcpu_arch {
struct kvm_mmu_memory_cache mmu_shadow_page_cache;
struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
struct kvm_mmu_memory_cache mmu_page_header_cache;
+ /*
+ * This cache is to allocate external page table. E.g. private EPT used
+ * by the TDX module.
+ */
+ struct kvm_mmu_memory_cache mmu_external_spt_cache;
/*
* QEMU userspace and the guest each have their own FPU state.
@@ -854,27 +882,24 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
- struct kvm_hypervisor_cpuid kvm_cpuid;
+ bool cpuid_dynamic_bits_dirty;
bool is_amd_compatible;
/*
- * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
- * when "struct kvm_vcpu_arch" is no longer defined in an
- * arch/x86/include/asm header. The max is mostly arbitrary, i.e.
- * can be increased as necessary.
- */
-#define KVM_MAX_NR_GOVERNED_FEATURES BITS_PER_LONG
-
- /*
- * Track whether or not the guest is allowed to use features that are
- * governed by KVM, where "governed" means KVM needs to manage state
- * and/or explicitly enable the feature in hardware. Typically, but
- * not always, governed features can be used by the guest if and only
- * if both KVM and userspace want to expose the feature to the guest.
+ * cpu_caps holds the effective guest capabilities, i.e. the features
+ * the vCPU is allowed to use. Typically, but not always, features can
+ * be used by the guest if and only if both KVM and userspace want to
+ * expose the feature to the guest.
+ *
+ * A common exception is for virtualization holes, i.e. when KVM can't
+ * prevent the guest from using a feature, in which case the vCPU "has"
+ * the feature regardless of what KVM or userspace desires.
+ *
+ * Note, features that don't require KVM involvement in any way are
+ * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
+ * guest CPUID provided by userspace.
*/
- struct {
- DECLARE_BITMAP(enabled, KVM_MAX_NR_GOVERNED_FEATURES);
- } governed_features;
+ u32 cpu_caps[NR_KVM_CPU_CAPS];
u64 reserved_gpa_bits;
int maxphyaddr;
@@ -887,7 +912,8 @@ struct kvm_vcpu_arch {
int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
gpa_t time;
- struct pvclock_vcpu_time_info hv_clock;
+ s8 pvclock_tsc_shift;
+ u32 pvclock_tsc_mul;
unsigned int hw_tsc_khz;
struct gfn_to_pfn_cache pv_time;
/* set guest stopped flag in pvclock flags field */
@@ -975,8 +1001,8 @@ struct kvm_vcpu_arch {
u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
u16 vec;
u32 id;
- bool send_user_only;
u32 host_apf_flags;
+ bool send_always;
bool delivery_as_pf_vmexit;
bool pageready_pending;
} apf;
@@ -994,9 +1020,6 @@ struct kvm_vcpu_arch {
u64 msr_kvm_poll_control;
- /* set at EPT violation at this point */
- unsigned long exit_qualification;
-
/* pv related host specific info */
struct {
bool pv_unhalted;
@@ -1034,6 +1057,7 @@ struct kvm_vcpu_arch {
/* Protected Guests */
bool guest_state_protected;
+ bool guest_tsc_protected;
/*
* Set when PDPTS were loaded directly by the userspace without
@@ -1170,6 +1194,8 @@ struct kvm_xen {
struct gfn_to_pfn_cache shinfo_cache;
struct idr evtchn_ports;
unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+
+ struct kvm_xen_hvm_config hvm_config;
};
#endif
@@ -1207,7 +1233,7 @@ enum kvm_apicv_inhibit {
* APIC acceleration is disabled by a module parameter
* and/or not supported in hardware.
*/
- APICV_INHIBIT_REASON_DISABLE,
+ APICV_INHIBIT_REASON_DISABLED,
/*
* APIC acceleration is inhibited because AutoEOI feature is
@@ -1277,18 +1303,39 @@ enum kvm_apicv_inhibit {
* mapping between logical ID and vCPU.
*/
APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
+
+ NR_APICV_INHIBIT_REASONS,
};
+#define __APICV_INHIBIT_REASON(reason) \
+ { BIT(APICV_INHIBIT_REASON_##reason), #reason }
+
+#define APICV_INHIBIT_REASONS \
+ __APICV_INHIBIT_REASON(DISABLED), \
+ __APICV_INHIBIT_REASON(HYPERV), \
+ __APICV_INHIBIT_REASON(ABSENT), \
+ __APICV_INHIBIT_REASON(BLOCKIRQ), \
+ __APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED), \
+ __APICV_INHIBIT_REASON(APIC_ID_MODIFIED), \
+ __APICV_INHIBIT_REASON(APIC_BASE_MODIFIED), \
+ __APICV_INHIBIT_REASON(NESTED), \
+ __APICV_INHIBIT_REASON(IRQWIN), \
+ __APICV_INHIBIT_REASON(PIT_REINJ), \
+ __APICV_INHIBIT_REASON(SEV), \
+ __APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)
+
struct kvm_arch {
- unsigned long vm_type;
unsigned long n_used_mmu_pages;
unsigned long n_requested_mmu_pages;
unsigned long n_max_mmu_pages;
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
+ u8 vm_type;
+ bool has_private_mem;
+ bool has_protected_state;
+ bool pre_fault_allowed;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct list_head active_mmu_pages;
- struct list_head zapped_obsolete_pages;
/*
* A list of kvm_mmu_page structs that, if zapped, could possibly be
* replaced by an NX huge page. A shadow page is on this list if its
@@ -1312,8 +1359,8 @@ struct kvm_arch {
*/
spinlock_t mmu_unsync_pages_lock;
- struct iommu_domain *iommu_domain;
- bool iommu_noncoherent;
+ u64 shadow_mmio_value;
+
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
@@ -1360,6 +1407,7 @@ struct kvm_arch {
u32 default_tsc_khz;
bool user_set_tsc;
+ u64 apic_bus_cycle_ns;
seqcount_raw_spinlock_t pvclock_sc;
bool use_master_clock;
@@ -1368,8 +1416,6 @@ struct kvm_arch {
struct delayed_work kvmclock_update_work;
struct delayed_work kvmclock_sync_work;
- struct kvm_xen_hvm_config xen_hvm_config;
-
/* reads protected by irq_srcu, writes by irq_lock */
struct hlist_head mask_notifier_list;
@@ -1422,11 +1468,18 @@ struct kvm_arch {
bool sgx_provisioning_allowed;
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
- struct task_struct *nx_huge_page_recovery_thread;
+ struct vhost_task *nx_huge_page_recovery_thread;
+ u64 nx_huge_page_last;
+ struct once nx_once;
#ifdef CONFIG_X86_64
- /* The number of TDP MMU pages across all roots. */
+#ifdef CONFIG_KVM_PROVE_MMU
+ /*
+ * The number of TDP MMU pages across all roots. Used only to sanity
+ * check that KVM isn't leaking TDP MMU pages.
+ */
atomic64_t tdp_mmu_pages;
+#endif
/*
* List of struct kvm_mmu_pages being used as roots.
@@ -1434,6 +1487,7 @@ struct kvm_arch {
* tdp_mmu_page set.
*
* For reads, this list is protected by:
+ * RCU alone or
* the MMU lock in read mode + RCU or
* the MMU lock in write mode
*
@@ -1514,6 +1568,8 @@ struct kvm_arch {
*/
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
struct kvm_mmu_memory_cache split_desc_cache;
+
+ gfn_t gfn_direct_bits;
};
struct kvm_vm_stat {
@@ -1606,8 +1662,10 @@ struct kvm_x86_ops {
int (*check_processor_compatibility)(void);
- int (*hardware_enable)(void);
- void (*hardware_disable)(void);
+ int (*enable_virtualization_cpu)(void);
+ void (*disable_virtualization_cpu)(void);
+ cpu_emergency_virt_cb *emergency_disable_virtualization_cpu;
+
void (*hardware_unsetup)(void);
bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
@@ -1633,6 +1691,7 @@ struct kvm_x86_ops {
void (*get_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
int (*get_cpl)(struct kvm_vcpu *vcpu);
+ int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu);
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
@@ -1647,6 +1706,7 @@ struct kvm_x86_ops {
void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
+ void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
@@ -1704,13 +1764,12 @@ struct kvm_x86_ops {
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
- bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
+
+ const bool x2apic_icr_is_split;
const unsigned long required_apicv_inhibits;
bool allow_apicv_in_x2apic_without_x2apic_virtualization;
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
- void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
- void (*hwapic_isr_update)(int isr);
- bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
+ void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
@@ -1724,6 +1783,21 @@ struct kvm_x86_ops {
void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
int root_level);
+ /* Update external mapping with page table link. */
+ int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
+ /* Update the external page table from spte getting set. */
+ int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ kvm_pfn_t pfn_for_gfn);
+
+ /* Update external page tables for page table about to be freed. */
+ int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ void *external_spt);
+
+ /* Update external page table from spte getting removed, and flush TLB. */
+ int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level,
+ kvm_pfn_t pfn_for_gfn);
+
bool (*has_wbinvd_exit)(void);
u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
@@ -1732,12 +1806,15 @@ struct kvm_x86_ops {
void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu);
/*
- * Retrieve somewhat arbitrary exit information. Intended to
+ * Retrieve somewhat arbitrary exit/entry information. Intended to
* be used only from within tracepoints or error paths.
*/
void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
u64 *info1, u64 *info2,
- u32 *exit_int_info, u32 *exit_int_info_err_code);
+ u32 *intr_info, u32 *error_code);
+
+ void (*get_entry_info)(struct kvm_vcpu *vcpu,
+ u32 *intr_info, u32 *error_code);
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
@@ -1745,8 +1822,6 @@ struct kvm_x86_ops {
struct x86_exception *exception);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
- void (*sched_in)(struct kvm_vcpu *vcpu, int cpu);
-
/*
* Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
* value indicates CPU dirty logging is unsupported or disabled.
@@ -1779,6 +1854,7 @@ struct kvm_x86_ops {
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
#endif
+ int (*dev_get_attr)(u32 group, u64 attr, u64 *val);
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
@@ -1786,7 +1862,7 @@ struct kvm_x86_ops {
int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
void (*guest_memory_reclaimed)(struct kvm *kvm);
- int (*get_msr_feature)(struct kvm_msr_entry *entry);
+ int (*get_feature_msr)(u32 msr, u64 *data);
int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
void *insn, int insn_len);
@@ -1807,6 +1883,9 @@ struct kvm_x86_ops {
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
+ int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
+ void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end);
+ int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn);
};
struct kvm_x86_nested_ops {
@@ -1814,7 +1893,7 @@ struct kvm_x86_nested_ops {
bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
u32 error_code);
int (*check_events)(struct kvm_vcpu *vcpu);
- bool (*has_events)(struct kvm_vcpu *vcpu);
+ bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection);
void (*triple_fault)(struct kvm_vcpu *vcpu);
int (*get_state)(struct kvm_vcpu *vcpu,
struct kvm_nested_state __user *user_kvm_nested_state,
@@ -1844,14 +1923,17 @@ struct kvm_arch_async_pf {
gfn_t gfn;
unsigned long cr3;
bool direct_map;
+ u64 error_code;
};
extern u32 __read_mostly kvm_nr_uret_msrs;
-extern u64 __read_mostly host_efer;
extern bool __read_mostly allow_smaller_maxphyaddr;
extern bool __read_mostly enable_apicv;
extern struct kvm_x86_ops kvm_x86_ops;
+#define kvm_x86_call(func) static_call(kvm_x86_##func)
+#define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func)
+
#define KVM_X86_OP(func) \
DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
#define KVM_X86_OP_OPTIONAL KVM_X86_OP
@@ -1875,7 +1957,7 @@ void kvm_arch_free_vm(struct kvm *kvm);
static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
if (kvm_x86_ops.flush_remote_tlbs &&
- !static_call(kvm_x86_flush_remote_tlbs)(kvm))
+ !kvm_x86_call(flush_remote_tlbs)(kvm))
return 0;
else
return -ENOTSUPP;
@@ -1888,7 +1970,7 @@ static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn,
if (!kvm_x86_ops.flush_remote_tlbs_range)
return -EOPNOTSUPP;
- return static_call(kvm_x86_flush_remote_tlbs_range)(kvm, gfn, nr_pages);
+ return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages);
}
#endif /* CONFIG_HYPERV */
@@ -1927,12 +2009,13 @@ void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
const struct kvm_memory_slot *memslot,
u64 start, u64 end,
int target_level);
-void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *memslot);
+void kvm_mmu_recover_huge_pages(struct kvm *kvm,
+ const struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
const struct kvm_memory_slot *memslot);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
+void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
@@ -1988,8 +2071,8 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
* VMware backdoor emulation handles select instructions
* and reinjects the #GP for all other cases.
*
- * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
- * case the CR2/GPA value pass on the stack is valid.
+ * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case
+ * the CR2/GPA value pass on the stack is valid.
*
* EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
* state and inject single-step #DBs after skipping
@@ -2024,6 +2107,11 @@ u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
+static inline bool kvm_can_emulate_event_vectoring(int emul_type)
+{
+ return !(emul_type & EMULTYPE_PF);
+}
+
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
@@ -2031,8 +2119,12 @@ void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
u64 *data, u8 ndata);
void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
+void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa);
+
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
+int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data);
+int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data);
int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
@@ -2081,8 +2173,8 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
-void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
-void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
+void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr,
+ bool has_error_code, u32 error_code);
void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
struct x86_exception *fault);
@@ -2109,7 +2201,15 @@ int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
-int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
+bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
+ bool always_retry);
+
+static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu,
+ gpa_t cr2_or_gpa)
+{
+ return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false);
+}
+
void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
ulong roots_to_free);
void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
@@ -2140,10 +2240,9 @@ static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
}
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
-
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len);
+void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
u64 addr, unsigned long roots);
@@ -2153,12 +2252,15 @@ void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
int tdp_max_root_level, int tdp_huge_page_level);
+
#ifdef CONFIG_KVM_PRIVATE_MEM
-#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.vm_type != KVM_X86_DEFAULT_VM)
+#define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem)
#else
#define kvm_arch_has_private_mem(kvm) false
#endif
+#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state)
+
static inline u16 kvm_read_ldt(void)
{
u16 ldt;
@@ -2219,6 +2321,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_cpu_has_extint(struct kvm_vcpu *v);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
+int kvm_cpu_get_extint(struct kvm_vcpu *v);
int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
@@ -2280,12 +2383,12 @@ static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
+ kvm_x86_call(vcpu_blocking)(vcpu);
}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
{
- static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
+ kvm_x86_call(vcpu_unblocking)(vcpu);
}
static inline int kvm_cpu_get_apicid(int mps_cpu)
@@ -2310,7 +2413,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
KVM_X86_QUIRK_OUT_7E_INC_RIP | \
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
- KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
+ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
+ KVM_X86_QUIRK_SLOT_ZAP_ALL | \
+ KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
/*
* KVM previously used a u32 field in kvm_run to indicate the hypercall was
@@ -2319,4 +2424,9 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
*/
#define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1)
+static inline bool kvm_arch_has_irq_bypass(void)
+{
+ return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
+}
+
#endif /* _ASM_X86_KVM_HOST_H */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index dc31b13b87a0..b51d8a4673f5 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -38,7 +38,7 @@
#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN)
#define SYM_F_ALIGN __FUNC_ALIGN
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define RET jmp __x86_return_thunk
@@ -50,7 +50,7 @@
#endif
#endif /* CONFIG_MITIGATION_RETPOLINE */
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#if defined(CONFIG_MITIGATION_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
#define ASM_RET "jmp __x86_return_thunk\n\t"
@@ -62,7 +62,7 @@
#endif
#endif /* CONFIG_MITIGATION_RETPOLINE */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
@@ -119,33 +119,27 @@
/* SYM_FUNC_START -- use for global functions */
#define SYM_FUNC_START(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN)
/* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
#define SYM_FUNC_START_NOALIGN(name) \
- SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE)
/* SYM_FUNC_START_LOCAL -- use for local functions */
#define SYM_FUNC_START_LOCAL(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN)
/* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
#define SYM_FUNC_START_LOCAL_NOALIGN(name) \
- SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_LOCAL, SYM_A_NONE)
/* SYM_FUNC_START_WEAK -- use for weak functions */
#define SYM_FUNC_START_WEAK(name) \
- SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN)
/* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
#define SYM_FUNC_START_WEAK_NOALIGN(name) \
- SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \
- ENDBR
+ SYM_START(name, SYM_L_WEAK, SYM_A_NONE)
#endif /* _ASM_X86_LINKAGE_H */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index de3118305838..6c77c03139f7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -13,6 +13,7 @@
#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
#define MCG_EXT_P BIT_ULL(9) /* Extended registers available */
#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define MCG_SEAM_NR BIT_ULL(12) /* MCG_STATUS_SEAM_NR supported */
#define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */
#define MCG_EXT_CNT_SHIFT 16
#define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT)
@@ -25,6 +26,7 @@
#define MCG_STATUS_EIPV BIT_ULL(1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP BIT_ULL(2) /* machine check in progress */
#define MCG_STATUS_LMCES BIT_ULL(3) /* LMCE signaled */
+#define MCG_STATUS_SEAM_NR BIT_ULL(12) /* Machine check inside SEAM non-root mode */
/* MCG_EXT_CTL register defines */
#define MCG_EXT_CTL_LMCE_EN BIT_ULL(0) /* Enable LMCE */
@@ -59,6 +61,7 @@
* - TCC bit is present in MCx_STATUS.
*/
#define MCI_CONFIG_MCAX 0x1
+#define MCI_CONFIG_FRUTEXT BIT_ULL(9)
#define MCI_IPID_MCATYPE 0xFFFF0000
#define MCI_IPID_HWID 0xFFF
@@ -120,6 +123,9 @@
#define MSR_AMD64_SMCA_MC0_DESTAT 0xc0002008
#define MSR_AMD64_SMCA_MC0_DEADDR 0xc0002009
#define MSR_AMD64_SMCA_MC0_MISC1 0xc000200a
+/* Registers MISC2 to MISC4 are at offsets B to D. */
+#define MSR_AMD64_SMCA_MC0_SYND1 0xc000200e
+#define MSR_AMD64_SMCA_MC0_SYND2 0xc000200f
#define MSR_AMD64_SMCA_MCx_CTL(x) (MSR_AMD64_SMCA_MC0_CTL + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_STATUS(x) (MSR_AMD64_SMCA_MC0_STATUS + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_ADDR(x) (MSR_AMD64_SMCA_MC0_ADDR + 0x10*(x))
@@ -130,6 +136,8 @@
#define MSR_AMD64_SMCA_MCx_DESTAT(x) (MSR_AMD64_SMCA_MC0_DESTAT + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_DEADDR(x) (MSR_AMD64_SMCA_MC0_DEADDR + 0x10*(x))
#define MSR_AMD64_SMCA_MCx_MISCy(x, y) ((MSR_AMD64_SMCA_MC0_MISC1 + y) + (0x10*(x)))
+#define MSR_AMD64_SMCA_MCx_SYND1(x) (MSR_AMD64_SMCA_MC0_SYND1 + 0x10*(x))
+#define MSR_AMD64_SMCA_MCx_SYND2(x) (MSR_AMD64_SMCA_MC0_SYND2 + 0x10*(x))
#define XEC(x, mask) (((x) >> 16) & mask)
@@ -185,6 +193,32 @@ enum mce_notifier_prios {
MCE_PRIO_HIGHEST = MCE_PRIO_CEC
};
+/**
+ * struct mce_hw_err - Hardware Error Record.
+ * @m: Machine Check record.
+ * @vendor: Vendor-specific error information.
+ *
+ * Vendor-specific fields should not be added to struct mce. Instead, vendors
+ * should export their vendor-specific data through their structure in the
+ * vendor union below.
+ *
+ * AMD's vendor data is parsed by error decoding tools for supplemental error
+ * information. Thus, current offsets of existing fields must be maintained.
+ * Only add new fields at the end of AMD's vendor structure.
+ */
+struct mce_hw_err {
+ struct mce m;
+
+ union vendor_info {
+ struct {
+ u64 synd1; /* MCA_SYND1 MSR */
+ u64 synd2; /* MCA_SYND2 MSR */
+ } amd;
+ } vendor;
+};
+
+#define to_mce_hw_err(mce) container_of(mce, struct mce_hw_err, m)
+
struct notifier_block;
extern void mce_register_decode_chain(struct notifier_block *nb);
extern void mce_unregister_decode_chain(struct notifier_block *nb);
@@ -219,8 +253,8 @@ static inline int apei_smca_report_x86_error(struct cper_ia_proc_ctx *ctx_info,
u64 lapic_id) { return -EINVAL; }
#endif
-void mce_setup(struct mce *m);
-void mce_log(struct mce *m);
+void mce_prep_record(struct mce_hw_err *err);
+void mce_log(struct mce_hw_err *err);
DECLARE_PER_CPU(struct device *, mce_device);
/* Maximum number of MCA banks per CPU. */
@@ -242,7 +276,7 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
#endif
-int mce_available(struct cpuinfo_x86 *c);
+bool mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
bool mce_usable_address(struct mce *m);
@@ -259,9 +293,8 @@ enum mcp_flags {
MCP_DONTLOG = BIT(2), /* only clear, don't log */
MCP_QUEUE_LOG = BIT(3), /* only queue to genpool */
};
-bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-int mce_notify_irq(void);
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
DECLARE_PER_CPU(struct mce, injectm);
@@ -351,8 +384,6 @@ static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
#endif
-static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
-
unsigned long copy_mc_fragile_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index f922b682b9b4..1530ee301dfe 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -10,7 +10,7 @@
#ifndef __X86_MEM_ENCRYPT_H__
#define __X86_MEM_ENCRYPT_H__
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/init.h>
#include <linux/cc_platform.h>
@@ -114,6 +114,6 @@ void add_encrypt_protection_map(void);
extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index ce4677b8b735..8b8055a8eb9e 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -37,6 +37,8 @@ typedef struct {
*/
atomic64_t tlb_gen;
+ unsigned long next_trim_cpumask;
+
#ifdef CONFIG_MODIFY_LDT_SYSCALL
struct rw_semaphore ldt_usr_sem;
struct ldt_struct *ldt;
@@ -67,6 +69,18 @@ typedef struct {
u16 pkey_allocation_map;
s16 execute_only_pkey;
#endif
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+ /*
+ * The global ASID will be a non-zero value when the process has
+ * the same ASID across all CPUs, allowing it to make use of
+ * hardware-assisted remote TLB invalidation like AMD INVLPGB.
+ */
+ u16 global_asid;
+
+ /* The process is transitioning to a new global ASID number. */
+ bool asid_transition;
+#endif
} mm_context_t;
#define INIT_MM_CONTEXT(mm) \
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8dac45a2c7fc..2398058b6e83 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_MMU_CONTEXT_H
#define _ASM_X86_MMU_CONTEXT_H
-#include <asm/desc.h>
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/pkeys.h>
@@ -13,6 +12,7 @@
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/gsseg.h>
+#include <asm/desc.h>
extern atomic64_t last_mm_ctx_id;
@@ -88,7 +88,13 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
#ifdef CONFIG_ADDRESS_MASKING
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
- return mm->context.lam_cr3_mask;
+ /*
+ * When switch_mm_irqs_off() is called for a kthread, it may race with
+ * LAM enablement. switch_mm_irqs_off() uses the LAM mask to do two
+ * things: populate CR3 and populate 'cpu_tlbstate.lam'. Make sure it
+ * reads a single value for both.
+ */
+ return READ_ONCE(mm->context.lam_cr3_mask);
}
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
@@ -133,6 +139,11 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm)
#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
+#define mm_init_global_asid mm_init_global_asid
+extern void mm_init_global_asid(struct mm_struct *mm);
+
+extern void mm_free_global_asid(struct mm_struct *mm);
+
/*
* Init a new mm. Used on mm copies, like at fork()
* and on mm's that are brand-new, like at execve().
@@ -145,6 +156,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
atomic64_set(&mm->context.tlb_gen, 0);
+ mm->context.next_trim_cpumask = jiffies + HZ;
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
@@ -154,6 +166,8 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
+
+ mm_init_global_asid(mm);
mm_reset_untag_mask(mm);
init_new_context_ldt(mm);
return 0;
@@ -163,6 +177,7 @@ static inline int init_new_context(struct task_struct *tsk,
static inline void destroy_context(struct mm_struct *mm)
{
destroy_context_ldt(mm);
+ mm_free_global_asid(mm);
}
extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
@@ -232,11 +247,6 @@ static inline bool is_64bit_mm(struct mm_struct *mm)
}
#endif
-static inline void arch_unmap(struct mm_struct *mm, unsigned long start,
- unsigned long end)
-{
-}
-
/*
* We only want to enforce protection keys on the current process
* because we effectively have no access to PKRU for other
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
deleted file mode 100644
index c41b41edd691..000000000000
--- a/arch/x86/include/asm/mmzone.h
+++ /dev/null
@@ -1,6 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifdef CONFIG_X86_32
-# include <asm/mmzone_32.h>
-#else
-# include <asm/mmzone_64.h>
-#endif
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
deleted file mode 100644
index 2d4515e8b7df..000000000000
--- a/arch/x86/include/asm/mmzone_32.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Written by Pat Gaughen (gone@us.ibm.com) Mar 2002
- *
- */
-
-#ifndef _ASM_X86_MMZONE_32_H
-#define _ASM_X86_MMZONE_32_H
-
-#include <asm/smp.h>
-
-#ifdef CONFIG_NUMA
-extern struct pglist_data *node_data[];
-#define NODE_DATA(nid) (node_data[nid])
-#endif /* CONFIG_NUMA */
-
-#endif /* _ASM_X86_MMZONE_32_H */
diff --git a/arch/x86/include/asm/mmzone_64.h b/arch/x86/include/asm/mmzone_64.h
deleted file mode 100644
index 0c585046f744..000000000000
--- a/arch/x86/include/asm/mmzone_64.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* K8 NUMA support */
-/* Copyright 2002,2003 by Andi Kleen, SuSE Labs */
-/* 2.5 Version loosely based on the NUMAQ Code by Pat Gaughen. */
-#ifndef _ASM_X86_MMZONE_64_H
-#define _ASM_X86_MMZONE_64_H
-
-#ifdef CONFIG_NUMA
-
-#include <linux/mmdebug.h>
-#include <asm/smp.h>
-
-extern struct pglist_data *node_data[];
-
-#define NODE_DATA(nid) (node_data[nid])
-
-#endif
-#endif /* _ASM_X86_MMZONE_64_H */
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index c72c7ff78fcd..d593e52e6635 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -16,10 +16,10 @@ extern int pic_mode;
* Summit or generic (i.e. installer) kernels need lots of bus entries.
* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets.
*/
-#if CONFIG_BASE_SMALL == 0
-# define MAX_MP_BUSSES 260
-#else
+#ifdef CONFIG_BASE_SMALL
# define MAX_MP_BUSSES 32
+#else
+# define MAX_MP_BUSSES 260
#endif
#define MAX_IRQ_SOURCES 256
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 390c4d13956d..bab5ccfc60a7 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -6,10 +6,9 @@
#include <linux/nmi.h>
#include <linux/msi.h>
#include <linux/io.h>
-#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
-#include <asm/mshyperv.h>
+#include <hyperv/hvhdk.h>
/*
* Hyper-V always provides a single IO-APIC at this MMIO address.
@@ -40,13 +39,10 @@ static inline unsigned char hv_get_nmi_reason(void)
}
#if IS_ENABLED(CONFIG_HYPERV)
-extern int hyperv_init_cpuhp;
extern bool hyperv_paravisor_present;
extern void *hv_hypercall_pg;
-extern u64 hv_current_partition_id;
-
extern union hv_ghcb * __percpu *hv_ghcb_pg;
bool hv_isolation_type_snp(void);
@@ -60,10 +56,6 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2);
#define HV_AP_INIT_GPAT_DEFAULT 0x0007040600070406ULL
#define HV_AP_SEGMENT_LIMIT 0xffffffff
-int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
-int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
-int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
-
/*
* If the hypercall involves no input or output parameters, the hypervisor
* ignores the corresponding GPA pointer.
@@ -79,11 +71,11 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_tdx_hypercall(control, input_address, output_address);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[output_address], %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address)
+ : [output_address] "r" (output_address)
: "cc", "memory", "r8", "r9", "r10", "r11");
return hv_status;
}
@@ -91,12 +83,12 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
if (!hv_hypercall_pg)
return U64_MAX;
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[output_address], %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input_address)
- : "r" (output_address),
- THUNK_TARGET(hv_hypercall_pg)
+ : [output_address] "r" (output_address),
+ THUNK_TARGET(hv_hypercall_pg)
: "cc", "memory", "r8", "r9", "r10", "r11");
#else
u32 input_address_hi = upper_32_bits(input_address);
@@ -162,7 +154,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
: "cc", "edi", "esi");
}
#endif
- return hv_status;
+ return hv_status;
}
static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
@@ -189,18 +181,18 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
return hv_tdx_hypercall(control, input1, input2);
if (hv_isolation_type_snp() && !hyperv_paravisor_present) {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[input2], %%r8\n"
"vmmcall"
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "r" (input2)
+ : [input2] "r" (input2)
: "cc", "r8", "r9", "r10", "r11");
} else {
- __asm__ __volatile__("mov %4, %%r8\n"
+ __asm__ __volatile__("mov %[input2], %%r8\n"
CALL_NOSPEC
: "=a" (hv_status), ASM_CALL_CONSTRAINT,
"+c" (control), "+d" (input1)
- : "r" (input2),
+ : [input2] "r" (input2),
THUNK_TARGET(hv_hypercall_pg)
: "cc", "r8", "r9", "r10", "r11");
}
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index e72c2b872957..e6134ef2263d 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -25,6 +25,7 @@
#define _EFER_SVME 12 /* Enable virtualization */
#define _EFER_LMSLE 13 /* Long Mode Segment Limit Enable */
#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define _EFER_TCE 15 /* Enable Translation Cache Extensions */
#define _EFER_AUTOIBRS 21 /* Enable Automatic IBRS */
#define EFER_SCE (1<<_EFER_SCE)
@@ -34,8 +35,23 @@
#define EFER_SVME (1<<_EFER_SVME)
#define EFER_LMSLE (1<<_EFER_LMSLE)
#define EFER_FFXSR (1<<_EFER_FFXSR)
+#define EFER_TCE (1<<_EFER_TCE)
#define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS)
+/*
+ * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc.
+ * Most MSRs support/allow only a subset of memory types, but the values
+ * themselves are common across all relevant MSRs.
+ */
+#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */
+#define X86_MEMTYPE_WC 1ull /* Write Combining */
+/* RESERVED 2 */
+/* RESERVED 3 */
+#define X86_MEMTYPE_WT 4ull /* Write Through */
+#define X86_MEMTYPE_WP 5ull /* Write Protected */
+#define X86_MEMTYPE_WB 6ull /* Write Back */
+#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */
+
/* FRED MSRs */
#define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */
#define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */
@@ -170,6 +186,10 @@
* CPU is not affected by Branch
* History Injection.
*/
+#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
+ * IA32_XAPIC_DISABLE_STATUS MSR
+ * supported
+ */
#define ARCH_CAP_PBRSB_NO BIT(24) /*
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
@@ -192,11 +212,6 @@
* File.
*/
-#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
- * IA32_XAPIC_DISABLE_STATUS MSR
- * supported
- */
-
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
* Writeback and invalidate the
@@ -248,6 +263,8 @@
#define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT)
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4
#define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT)
+#define MSR_INTEGRITY_CAPS_SBAF_BIT 8
+#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT)
#define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9)
#define MSR_LBR_NHM_FROM 0x00000680
@@ -364,6 +381,12 @@
#define MSR_IA32_CR_PAT 0x00000277
+#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \
+ ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \
+ (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \
+ (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \
+ (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56))
+
#define MSR_IA32_DEBUGCTLMSR 0x000001d9
#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
@@ -374,7 +397,8 @@
#define MSR_IA32_PASID_VALID BIT_ULL(31)
/* DEBUGCTLMSR bits (others vary by model): */
-#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */
+#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT)
#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2)
@@ -567,6 +591,12 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_STEP 4
+
/* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
@@ -581,6 +611,7 @@
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
@@ -617,6 +648,7 @@
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
@@ -655,11 +687,14 @@
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
#define MSR_AMD64_SNP_RESV_BIT 18
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
-
-#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
-
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_AMD64_RMP_CFG 0xc0010136
+#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0
+#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT)
+#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8)
+
+#define MSR_SVSM_CAA 0xc001f000
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
@@ -668,15 +703,17 @@
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
-#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
-#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
-#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
-#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
+/* Masks for use with MSR_AMD_CPPC_CAP1 */
+#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24)
-#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
-#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
-#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
-#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
+/* Masks for use with MSR_AMD_CPPC_REQ */
+#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0)
+#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8)
+#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16)
+#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24)
/* AMD Performance Counter Global Status and Control MSRs */
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
@@ -688,6 +725,7 @@
/* Zen4 */
#define MSR_ZEN4_BP_CFG 0xc001102e
+#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4
#define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5
/* Fam 19h MSRs */
@@ -782,6 +820,8 @@
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
+#define MSR_K7_HWCR_CPB_DIS_BIT 25
+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
@@ -1148,15 +1188,6 @@
#define MSR_IA32_VMX_VMFUNC 0x00000491
#define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492
-/* VMX_BASIC bits and bitmasks */
-#define VMX_BASIC_VMCS_SIZE_SHIFT 32
-#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
-#define VMX_BASIC_64 0x0001000000000000LLU
-#define VMX_BASIC_MEM_TYPE_SHIFT 50
-#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
-#define VMX_BASIC_MEM_TYPE_WB 6LLU
-#define VMX_BASIC_INOUT 0x0040000000000000LLU
-
/* Resctrl MSRs: */
/* - Intel: */
#define MSR_IA32_L3_QOS_CFG 0xc81
@@ -1165,6 +1196,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50
@@ -1173,11 +1205,6 @@
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
-/* MSR_IA32_VMX_MISC bits */
-#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14)
-#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
-#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
-
/* AMD-V MSRs */
#define MSR_VM_CR 0xc0010114
#define MSR_VM_IGNNE 0xc0010115
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index d642037f9ed5..9397a319d165 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -4,7 +4,7 @@
#include "msr-index.h"
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/asm.h>
#include <asm/errno.h>
@@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
: : "c" (msr), "a"(low), "d" (high) : "memory");
}
-/*
- * WRMSRNS behaves exactly like WRMSR with the only difference being
- * that it is not a serializing instruction by default.
- */
-static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
-{
- /* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
- asm volatile("1: .byte 0x0f,0x01,0xc6\n"
- "2:\n"
- _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
- : : "c" (msr), "a"(low), "d" (high));
-}
-
#define native_rdmsr(msr, val1, val2) \
do { \
u64 __val = __rdmsr((msr)); \
@@ -312,9 +299,19 @@ do { \
#endif /* !CONFIG_PARAVIRT_XXL */
+/* Instruction opcode for WRMSRNS supported in binutils >= 2.40 */
+#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
+
+/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
static __always_inline void wrmsrns(u32 msr, u64 val)
{
- __wrmsrns(msr, val, val >> 32);
+ /*
+ * WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
+ * DS prefix to avoid a trailing NOP.
+ */
+ asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
+ "2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
+ : : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
}
/*
@@ -400,5 +397,5 @@ static inline int wrmsr_safe_regs_on_cpu(unsigned int cpu, u32 regs[8])
return wrmsr_safe_regs(regs);
}
#endif /* CONFIG_SMP */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_MSR_H */
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index 090d658a85a6..c69e269937c5 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,8 +58,8 @@ struct mtrr_state_type {
*/
# ifdef CONFIG_MTRR
void mtrr_bp_init(void);
-void mtrr_overwrite_state(struct mtrr_var_range *var, unsigned int num_var,
- mtrr_type def_type);
+void guest_force_mtrr_state(struct mtrr_var_range *var, unsigned int num_var,
+ mtrr_type def_type);
extern u8 mtrr_type_lookup(u64 addr, u64 end, u8 *uniform);
extern void mtrr_save_fixed_ranges(void *);
extern void mtrr_save_state(void);
@@ -69,16 +69,15 @@ extern int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment);
extern int mtrr_del(int reg, unsigned long base, unsigned long size);
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
-extern void mtrr_bp_restore(void);
extern int mtrr_trim_uncached_memory(unsigned long end_pfn);
extern int amd_special_default_mtrr(void);
void mtrr_disable(void);
void mtrr_enable(void);
void mtrr_generic_set_state(void);
# else
-static inline void mtrr_overwrite_state(struct mtrr_var_range *var,
- unsigned int num_var,
- mtrr_type def_type)
+static inline void guest_force_mtrr_state(struct mtrr_var_range *var,
+ unsigned int num_var,
+ mtrr_type def_type)
{
}
@@ -117,7 +116,6 @@ static inline int mtrr_trim_uncached_memory(unsigned long end_pfn)
return 0;
}
#define mtrr_bp_init() do {} while (0)
-#define mtrr_bp_restore() do {} while (0)
#define mtrr_disable() do {} while (0)
#define mtrr_enable() do {} while (0)
#define mtrr_generic_set_state() do {} while (0)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index 920426d691ce..ce857ef54cf1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -15,7 +15,6 @@
#define MWAIT_HINT2SUBSTATE(hint) ((hint) & MWAIT_CSTATE_MASK)
#define MWAIT_C1_SUBSTATE_MASK 0xf0
-#define CPUID_MWAIT_LEAF 5
#define CPUID5_ECX_EXTENSIONS_SUPPORTED 0x1
#define CPUID5_ECX_INTERRUPT_BREAK 0x2
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 41a0ebb699ec..f677382093f3 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -56,6 +56,8 @@ int __register_nmi_handler(unsigned int, struct nmiaction *);
void unregister_nmi_handler(unsigned int, const char *);
+void set_emergency_nmi_handler(unsigned int type, nmi_handler_t handler);
+
void stop_nmi(void);
void restart_nmi(void);
void local_touch_nmi(void);
diff --git a/arch/x86/include/asm/nops.h b/arch/x86/include/asm/nops.h
index 1c1b7550fa55..cd94221d8335 100644
--- a/arch/x86/include/asm/nops.h
+++ b/arch/x86/include/asm/nops.h
@@ -82,7 +82,7 @@
#define ASM_NOP7 _ASM_BYTES(BYTES_NOP7)
#define ASM_NOP8 _ASM_BYTES(BYTES_NOP8)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const unsigned char * const x86_nops[];
#endif
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ff5f1ecc7d1e..5c43f145454d 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -12,7 +12,6 @@
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
-#include <asm/current.h>
/*
* Call depth tracking for Intel SKL CPUs to address the RSB underflow
@@ -78,21 +77,21 @@
#include <asm/asm-offsets.h>
#define CREDIT_CALL_DEPTH \
- movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+ movq $-1, PER_CPU_VAR(__x86_call_depth);
#define RESET_CALL_DEPTH \
xor %eax, %eax; \
bts $63, %rax; \
- movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+ movq %rax, PER_CPU_VAR(__x86_call_depth);
#define RESET_CALL_DEPTH_FROM_CALL \
movb $0xfc, %al; \
shl $56, %rax; \
- movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ movq %rax, PER_CPU_VAR(__x86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#define INCREMENT_CALL_DEPTH \
- sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \
+ sarq $5, PER_CPU_VAR(__x86_call_depth); \
CALL_THUNKS_DEBUG_INC_CALLS
#else
@@ -177,19 +176,7 @@
add $(BITS_PER_LONG/8), %_ASM_SP; \
lfence;
-#ifdef __ASSEMBLY__
-
-/*
- * This should be used immediately before an indirect jump/call. It tells
- * objtool the subsequent indirect jump/call is vouched safe for retpoline
- * builds.
- */
-.macro ANNOTATE_RETPOLINE_SAFE
-.Lhere_\@:
- .pushsection .discard.retpoline_safe
- .long .Lhere_\@
- .popsection
-.endm
+#ifdef __ASSEMBLER__
/*
* (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
@@ -210,9 +197,8 @@
.endm
/*
- * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call
- * to the retpoline thunk with a CS prefix when the register requires
- * a RAX prefix byte to encode. Also see apply_retpolines().
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
*/
.macro __CS_PREFIX reg:req
.irp rs,r8,r9,r10,r11,r12,r13,r14,r15
@@ -283,7 +269,7 @@
* typically has NO_MELTDOWN).
*
* While retbleed_untrain_ret() doesn't clobber anything but requires stack,
- * entry_ibpb() will clobber AX, CX, DX.
+ * write_ibpb() will clobber AX, CX, DX.
*
* As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
* where we have a stack but before any RET instruction.
@@ -293,7 +279,7 @@
VALIDATE_UNRET_END
CALL_UNTRAIN_RET
ALTERNATIVE_2 "", \
- "call entry_ibpb", \ibpb_feature, \
+ "call write_ibpb", \ibpb_feature, \
__stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH
#endif
.endm
@@ -323,7 +309,16 @@
* Note: Only the memory operand variant of VERW clears the CPU buffers.
*/
.macro CLEAR_CPU_BUFFERS
- ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+#ifdef CONFIG_X86_64
+ ALTERNATIVE "", "verw mds_verw_sel(%rip)", X86_FEATURE_CLEAR_CPU_BUF
+#else
+ /*
+ * In 32bit mode, the memory operand must be a %cs reference. The data
+ * segments may not be usable (vm86 mode), and the stack segment may not
+ * be flat (ESPFIX32).
+ */
+ ALTERNATIVE "", "verw %cs:mds_verw_sel", X86_FEATURE_CLEAR_CPU_BUF
+#endif
.endm
#ifdef CONFIG_X86_64
@@ -339,13 +334,7 @@
#define CLEAR_BRANCH_HISTORY_VMEXIT
#endif
-#else /* __ASSEMBLY__ */
-
-#define ANNOTATE_RETPOLINE_SAFE \
- "999:\n\t" \
- ".pushsection .discard.retpoline_safe\n\t" \
- ".long 999b\n\t" \
- ".popsection\n\t"
+#else /* __ASSEMBLER__ */
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
extern retpoline_thunk_t __x86_indirect_thunk_array[];
@@ -379,7 +368,7 @@ extern void srso_return_thunk(void);
extern void srso_alias_return_thunk(void);
extern void entry_untrain_ret(void);
-extern void entry_ibpb(void);
+extern void write_ibpb(void);
#ifdef CONFIG_X86_64
extern void clear_bhb_loop(void);
@@ -397,6 +386,8 @@ extern void call_depth_return_thunk(void);
__stringify(INCREMENT_CALL_DEPTH), \
X86_FEATURE_CALL_DEPTH)
+DECLARE_PER_CPU_CACHE_HOT(u64, __x86_call_depth);
+
#ifdef CONFIG_CALL_THUNKS_DEBUG
DECLARE_PER_CPU(u64, __x86_call_count);
DECLARE_PER_CPU(u64, __x86_ret_count);
@@ -430,19 +421,22 @@ static inline void call_depth_return_thunk(void) {}
#ifdef CONFIG_X86_64
/*
+ * Emits a conditional CS prefix that is compatible with
+ * -mindirect-branch-cs-prefix.
+ */
+#define __CS_PREFIX(reg) \
+ ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \
+ ".ifc \\rs," reg "\n" \
+ ".byte 0x2e\n" \
+ ".endif\n" \
+ ".endr\n"
+
+/*
* Inline asm uses the %V modifier which is only in newer GCC
* which is ensured when CONFIG_MITIGATION_RETPOLINE is defined.
*/
-# define CALL_NOSPEC \
- ALTERNATIVE_2( \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- "call __x86_indirect_thunk_%V[thunk_target]\n", \
- X86_FEATURE_RETPOLINE, \
- "lfence;\n" \
- ANNOTATE_RETPOLINE_SAFE \
- "call *%[thunk_target]\n", \
- X86_FEATURE_RETPOLINE_LFENCE)
+#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \
+ "call __x86_indirect_thunk_%V[thunk_target]\n"
# define THUNK_TARGET(addr) [thunk_target] "r" (addr)
@@ -520,11 +514,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
-extern u64 x86_pred_cmd;
-
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
+ asm_inline volatile(ALTERNATIVE("", "call write_ibpb", X86_FEATURE_IBPB)
+ : ASM_CALL_CONSTRAINT
+ :: "rax", "rcx", "rdx", "memory");
}
/* The Intel SPEC CTRL MSR base value cache */
@@ -561,6 +555,8 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
+DECLARE_STATIC_KEY_FALSE(switch_vcpu_ibpb);
+
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
@@ -605,6 +601,6 @@ static __always_inline void mds_idle_clear_cpu_buffers(void)
mds_clear_cpu_buffers();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index ef2844d69173..53ba39ce010c 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -10,8 +10,6 @@
#ifdef CONFIG_NUMA
-#define NR_NODE_MEMBLKS (MAX_NUMNODES*2)
-
extern int numa_off;
/*
@@ -25,9 +23,6 @@ extern int numa_off;
extern s16 __apicid_to_node[MAX_LOCAL_APIC];
extern nodemask_t numa_nodes_parsed __initdata;
-extern int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-extern void __init numa_set_distance(int from, int to, int distance);
-
static inline void set_apicid_to_node(int apicid, s16 node)
{
__apicid_to_node[apicid] = node;
@@ -46,39 +41,24 @@ static inline int numa_cpu_node(int cpu)
}
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_X86_32
-# include <asm/numa_32.h>
-#endif
-
#ifdef CONFIG_NUMA
extern void numa_set_node(int cpu, int node);
extern void numa_clear_node(int cpu);
extern void __init init_cpu_to_node(void);
-extern void numa_add_cpu(int cpu);
-extern void numa_remove_cpu(int cpu);
+extern void numa_add_cpu(unsigned int cpu);
+extern void numa_remove_cpu(unsigned int cpu);
extern void init_gi_nodes(void);
#else /* CONFIG_NUMA */
static inline void numa_set_node(int cpu, int node) { }
static inline void numa_clear_node(int cpu) { }
static inline void init_cpu_to_node(void) { }
-static inline void numa_add_cpu(int cpu) { }
-static inline void numa_remove_cpu(int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
static inline void init_gi_nodes(void) { }
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-void debug_cpumask_set_cpu(int cpu, int node, bool enable);
+void debug_cpumask_set_cpu(unsigned int cpu, int node, bool enable);
#endif
-#ifdef CONFIG_NUMA_EMU
-#define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
-#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
-int numa_emu_cmdline(char *str);
-#else /* CONFIG_NUMA_EMU */
-static inline int numa_emu_cmdline(char *str)
-{
- return -EINVAL;
-}
-#endif /* CONFIG_NUMA_EMU */
-
#endif /* _ASM_X86_NUMA_H */
diff --git a/arch/x86/include/asm/numa_32.h b/arch/x86/include/asm/numa_32.h
deleted file mode 100644
index 9c8e9e85be77..000000000000
--- a/arch/x86/include/asm/numa_32.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_NUMA_32_H
-#define _ASM_X86_NUMA_32_H
-
-#ifdef CONFIG_HIGHMEM
-extern void set_highmem_pages_init(void);
-#else
-static inline void set_highmem_pages_init(void)
-{
-}
-#endif
-
-#endif /* _ASM_X86_NUMA_32_H */
diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h
index 46d7e06763c9..e0125afa53fb 100644
--- a/arch/x86/include/asm/orc_types.h
+++ b/arch/x86/include/asm/orc_types.h
@@ -45,7 +45,7 @@
#define ORC_TYPE_REGS 3
#define ORC_TYPE_REGS_PARTIAL 4
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/byteorder.h>
/*
@@ -73,6 +73,6 @@ struct orc_entry {
#endif
} __packed;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ORC_TYPES_H */
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 1b93ff80b43b..9265f2fca99a 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -14,7 +14,7 @@
#include <asm/page_32.h>
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct page;
@@ -35,7 +35,7 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr,
}
#define vma_alloc_zeroed_movable_folio(vma, vaddr) \
- vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr, false)
+ vma_alloc_folio(GFP_HIGHUSER_MOVABLE | __GFP_ZERO, 0, vma, vaddr)
#ifndef __pa
#define __pa(x) __phys_addr((unsigned long)(x))
@@ -84,7 +84,7 @@ static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits)
return __canonical_address(vaddr, vaddr_bits) == vaddr;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h
index 580d71aca65a..0c623706cb7e 100644
--- a/arch/x86/include/asm/page_32.h
+++ b/arch/x86/include/asm/page_32.h
@@ -4,7 +4,7 @@
#include <asm/page_32_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET)
#ifdef CONFIG_DEBUG_VIRTUAL
@@ -26,6 +26,6 @@ static inline void copy_page(void *to, void *from)
{
memcpy(to, from, PAGE_SIZE);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_H */
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index faf9cc1c14bb..a9b62e0e6f79 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -11,8 +11,8 @@
* a virtual address space of one gigabyte, which limits the
* amount of physical memory you can use to about 950MB.
*
- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G
- * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ * If you want more physical memory than this then see the CONFIG_VMSPLIT_2G
+ * and CONFIG_HIGHMEM4G options in the kernel configuration.
*/
#define __PAGE_OFFSET_BASE _AC(CONFIG_PAGE_OFFSET, UL)
#define __PAGE_OFFSET __PAGE_OFFSET_BASE
@@ -63,7 +63,7 @@
*/
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This much address space is reserved for vmalloc() and iomap()
@@ -75,6 +75,6 @@ extern int sysctl_legacy_va_layout;
extern void find_low_pfn_range(void);
extern void setup_bootmem_allocator(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_32_DEFS_H */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index cc6b8e087192..d3aab6f4e59a 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -4,7 +4,7 @@
#include <asm/page_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
@@ -17,6 +17,7 @@ extern unsigned long phys_base;
extern unsigned long page_offset_base;
extern unsigned long vmalloc_base;
extern unsigned long vmemmap_base;
+extern unsigned long direct_map_physmem_end;
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
{
@@ -54,11 +55,12 @@ static inline void clear_page(void *page)
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,
"=D" (page),
- "0" (page)
- : "cc", "memory", "rax", "rcx");
+ "D" (page),
+ "cc", "memory", "rax", "rcx");
}
void copy_page(void *to, void *from);
+KCFI_REFERENCE(copy_page);
#ifdef CONFIG_X86_5LEVEL
/*
@@ -93,7 +95,7 @@ static __always_inline unsigned long task_size_max(void)
}
#endif /* CONFIG_X86_5LEVEL */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_VSYSCALL_EMULATION
# define __HAVE_ARCH_GATE_AREA 1
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 06ef25411d62..1faa8f88850a 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PAGE_64_DEFS_H
#define _ASM_X86_PAGE_64_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/kaslr.h>
#endif
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 9da9c8a2f1df..9f77bf03d747 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -6,10 +6,7 @@
#include <linux/types.h>
#include <linux/mem_encrypt.h>
-/* PAGE_SHIFT determines the page size */
-#define PAGE_SHIFT CONFIG_PAGE_SHIFT
-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
-#define PAGE_MASK (~(PAGE_SIZE-1))
+#include <vdso/page.h>
#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1)
@@ -31,10 +28,12 @@
#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC
-#define __PHYSICAL_START ALIGN(CONFIG_PHYSICAL_START, \
- CONFIG_PHYSICAL_ALIGN)
+/* Physical address where kernel should be loaded. */
+#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
+ + (CONFIG_PHYSICAL_ALIGN - 1)) \
+ & ~(CONFIG_PHYSICAL_ALIGN - 1))
-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START)
+#define __START_KERNEL (__START_KERNEL_map + LOAD_PHYSICAL_ADDR)
#ifdef CONFIG_X86_64
#include <asm/page_64_types.h>
@@ -44,7 +43,7 @@
#define IOREMAP_MAX_ORDER (PMD_SHIFT)
#endif /* CONFIG_X86_64 */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK
extern phys_addr_t physical_mask;
@@ -67,6 +66,6 @@ bool pfn_range_is_mapped(unsigned long start_pfn, unsigned long end_pfn);
extern void initmem_init(void);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PAGE_DEFS_H */
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index d4eb9e1d61b8..c4c23190925c 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -6,7 +6,7 @@
#include <asm/paravirt_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct mm_struct;
#endif
@@ -15,7 +15,7 @@ struct mm_struct;
#include <asm/asm.h>
#include <asm/nospec-branch.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/bug.h>
#include <linux/types.h>
#include <linux/cpumask.h>
@@ -91,11 +91,6 @@ static inline void __flush_tlb_multi(const struct cpumask *cpumask,
PVOP_VCALL2(mmu.flush_tlb_multi, cpumask, info);
}
-static inline void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- PVOP_VCALL2(mmu.tlb_remove_table, tlb, table);
-}
-
static inline void paravirt_arch_exit_mmap(struct mm_struct *mm)
{
PVOP_VCALL1(mmu.exit_mmap, mm);
@@ -107,6 +102,16 @@ static inline void notify_page_enc_status_changed(unsigned long pfn,
PVOP_VCALL3(mmu.notify_page_enc_status_changed, pfn, npages, enc);
}
+static __always_inline void arch_safe_halt(void)
+{
+ PVOP_VCALL0(irq.safe_halt);
+}
+
+static inline void halt(void)
+{
+ PVOP_VCALL0(irq.halt);
+}
+
#ifdef CONFIG_PARAVIRT_XXL
static inline void load_sp0(unsigned long sp0)
{
@@ -170,23 +175,6 @@ static inline void __write_cr4(unsigned long x)
PVOP_VCALL1(cpu.write_cr4, x);
}
-static __always_inline void arch_safe_halt(void)
-{
- PVOP_VCALL0(irq.safe_halt);
-}
-
-static inline void halt(void)
-{
- PVOP_VCALL0(irq.halt);
-}
-
-extern noinstr void pv_native_wbinvd(void);
-
-static __always_inline void wbinvd(void)
-{
- PVOP_ALT_VCALL0(cpu.wbinvd, "wbinvd", ALT_NOT_XEN);
-}
-
static inline u64 paravirt_read_msr(unsigned msr)
{
return PVOP_CALL1(u64, cpu.read_msr, msr);
@@ -727,7 +715,7 @@ static __always_inline unsigned long arch_local_irq_save(void)
extern void default_banner(void);
void native_pv_lock_init(void) __init;
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#ifdef CONFIG_X86_64
#ifdef CONFIG_PARAVIRT_XXL
@@ -747,18 +735,18 @@ void native_pv_lock_init(void) __init;
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#else /* CONFIG_PARAVIRT */
# define default_banner x86_init_noop
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
static inline void native_pv_lock_init(void)
{
}
#endif
#endif /* !CONFIG_PARAVIRT */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifndef CONFIG_PARAVIRT_XXL
static inline void paravirt_enter_mmap(struct mm_struct *mm)
{
@@ -776,5 +764,5 @@ static inline void paravirt_set_cap(void)
{
}
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PARAVIRT_H */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 8d4fbe1be489..631c306ce1ff 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -4,7 +4,7 @@
#ifdef CONFIG_PARAVIRT
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/desc_defs.h>
@@ -86,8 +86,6 @@ struct pv_cpu_ops {
void (*update_io_bitmap)(void);
#endif
- void (*wbinvd)(void);
-
/* cpuid emulation, mostly so that caps bits can be disabled */
void (*cpuid)(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx);
@@ -122,10 +120,9 @@ struct pv_irq_ops {
struct paravirt_callee_save save_fl;
struct paravirt_callee_save irq_disable;
struct paravirt_callee_save irq_enable;
-
+#endif
void (*safe_halt)(void);
void (*halt)(void);
-#endif
} __no_randomize_layout;
struct pv_mmu_ops {
@@ -136,8 +133,6 @@ struct pv_mmu_ops {
void (*flush_tlb_multi)(const struct cpumask *cpus,
const struct flush_tlb_info *info);
- void (*tlb_remove_table)(struct mmu_gather *tlb, void *table);
-
/* Hook for intercepting the destruction of an mm_struct. */
void (*exit_mmap)(struct mm_struct *mm);
void (*notify_page_enc_status_changed)(unsigned long pfn, int npages, bool enc);
@@ -244,9 +239,17 @@ extern struct paravirt_patch_template pv_ops;
#define paravirt_ptr(op) [paravirt_opptr] "m" (pv_ops.op)
-int paravirt_disable_iospace(void);
-
-/* This generates an indirect call based on the operation type number. */
+/*
+ * This generates an indirect call based on the operation type number.
+ *
+ * Since alternatives run after enabling CET/IBT -- the latter setting/clearing
+ * capabilities and the former requiring all capabilities being finalized --
+ * these indirect calls are subject to IBT and the paravirt stubs should have
+ * ENDBR on.
+ *
+ * OTOH since this is effectively a __nocfi indirect call, the paravirt stubs
+ * don't need to bother with CFI prefixes.
+ */
#define PARAVIRT_CALL \
ANNOTATE_RETPOLINE_SAFE \
"call *%[paravirt_opptr];"
@@ -521,7 +524,7 @@ unsigned long pv_native_read_cr2(void);
#define paravirt_nop ((void *)nop_func)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define ALT_NOT_XEN ALT_NOT(X86_FEATURE_XENPV)
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 44958ebaf626..5fe314a2e73e 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -3,31 +3,26 @@
#define _ASM_X86_PERCPU_H
#ifdef CONFIG_X86_64
-#define __percpu_seg gs
-#define __percpu_rel (%rip)
+# define __percpu_seg gs
+# define __percpu_rel (%rip)
#else
-#define __percpu_seg fs
-#define __percpu_rel
+# define __percpu_seg fs
+# define __percpu_rel
#endif
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#ifdef CONFIG_SMP
-#define __percpu %__percpu_seg:
+# define __percpu %__percpu_seg:
#else
-#define __percpu
+# define __percpu
#endif
#define PER_CPU_VAR(var) __percpu(var)__percpu_rel
-#ifdef CONFIG_X86_64_SMP
-#define INIT_PER_CPU_VAR(var) init_per_cpu__##var
-#else
-#define INIT_PER_CPU_VAR(var) var
-#endif
-
-#else /* ...!ASSEMBLY */
+#else /* !__ASSEMBLY__: */
+#include <linux/args.h>
#include <linux/build_bug.h>
#include <linux/stringify.h>
#include <asm/asm.h>
@@ -37,19 +32,14 @@
#ifdef CONFIG_CC_HAS_NAMED_AS
#ifdef __CHECKER__
-#define __seg_gs __attribute__((address_space(__seg_gs)))
-#define __seg_fs __attribute__((address_space(__seg_fs)))
-#endif
-
-#ifdef CONFIG_X86_64
-#define __percpu_seg_override __seg_gs
-#else
-#define __percpu_seg_override __seg_fs
+# define __seg_gs __attribute__((address_space(__seg_gs)))
+# define __seg_fs __attribute__((address_space(__seg_fs)))
#endif
+#define __percpu_seg_override CONCATENATE(__seg_, __percpu_seg)
#define __percpu_prefix ""
-#else /* CONFIG_CC_HAS_NAMED_AS */
+#else /* !CONFIG_CC_HAS_NAMED_AS: */
#define __percpu_seg_override
#define __percpu_prefix "%%"__stringify(__percpu_seg)":"
@@ -59,40 +49,34 @@
#define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset this_cpu_read(this_cpu_off)
-#ifdef CONFIG_USE_X86_SEG_SUPPORT
-/*
- * Efficient implementation for cases in which the compiler supports
- * named address spaces. Allows the compiler to perform additional
- * optimizations that can save more instructions.
- */
-#define arch_raw_cpu_ptr(ptr) \
-({ \
- unsigned long tcp_ptr__; \
- tcp_ptr__ = __raw_cpu_read(, this_cpu_off); \
- \
- tcp_ptr__ += (unsigned long)(ptr); \
- (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
-})
-#else /* CONFIG_USE_X86_SEG_SUPPORT */
/*
* Compared to the generic __my_cpu_offset version, the following
* saves one instruction and avoids clobbering a temp register.
+ *
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becames 4.
*/
-#define arch_raw_cpu_ptr(ptr) \
-({ \
- unsigned long tcp_ptr__; \
- asm ("mov " __percpu_arg(1) ", %0" \
- : "=r" (tcp_ptr__) \
- : "m" (__my_cpu_var(this_cpu_off))); \
- \
- tcp_ptr__ += (unsigned long)(ptr); \
- (typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr) \
+({ \
+ unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \
+ \
+ tcp_ptr__ += (__force unsigned long)(_ptr); \
+ (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)tcp_ptr__; \
})
-#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+#else
+#define arch_raw_cpu_ptr(_ptr) \
+({ \
+ BUILD_BUG(); \
+ (TYPEOF_UNQUAL(*(_ptr)) __force __kernel *)0; \
+})
+#endif
#define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel
-#else /* CONFIG_SMP */
+#else /* !CONFIG_SMP: */
+
#define __percpu_seg_override
#define __percpu_prefix ""
#define __force_percpu_prefix ""
@@ -101,91 +85,147 @@
#endif /* CONFIG_SMP */
-#define __my_cpu_type(var) typeof(var) __percpu_seg_override
-#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
-#define __my_cpu_var(var) (*__my_cpu_ptr(&var))
+#if defined(CONFIG_USE_X86_SEG_SUPPORT) && defined(USE_TYPEOF_UNQUAL)
+# define __my_cpu_type(var) typeof(var)
+# define __my_cpu_ptr(ptr) (ptr)
+# define __my_cpu_var(var) (var)
+
+# define __percpu_qual __percpu_seg_override
+#else
+# define __my_cpu_type(var) typeof(var) __percpu_seg_override
+# define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
+# define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
+#endif
+
#define __percpu_arg(x) __percpu_prefix "%" #x
#define __force_percpu_arg(x) __force_percpu_prefix "%" #x
/*
- * Initialized pointers to per-cpu variables needed for the boot
- * processor need to use these macros to get the proper address
- * offset from __per_cpu_load on SMP.
- *
- * There also must be an entry in vmlinux_64.lds.S
+ * For arch-specific code, we can use direct single-insn ops (they
+ * don't give an lvalue though).
*/
-#define DECLARE_INIT_PER_CPU(var) \
- extern typeof(var) init_per_cpu_var(var)
-#ifdef CONFIG_X86_64_SMP
-#define init_per_cpu_var(var) init_per_cpu__##var
-#else
-#define init_per_cpu_var(var) var
-#endif
+#define __pcpu_type_1 u8
+#define __pcpu_type_2 u16
+#define __pcpu_type_4 u32
+#define __pcpu_type_8 u64
-/* For arch-specific code, we can use direct single-insn ops (they
- * don't give an lvalue though). */
+#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
+#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
+#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
+#define __pcpu_cast_8(val) ((u64)(val))
-#define __pcpu_type_1 u8
-#define __pcpu_type_2 u16
-#define __pcpu_type_4 u32
-#define __pcpu_type_8 u64
+#define __pcpu_op_1(op) op "b "
+#define __pcpu_op_2(op) op "w "
+#define __pcpu_op_4(op) op "l "
+#define __pcpu_op_8(op) op "q "
-#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
-#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
-#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
-#define __pcpu_cast_8(val) ((u64)(val))
+#define __pcpu_reg_1(mod, x) mod "q" (x)
+#define __pcpu_reg_2(mod, x) mod "r" (x)
+#define __pcpu_reg_4(mod, x) mod "r" (x)
+#define __pcpu_reg_8(mod, x) mod "r" (x)
-#define __pcpu_op1_1(op, dst) op "b " dst
-#define __pcpu_op1_2(op, dst) op "w " dst
-#define __pcpu_op1_4(op, dst) op "l " dst
-#define __pcpu_op1_8(op, dst) op "q " dst
+#define __pcpu_reg_imm_1(x) "qi" (x)
+#define __pcpu_reg_imm_2(x) "ri" (x)
+#define __pcpu_reg_imm_4(x) "ri" (x)
+#define __pcpu_reg_imm_8(x) "re" (x)
-#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
-#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
-#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
-#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst
+#ifdef CONFIG_USE_X86_SEG_SUPPORT
-#define __pcpu_reg_1(mod, x) mod "q" (x)
-#define __pcpu_reg_2(mod, x) mod "r" (x)
-#define __pcpu_reg_4(mod, x) mod "r" (x)
-#define __pcpu_reg_8(mod, x) mod "r" (x)
+#define __raw_cpu_read(size, qual, pcp) \
+({ \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
+})
-#define __pcpu_reg_imm_1(x) "qi" (x)
-#define __pcpu_reg_imm_2(x) "ri" (x)
-#define __pcpu_reg_imm_4(x) "ri" (x)
-#define __pcpu_reg_imm_8(x) "re" (x)
+#define __raw_cpu_write(size, qual, pcp, val) \
+do { \
+ *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
+} while (0)
+
+#define __raw_cpu_read_const(pcp) __raw_cpu_read(, , pcp)
+
+#else /* !CONFIG_USE_X86_SEG_SUPPORT: */
-#define percpu_to_op(size, qual, op, _var, _val) \
+#define __raw_cpu_read(size, qual, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm qual (__pcpu_op_##size("mov") \
+ __percpu_arg([var]) ", %[val]" \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "m" (__my_cpu_var(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
+#define __raw_cpu_write(size, qual, _var, _val) \
do { \
__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
if (0) { \
- typeof(_var) pto_tmp__; \
+ TYPEOF_UNQUAL(_var) pto_tmp__; \
pto_tmp__ = (_val); \
(void)pto_tmp__; \
} \
- asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var])) \
- : [var] "+m" (__my_cpu_var(_var)) \
+ asm qual (__pcpu_op_##size("mov") "%[val], " \
+ __percpu_arg([var]) \
+ : [var] "=m" (__my_cpu_var(_var)) \
: [val] __pcpu_reg_imm_##size(pto_val__)); \
} while (0)
+/*
+ * The generic per-CPU infrastrucutre is not suitable for
+ * reading const-qualified variables.
+ */
+#define __raw_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+
+#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+
+#define __raw_cpu_read_stable(size, _var) \
+({ \
+ __pcpu_type_##size pfo_val__; \
+ \
+ asm(__pcpu_op_##size("mov") \
+ __force_percpu_arg(a[var]) ", %[val]" \
+ : [val] __pcpu_reg_##size("=", pfo_val__) \
+ : [var] "i" (&(_var))); \
+ \
+ (typeof(_var))(unsigned long) pfo_val__; \
+})
+
#define percpu_unary_op(size, qual, op, _var) \
({ \
- asm qual (__pcpu_op1_##size(op, __percpu_arg([var])) \
+ asm qual (__pcpu_op_##size(op) __percpu_arg([var]) \
: [var] "+m" (__my_cpu_var(_var))); \
})
+#define percpu_binary_op(size, qual, op, _var, _val) \
+do { \
+ __pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val); \
+ \
+ if (0) { \
+ TYPEOF_UNQUAL(_var) pto_tmp__; \
+ pto_tmp__ = (_val); \
+ (void)pto_tmp__; \
+ } \
+ asm qual (__pcpu_op_##size(op) "%[val], " __percpu_arg([var]) \
+ : [var] "+m" (__my_cpu_var(_var)) \
+ : [val] __pcpu_reg_imm_##size(pto_val__)); \
+} while (0)
+
/*
- * Generate a percpu add to memory instruction and optimize code
+ * Generate a per-CPU add to memory instruction and optimize code
* if one is added or subtracted.
*/
#define percpu_add_op(size, qual, var, val) \
do { \
- const int pao_ID__ = (__builtin_constant_p(val) && \
- ((val) == 1 || (val) == -1)) ? \
- (int)(val) : 0; \
+ const int pao_ID__ = \
+ (__builtin_constant_p(val) && \
+ ((val) == 1 || \
+ (val) == (typeof(val))-1)) ? (int)(val) : 0; \
+ \
if (0) { \
- typeof(var) pao_tmp__; \
+ TYPEOF_UNQUAL(var) pao_tmp__; \
pao_tmp__ = (val); \
(void)pao_tmp__; \
} \
@@ -194,35 +234,18 @@ do { \
else if (pao_ID__ == -1) \
percpu_unary_op(size, qual, "dec", var); \
else \
- percpu_to_op(size, qual, "add", var, val); \
+ percpu_binary_op(size, qual, "add", var, val); \
} while (0)
-#define percpu_from_op(size, qual, op, _var) \
-({ \
- __pcpu_type_##size pfo_val__; \
- asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]") \
- : [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "m" (__my_cpu_var(_var))); \
- (typeof(_var))(unsigned long) pfo_val__; \
-})
-
-#define percpu_stable_op(size, op, _var) \
-({ \
- __pcpu_type_##size pfo_val__; \
- asm(__pcpu_op2_##size(op, __force_percpu_arg(a[var]), "%[val]") \
- : [val] __pcpu_reg_##size("=", pfo_val__) \
- : [var] "i" (&(_var))); \
- (typeof(_var))(unsigned long) pfo_val__; \
-})
-
/*
* Add return operation
*/
#define percpu_add_return_op(size, qual, _var, _val) \
({ \
__pcpu_type_##size paro_tmp__ = __pcpu_cast_##size(_val); \
- asm qual (__pcpu_op2_##size("xadd", "%[tmp]", \
- __percpu_arg([var])) \
+ \
+ asm qual (__pcpu_op_##size("xadd") "%[tmp], " \
+ __percpu_arg([var]) \
: [tmp] __pcpu_reg_##size("+", paro_tmp__), \
[var] "+m" (__my_cpu_var(_var)) \
: : "memory"); \
@@ -230,41 +253,48 @@ do { \
})
/*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix. The processor cannot prefetch
- * cachelines if xchg is used.
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
*/
-#define percpu_xchg_op(size, qual, _var, _nval) \
+#define raw_percpu_xchg_op(_var, _nval) \
({ \
- __pcpu_type_##size pxo_old__; \
- __pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \
- asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
- "%[oval]") \
- "\n1:\t" \
- __pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
- "\n\tjnz 1b" \
- : [oval] "=&a" (pxo_old__), \
- [var] "+m" (__my_cpu_var(_var)) \
- : [nval] __pcpu_reg_##size(, pxo_new__) \
- : "memory"); \
- (typeof(_var))(unsigned long) pxo_old__; \
+ TYPEOF_UNQUAL(_var) pxo_old__ = raw_cpu_read(_var); \
+ \
+ raw_cpu_write(_var, _nval); \
+ \
+ pxo_old__; \
+})
+
+/*
+ * this_cpu_xchg() is implemented using CMPXCHG without a LOCK prefix.
+ * XCHG is expensive due to the implied LOCK prefix. The processor
+ * cannot prefetch cachelines if XCHG is used.
+ */
+#define this_percpu_xchg_op(_var, _nval) \
+({ \
+ TYPEOF_UNQUAL(_var) pxo_old__ = this_cpu_read(_var); \
+ \
+ do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \
+ \
+ pxo_old__; \
})
/*
- * cmpxchg has no such implied lock semantics as a result it is much
- * more efficient for cpu local operations.
+ * CMPXCHG has no such implied lock semantics as a result it is much
+ * more efficient for CPU-local operations.
*/
#define percpu_cmpxchg_op(size, qual, _var, _oval, _nval) \
({ \
__pcpu_type_##size pco_old__ = __pcpu_cast_##size(_oval); \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
- asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
+ \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
: [oval] "+a" (pco_old__), \
[var] "+m" (__my_cpu_var(_var)) \
: [nval] __pcpu_reg_##size(, pco_new__) \
: "memory"); \
+ \
(typeof(_var))(unsigned long) pco_old__; \
})
@@ -274,8 +304,9 @@ do { \
__pcpu_type_##size *pco_oval__ = (__pcpu_type_##size *)(_ovalp); \
__pcpu_type_##size pco_old__ = *pco_oval__; \
__pcpu_type_##size pco_new__ = __pcpu_cast_##size(_nval); \
- asm qual (__pcpu_op2_##size("cmpxchg", "%[nval]", \
- __percpu_arg([var])) \
+ \
+ asm qual (__pcpu_op_##size("cmpxchg") "%[nval], " \
+ __percpu_arg([var]) \
CC_SET(z) \
: CC_OUT(z) (success), \
[oval] "+a" (pco_old__), \
@@ -284,10 +315,12 @@ do { \
: "memory"); \
if (unlikely(!success)) \
*pco_oval__ = pco_old__; \
+ \
likely(success); \
})
#if defined(CONFIG_X86_32) && !defined(CONFIG_UML)
+
#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval) \
({ \
union { \
@@ -300,21 +333,20 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
- "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
-#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
-#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, , pcp, oval, nval)
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
#define percpu_try_cmpxchg64_op(size, qual, _var, _ovalp, _nval) \
({ \
@@ -330,29 +362,30 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
- "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg8b_emu", \
+ "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
+ CC_SET(z) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
+ \
likely(success); \
})
#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, , pcp, ovalp, nval)
#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg64_op(8, volatile, pcp, ovalp, nval)
-#endif
+
+#endif /* defined(CONFIG_X86_32) && !defined(CONFIG_UML) */
#ifdef CONFIG_X86_64
-#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
-#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
+#define raw_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval);
+#define this_cpu_cmpxchg64(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
#define raw_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval);
#define this_cpu_try_cmpxchg64(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval);
@@ -369,21 +402,20 @@ do { \
old__.var = _oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
- "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- : [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ : ALT_OUTPUT_SP([var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
\
old__.var; \
})
-#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
-#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
+#define raw_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, , pcp, oval, nval)
+#define this_cpu_cmpxchg128(pcp, oval, nval) percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
#define percpu_try_cmpxchg128_op(size, qual, _var, _ovalp, _nval) \
({ \
@@ -399,225 +431,177 @@ do { \
old__.var = *_oval; \
new__.var = _nval; \
\
- asm qual (ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
- "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
- CC_SET(z) \
- : CC_OUT(z) (success), \
- [var] "+m" (__my_cpu_var(_var)), \
- "+a" (old__.low), \
- "+d" (old__.high) \
- : "b" (new__.low), \
- "c" (new__.high), \
- "S" (&(_var)) \
- : "memory"); \
+ asm_inline qual ( \
+ ALTERNATIVE("call this_cpu_cmpxchg16b_emu", \
+ "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
+ CC_SET(z) \
+ : ALT_OUTPUT_SP(CC_OUT(z) (success), \
+ [var] "+m" (__my_cpu_var(_var)), \
+ "+a" (old__.low), "+d" (old__.high)) \
+ : "b" (new__.low), "c" (new__.high), \
+ "S" (&(_var)) \
+ : "memory"); \
if (unlikely(!success)) \
*_oval = old__.var; \
+ \
likely(success); \
})
#define raw_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, , pcp, ovalp, nval)
#define this_cpu_try_cmpxchg128(pcp, ovalp, nval) percpu_try_cmpxchg128_op(16, volatile, pcp, ovalp, nval)
-#endif
+
+#endif /* CONFIG_X86_64 */
+
+#define raw_cpu_read_1(pcp) __raw_cpu_read(1, , pcp)
+#define raw_cpu_read_2(pcp) __raw_cpu_read(2, , pcp)
+#define raw_cpu_read_4(pcp) __raw_cpu_read(4, , pcp)
+#define raw_cpu_write_1(pcp, val) __raw_cpu_write(1, , pcp, val)
+#define raw_cpu_write_2(pcp, val) __raw_cpu_write(2, , pcp, val)
+#define raw_cpu_write_4(pcp, val) __raw_cpu_write(4, , pcp, val)
+
+#define this_cpu_read_1(pcp) __raw_cpu_read(1, volatile, pcp)
+#define this_cpu_read_2(pcp) __raw_cpu_read(2, volatile, pcp)
+#define this_cpu_read_4(pcp) __raw_cpu_read(4, volatile, pcp)
+#define this_cpu_write_1(pcp, val) __raw_cpu_write(1, volatile, pcp, val)
+#define this_cpu_write_2(pcp, val) __raw_cpu_write(2, volatile, pcp, val)
+#define this_cpu_write_4(pcp, val) __raw_cpu_write(4, volatile, pcp, val)
+
+#define this_cpu_read_stable_1(pcp) __raw_cpu_read_stable(1, pcp)
+#define this_cpu_read_stable_2(pcp) __raw_cpu_read_stable(2, pcp)
+#define this_cpu_read_stable_4(pcp) __raw_cpu_read_stable(4, pcp)
+
+#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
+#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
+#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
+#define raw_cpu_and_1(pcp, val) percpu_binary_op(1, , "and", (pcp), val)
+#define raw_cpu_and_2(pcp, val) percpu_binary_op(2, , "and", (pcp), val)
+#define raw_cpu_and_4(pcp, val) percpu_binary_op(4, , "and", (pcp), val)
+#define raw_cpu_or_1(pcp, val) percpu_binary_op(1, , "or", (pcp), val)
+#define raw_cpu_or_2(pcp, val) percpu_binary_op(2, , "or", (pcp), val)
+#define raw_cpu_or_4(pcp, val) percpu_binary_op(4, , "or", (pcp), val)
+#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
+#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
+
+#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
+#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
+#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
+#define this_cpu_and_1(pcp, val) percpu_binary_op(1, volatile, "and", (pcp), val)
+#define this_cpu_and_2(pcp, val) percpu_binary_op(2, volatile, "and", (pcp), val)
+#define this_cpu_and_4(pcp, val) percpu_binary_op(4, volatile, "and", (pcp), val)
+#define this_cpu_or_1(pcp, val) percpu_binary_op(1, volatile, "or", (pcp), val)
+#define this_cpu_or_2(pcp, val) percpu_binary_op(2, volatile, "or", (pcp), val)
+#define this_cpu_or_4(pcp, val) percpu_binary_op(4, volatile, "or", (pcp), val)
+#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval)
+
+#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
+#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
+#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
+#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
+#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
+#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
+
+#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
+#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
+#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
+#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
+#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval)
+#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval)
/*
- * this_cpu_read() makes gcc load the percpu variable every time it is
- * accessed while this_cpu_read_stable() allows the value to be cached.
- * this_cpu_read_stable() is more efficient and can be used if its value
- * is guaranteed to be valid across cpus. The current users include
- * pcpu_hot.current_task and pcpu_hot.top_of_stack, both of which are
- * actually per-thread variables implemented as per-CPU variables and
- * thus stable for the duration of the respective task.
+ * Per-CPU atomic 64-bit operations are only available under 64-bit kernels.
+ * 32-bit kernels must fall back to generic operations.
*/
-#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
-#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
-#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
-#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
-#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
+#ifdef CONFIG_X86_64
-#ifdef CONFIG_USE_X86_SEG_SUPPORT
+#define raw_cpu_read_8(pcp) __raw_cpu_read(8, , pcp)
+#define raw_cpu_write_8(pcp, val) __raw_cpu_write(8, , pcp, val)
-#define __raw_cpu_read(qual, pcp) \
-({ \
- *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)); \
-})
-
-#define __raw_cpu_write(qual, pcp, val) \
-do { \
- *(qual __my_cpu_type(pcp) *)__my_cpu_ptr(&(pcp)) = (val); \
-} while (0)
+#define this_cpu_read_8(pcp) __raw_cpu_read(8, volatile, pcp)
+#define this_cpu_write_8(pcp, val) __raw_cpu_write(8, volatile, pcp, val)
-#define raw_cpu_read_1(pcp) __raw_cpu_read(, pcp)
-#define raw_cpu_read_2(pcp) __raw_cpu_read(, pcp)
-#define raw_cpu_read_4(pcp) __raw_cpu_read(, pcp)
-#define raw_cpu_write_1(pcp, val) __raw_cpu_write(, pcp, val)
-#define raw_cpu_write_2(pcp, val) __raw_cpu_write(, pcp, val)
-#define raw_cpu_write_4(pcp, val) __raw_cpu_write(, pcp, val)
+#define this_cpu_read_stable_8(pcp) __raw_cpu_read_stable(8, pcp)
-#define this_cpu_read_1(pcp) __raw_cpu_read(volatile, pcp)
-#define this_cpu_read_2(pcp) __raw_cpu_read(volatile, pcp)
-#define this_cpu_read_4(pcp) __raw_cpu_read(volatile, pcp)
-#define this_cpu_write_1(pcp, val) __raw_cpu_write(volatile, pcp, val)
-#define this_cpu_write_2(pcp, val) __raw_cpu_write(volatile, pcp, val)
-#define this_cpu_write_4(pcp, val) __raw_cpu_write(volatile, pcp, val)
+#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
+#define raw_cpu_and_8(pcp, val) percpu_binary_op(8, , "and", (pcp), val)
+#define raw_cpu_or_8(pcp, val) percpu_binary_op(8, , "or", (pcp), val)
+#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
+#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
+#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
+#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
-#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) __raw_cpu_read(, pcp)
-#define raw_cpu_write_8(pcp, val) __raw_cpu_write(, pcp, val)
+#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
+#define this_cpu_and_8(pcp, val) percpu_binary_op(8, volatile, "and", (pcp), val)
+#define this_cpu_or_8(pcp, val) percpu_binary_op(8, volatile, "or", (pcp), val)
+#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
+#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
+#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
-#define this_cpu_read_8(pcp) __raw_cpu_read(volatile, pcp)
-#define this_cpu_write_8(pcp, val) __raw_cpu_write(volatile, pcp, val)
-#endif
+#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp)
-#define this_cpu_read_const(pcp) __raw_cpu_read(, pcp)
-#else /* CONFIG_USE_X86_SEG_SUPPORT */
+#else /* !CONFIG_X86_64: */
-#define raw_cpu_read_1(pcp) percpu_from_op(1, , "mov", pcp)
-#define raw_cpu_read_2(pcp) percpu_from_op(2, , "mov", pcp)
-#define raw_cpu_read_4(pcp) percpu_from_op(4, , "mov", pcp)
-#define raw_cpu_write_1(pcp, val) percpu_to_op(1, , "mov", (pcp), val)
-#define raw_cpu_write_2(pcp, val) percpu_to_op(2, , "mov", (pcp), val)
-#define raw_cpu_write_4(pcp, val) percpu_to_op(4, , "mov", (pcp), val)
+/* There is no generic 64-bit read stable operation for 32-bit targets. */
+#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
-#define this_cpu_read_1(pcp) percpu_from_op(1, volatile, "mov", pcp)
-#define this_cpu_read_2(pcp) percpu_from_op(2, volatile, "mov", pcp)
-#define this_cpu_read_4(pcp) percpu_from_op(4, volatile, "mov", pcp)
-#define this_cpu_write_1(pcp, val) percpu_to_op(1, volatile, "mov", (pcp), val)
-#define this_cpu_write_2(pcp, val) percpu_to_op(2, volatile, "mov", (pcp), val)
-#define this_cpu_write_4(pcp, val) percpu_to_op(4, volatile, "mov", (pcp), val)
+#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp)
-#ifdef CONFIG_X86_64
-#define raw_cpu_read_8(pcp) percpu_from_op(8, , "mov", pcp)
-#define raw_cpu_write_8(pcp, val) percpu_to_op(8, , "mov", (pcp), val)
+#endif /* CONFIG_X86_64 */
-#define this_cpu_read_8(pcp) percpu_from_op(8, volatile, "mov", pcp)
-#define this_cpu_write_8(pcp, val) percpu_to_op(8, volatile, "mov", (pcp), val)
-#endif
+#define this_cpu_read_const(pcp) __raw_cpu_read_const(pcp)
/*
- * The generic per-cpu infrastrucutre is not suitable for
- * reading const-qualified variables.
+ * this_cpu_read() makes the compiler load the per-CPU variable every time
+ * it is accessed while this_cpu_read_stable() allows the value to be cached.
+ * this_cpu_read_stable() is more efficient and can be used if its value
+ * is guaranteed to be valid across CPUs. The current users include
+ * current_task and cpu_current_top_of_stack, both of which are
+ * actually per-thread variables implemented as per-CPU variables and
+ * thus stable for the duration of the respective task.
*/
-#define this_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
-#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+#define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
-#define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
-#define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
-#define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
-#define raw_cpu_and_1(pcp, val) percpu_to_op(1, , "and", (pcp), val)
-#define raw_cpu_and_2(pcp, val) percpu_to_op(2, , "and", (pcp), val)
-#define raw_cpu_and_4(pcp, val) percpu_to_op(4, , "and", (pcp), val)
-#define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val)
-#define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val)
-#define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val)
-
-/*
- * raw_cpu_xchg() can use a load-store since it is not required to be
- * IRQ-safe.
- */
-#define raw_percpu_xchg_op(var, nval) \
+#define x86_this_cpu_constant_test_bit(_nr, _var) \
({ \
- typeof(var) pxo_ret__ = raw_cpu_read(var); \
- raw_cpu_write(var, (nval)); \
- pxo_ret__; \
+ unsigned long __percpu *addr__ = \
+ (unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+ \
+ !!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \
})
-#define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
-#define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)
-
-#define this_cpu_add_1(pcp, val) percpu_add_op(1, volatile, (pcp), val)
-#define this_cpu_add_2(pcp, val) percpu_add_op(2, volatile, (pcp), val)
-#define this_cpu_add_4(pcp, val) percpu_add_op(4, volatile, (pcp), val)
-#define this_cpu_and_1(pcp, val) percpu_to_op(1, volatile, "and", (pcp), val)
-#define this_cpu_and_2(pcp, val) percpu_to_op(2, volatile, "and", (pcp), val)
-#define this_cpu_and_4(pcp, val) percpu_to_op(4, volatile, "and", (pcp), val)
-#define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val)
-#define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val)
-#define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval)
-
-#define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
-#define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
-#define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(4, , pcp, val)
-#define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, , pcp, oval, nval)
-#define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, , pcp, oval, nval)
-#define raw_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, , pcp, ovalp, nval)
-#define raw_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, , pcp, ovalp, nval)
-#define raw_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, , pcp, ovalp, nval)
-
-#define this_cpu_add_return_1(pcp, val) percpu_add_return_op(1, volatile, pcp, val)
-#define this_cpu_add_return_2(pcp, val) percpu_add_return_op(2, volatile, pcp, val)
-#define this_cpu_add_return_4(pcp, val) percpu_add_return_op(4, volatile, pcp, val)
-#define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(1, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(2, volatile, pcp, oval, nval)
-#define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(4, volatile, pcp, oval, nval)
-#define this_cpu_try_cmpxchg_1(pcp, ovalp, nval) percpu_try_cmpxchg_op(1, volatile, pcp, ovalp, nval)
-#define this_cpu_try_cmpxchg_2(pcp, ovalp, nval) percpu_try_cmpxchg_op(2, volatile, pcp, ovalp, nval)
-#define this_cpu_try_cmpxchg_4(pcp, ovalp, nval) percpu_try_cmpxchg_op(4, volatile, pcp, ovalp, nval)
-
-/*
- * Per cpu atomic 64 bit operations are only available under 64 bit.
- * 32 bit must fall back to generic operations.
- */
-#ifdef CONFIG_X86_64
-#define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
-#define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
-#define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
-#define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(8, , pcp, val)
-#define raw_cpu_xchg_8(pcp, nval) raw_percpu_xchg_op(pcp, nval)
-#define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, , pcp, oval, nval)
-#define raw_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, , pcp, ovalp, nval)
-
-#define this_cpu_add_8(pcp, val) percpu_add_op(8, volatile, (pcp), val)
-#define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
-#define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
-#define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
-#define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
-#define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
-#endif
-
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
- const unsigned long __percpu *addr)
-{
- unsigned long __percpu *a =
- (unsigned long __percpu *)addr + nr / BITS_PER_LONG;
-
-#ifdef CONFIG_X86_64
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
-#else
- return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
-
-static inline bool x86_this_cpu_variable_test_bit(int nr,
- const unsigned long __percpu *addr)
-{
- bool oldbit;
-
- asm volatile("btl "__percpu_arg(2)",%1"
- CC_SET(c)
- : CC_OUT(c) (oldbit)
- : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
-
- return oldbit;
-}
+#define x86_this_cpu_variable_test_bit(_nr, _var) \
+({ \
+ bool oldbit; \
+ \
+ asm volatile("btl %[nr], " __percpu_arg([var]) \
+ CC_SET(c) \
+ : CC_OUT(c) (oldbit) \
+ : [var] "m" (__my_cpu_var(_var)), \
+ [nr] "rI" (_nr)); \
+ oldbit; \
+})
-#define x86_this_cpu_test_bit(nr, addr) \
- (__builtin_constant_p((nr)) \
- ? x86_this_cpu_constant_test_bit((nr), (addr)) \
- : x86_this_cpu_variable_test_bit((nr), (addr)))
+#define x86_this_cpu_test_bit(_nr, _var) \
+ (__builtin_constant_p(_nr) \
+ ? x86_this_cpu_constant_test_bit(_nr, _var) \
+ : x86_this_cpu_variable_test_bit(_nr, _var))
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
-DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, this_cpu_off);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_SMP
@@ -639,46 +623,47 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);
{ [0 ... NR_CPUS-1] = _initvalue }; \
__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
- DECLARE_PER_CPU(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
+ DECLARE_PER_CPU(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
- DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
- extern __typeof__(_type) *_name##_early_ptr; \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+ DECLARE_PER_CPU_READ_MOSTLY(_type, _name); \
+ extern __typeof__(_type) *_name##_early_ptr; \
extern __typeof__(_type) _name##_early_map[]
-#define early_per_cpu_ptr(_name) (_name##_early_ptr)
-#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
-#define early_per_cpu(_name, _cpu) \
- *(early_per_cpu_ptr(_name) ? \
- &early_per_cpu_ptr(_name)[_cpu] : \
+#define early_per_cpu_ptr(_name) (_name##_early_ptr)
+#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
+
+#define early_per_cpu(_name, _cpu) \
+ *(early_per_cpu_ptr(_name) ? \
+ &early_per_cpu_ptr(_name)[_cpu] : \
&per_cpu(_name, _cpu))
-#else /* !CONFIG_SMP */
-#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
+#else /* !CONFIG_SMP: */
+#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
DEFINE_PER_CPU(_type, _name) = _initvalue
#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue) \
DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue
-#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
+#define EXPORT_EARLY_PER_CPU_SYMBOL(_name) \
EXPORT_PER_CPU_SYMBOL(_name)
-#define DECLARE_EARLY_PER_CPU(_type, _name) \
+#define DECLARE_EARLY_PER_CPU(_type, _name) \
DECLARE_PER_CPU(_type, _name)
-#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
+#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name) \
DECLARE_PER_CPU_READ_MOSTLY(_type, _name)
-#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
-#define early_per_cpu_ptr(_name) NULL
+#define early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
+#define early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */
-#endif /* !CONFIG_SMP */
+#endif /* !CONFIG_SMP */
#endif /* _ASM_X86_PERCPU_H */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7f1e17250546..812dac3f79f0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -32,6 +32,8 @@
#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23)
#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL
#define ARCH_PERFMON_EVENTSEL_BR_CNTR (1ULL << 35)
+#define ARCH_PERFMON_EVENTSEL_EQ (1ULL << 36)
+#define ARCH_PERFMON_EVENTSEL_UMASK2 (0xFFULL << 40)
#define INTEL_FIXED_BITS_MASK 0xFULL
#define INTEL_FIXED_BITS_STRIDE 4
@@ -39,6 +41,7 @@
#define INTEL_FIXED_0_USER (1ULL << 1)
#define INTEL_FIXED_0_ANYTHREAD (1ULL << 2)
#define INTEL_FIXED_0_ENABLE_PMI (1ULL << 3)
+#define INTEL_FIXED_3_METRICS_CLEAR (1ULL << 2)
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
@@ -138,6 +141,12 @@
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
#define PEBS_DATACFG_LBR_SHIFT 24
+#define PEBS_DATACFG_CNTR BIT_ULL(4)
+#define PEBS_DATACFG_CNTR_SHIFT 32
+#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
+#define PEBS_DATACFG_FIX_SHIFT 48
+#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
+#define PEBS_DATACFG_METRICS BIT_ULL(5)
/* Steal the highest bit of pebs_data_cfg for SW usage */
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
@@ -185,9 +194,33 @@ union cpuid10_edx {
* detection/enumeration details:
*/
#define ARCH_PERFMON_EXT_LEAF 0x00000023
-#define ARCH_PERFMON_NUM_COUNTER_LEAF_BIT 0x1
#define ARCH_PERFMON_NUM_COUNTER_LEAF 0x1
+union cpuid35_eax {
+ struct {
+ unsigned int leaf0:1;
+ /* Counters Sub-Leaf */
+ unsigned int cntr_subleaf:1;
+ /* Auto Counter Reload Sub-Leaf */
+ unsigned int acr_subleaf:1;
+ /* Events Sub-Leaf */
+ unsigned int events_subleaf:1;
+ unsigned int reserved:28;
+ } split;
+ unsigned int full;
+};
+
+union cpuid35_ebx {
+ struct {
+ /* UnitMask2 Supported */
+ unsigned int umask2:1;
+ /* EQ-bit Supported */
+ unsigned int eq:1;
+ unsigned int reserved:30;
+ } split;
+ unsigned int full;
+};
+
/*
* Intel Architectural LBR CPUID detection/enumeration details:
*/
@@ -307,6 +340,10 @@ struct x86_pmu_capability {
#define INTEL_PMC_IDX_FIXED_SLOTS (INTEL_PMC_IDX_FIXED + 3)
#define INTEL_PMC_MSK_FIXED_SLOTS (1ULL << INTEL_PMC_IDX_FIXED_SLOTS)
+/* TOPDOWN_BAD_SPECULATION.ALL: fixed counter 4 (Atom only) */
+/* TOPDOWN_FE_BOUND.ALL: fixed counter 5 (Atom only) */
+/* TOPDOWN_RETIRING.ALL: fixed counter 6 (Atom only) */
+
static inline bool use_fixed_pseudo_encoding(u64 code)
{
return !(code & 0xff);
@@ -364,6 +401,9 @@ static inline bool use_fixed_pseudo_encoding(u64 code)
#define INTEL_TD_METRIC_MAX INTEL_TD_METRIC_MEM_BOUND
#define INTEL_TD_METRIC_NUM 8
+#define INTEL_TD_CFG_METRIC_CLEAR_BIT 0
+#define INTEL_TD_CFG_METRIC_CLEAR BIT_ULL(INTEL_TD_CFG_METRIC_CLEAR_BIT)
+
static inline bool is_metric_idx(int idx)
{
return (unsigned)(idx - INTEL_PMC_IDX_METRIC_BASE) < INTEL_TD_METRIC_NUM;
@@ -414,7 +454,9 @@ static inline bool is_topdown_idx(int idx)
*/
struct pebs_basic {
- u64 format_size;
+ u64 format_group:32,
+ retire_latency:16,
+ format_size:16;
u64 ip;
u64 applicable_counters;
u64 tsc;
@@ -423,7 +465,17 @@ struct pebs_basic {
struct pebs_meminfo {
u64 address;
u64 aux;
- u64 latency;
+ union {
+ /* pre Alder Lake */
+ u64 mem_latency;
+ /* Alder Lake and later */
+ struct {
+ u64 instr_latency:16;
+ u64 pad2:16;
+ u64 cache_latency:16;
+ u64 pad3:16;
+ };
+ };
u64 tsx_tuning;
};
@@ -436,6 +488,15 @@ struct pebs_xmm {
u64 xmm[16*2]; /* two entries for each register */
};
+struct pebs_cntr_header {
+ u32 cntr;
+ u32 fixed;
+ u32 metrics;
+ u32 reserved;
+};
+
+#define INTEL_CNTR_METRICS 0x3
+
/*
* AMD Extended Performance Monitoring and Debug cpuid feature detection
*/
@@ -463,6 +524,8 @@ struct pebs_xmm {
#define IBS_CAPS_FETCHCTLEXTD (1U<<9)
#define IBS_CAPS_OPDATA4 (1U<<10)
#define IBS_CAPS_ZEN4 (1U<<11)
+#define IBS_CAPS_OPLDLAT (1U<<12)
+#define IBS_CAPS_OPDTLBPGSIZE (1U<<19)
#define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \
| IBS_CAPS_FETCHSAM \
@@ -488,8 +551,11 @@ struct pebs_xmm {
* The lower 7 bits of the current count are random bits
* preloaded by hardware and ignored in software
*/
+#define IBS_OP_LDLAT_EN (1ULL<<63)
+#define IBS_OP_LDLAT_THRSH (0xFULL<<59)
#define IBS_OP_CUR_CNT (0xFFF80ULL<<32)
#define IBS_OP_CUR_CNT_RAND (0x0007FULL<<32)
+#define IBS_OP_CUR_CNT_EXT_MASK (0x7FULL<<52)
#define IBS_OP_CNT_CTL (1ULL<<19)
#define IBS_OP_VAL (1ULL<<18)
#define IBS_OP_ENABLE (1ULL<<17)
@@ -528,15 +594,17 @@ struct x86_perf_regs {
u64 *xmm_regs;
};
-extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-extern unsigned long perf_misc_flags(struct pt_regs *regs);
-#define perf_misc_flags(regs) perf_misc_flags(regs)
+extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_arch_misc_flags(struct pt_regs *regs);
+extern unsigned long perf_arch_guest_misc_flags(struct pt_regs *regs);
+#define perf_arch_misc_flags(regs) perf_arch_misc_flags(regs)
+#define perf_arch_guest_misc_flags(regs) perf_arch_guest_misc_flags(regs)
#include <asm/stacktrace.h>
/*
- * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
+ * We abuse bit 3 from flags to pass exact information, see
+ * perf_arch_misc_flags() and the comment with PERF_EFLAGS_EXACT.
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index dcd836b59beb..c88691b15f3c 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -6,6 +6,8 @@
#include <linux/mm.h> /* for struct page */
#include <linux/pagemap.h>
+#include <asm/cpufeature.h>
+
#define __HAVE_ARCH_PTE_ALLOC_ONE
#define __HAVE_ARCH_PGD_FREE
#include <asm-generic/pgalloc.h>
@@ -30,20 +32,16 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
#endif
/*
- * Flags to use when allocating a user page table page.
+ * In case of Page Table Isolation active, we acquire two PGDs instead of one.
+ * Being order-1, it is both 8k in size and 8k-aligned. That lets us just
+ * flip bit 12 in a pointer to swap between the two 4k halves.
*/
-extern gfp_t __userpte_alloc_gfp;
-
-#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
-/*
- * Instead of one PGD, we acquire two PGDs. Being order-1, it is
- * both 8k in size and 8k-aligned. That lets us just flip bit 12
- * in a pointer to swap between the two 4k halves.
- */
-#define PGD_ALLOCATION_ORDER 1
-#else
-#define PGD_ALLOCATION_ORDER 0
-#endif
+static inline unsigned int pgd_allocation_order(void)
+{
+ if (cpu_feature_enabled(X86_FEATURE_PTI))
+ return 1;
+ return 0;
+}
/*
* Allocate and free page tables.
@@ -147,24 +145,6 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
}
-static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
- gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
- if (mm == &init_mm)
- gfp &= ~__GFP_ACCOUNT;
- return (p4d_t *)get_zeroed_page(gfp);
-}
-
-static inline void p4d_free(struct mm_struct *mm, p4d_t *p4d)
-{
- if (!pgtable_l5_enabled())
- return;
-
- BUG_ON((unsigned long)p4d & (PAGE_SIZE-1));
- free_page((unsigned long)p4d);
-}
-
extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 7f6ccff0ba72..66425424ce91 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef unsigned long pteval_t;
@@ -16,24 +16,24 @@ typedef union {
pteval_t pte;
pteval_t pte_low;
} pte_t;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD 0
#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED
/*
- * traditional i386 two-level paging structure:
+ * Traditional i386 two-level paging structure:
*/
#define PGDIR_SHIFT 22
#define PTRS_PER_PGD 1024
-
/*
- * the i386 is two-level, so we don't really have any
- * PMD directory physically.
+ * The i386 is two-level, so we don't really have any
+ * PMD directory physically:
*/
+#define PTRS_PER_PMD 1
#define PTRS_PER_PTE 1024
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 80911349519e..9d5b257d44e3 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
typedef u64 pteval_t;
@@ -25,7 +25,7 @@ typedef union {
};
pmdval_t pmd;
} pmd_t;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI))
diff --git a/arch/x86/include/asm/pgtable-invert.h b/arch/x86/include/asm/pgtable-invert.h
index a0c1525f1b6f..e12e52ae8083 100644
--- a/arch/x86/include/asm/pgtable-invert.h
+++ b/arch/x86/include/asm/pgtable-invert.h
@@ -2,7 +2,7 @@
#ifndef _ASM_PGTABLE_INVERT_H
#define _ASM_PGTABLE_INVERT_H 1
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* A clear pte value is special, and doesn't get inverted.
@@ -36,6 +36,6 @@ static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
return val;
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 315535ffb258..7bd6bd6df4a1 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -15,7 +15,7 @@
cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \
: (prot))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/spinlock.h>
#include <asm/x86_init.h>
#include <asm/pkru.h>
@@ -120,6 +120,34 @@ extern pmdval_t early_pmd_flags;
#define arch_end_context_switch(prev) do {} while(0)
#endif /* CONFIG_PARAVIRT_XXL */
+static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v | set);
+}
+
+static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
+{
+ pmdval_t v = native_pmd_val(pmd);
+
+ return native_make_pmd(v & ~clear);
+}
+
+static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v | set);
+}
+
+static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
+{
+ pudval_t v = native_pud_val(pud);
+
+ return native_make_pud(v & ~clear);
+}
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -140,6 +168,11 @@ static inline int pte_young(pte_t pte)
return pte_flags(pte) & _PAGE_ACCESSED;
}
+static inline bool pte_decrypted(pte_t pte)
+{
+ return cc_mkdec(pte_val(pte)) == pte_val(pte);
+}
+
#define pmd_dirty pmd_dirty
static inline bool pmd_dirty(pmd_t pmd)
{
@@ -169,6 +202,13 @@ static inline int pud_young(pud_t pud)
return pud_flags(pud) & _PAGE_ACCESSED;
}
+static inline bool pud_shstk(pud_t pud)
+{
+ return cpu_feature_enabled(X86_FEATURE_SHSTK) &&
+ (pud_flags(pud) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) ==
+ (_PAGE_DIRTY | _PAGE_PSE);
+}
+
static inline int pte_write(pte_t pte)
{
/*
@@ -234,6 +274,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
return (pfn & pmd_pfn_mask(pmd)) >> PAGE_SHIFT;
}
+#define pud_pfn pud_pfn
static inline unsigned long pud_pfn(pud_t pud)
{
phys_addr_t pfn = pud_val(pud);
@@ -304,6 +345,30 @@ static inline int pud_devmap(pud_t pud)
}
#endif
+#ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+static inline bool pmd_special(pmd_t pmd)
+{
+ return pmd_flags(pmd) & _PAGE_SPECIAL;
+}
+
+static inline pmd_t pmd_mkspecial(pmd_t pmd)
+{
+ return pmd_set_flags(pmd, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PMD_PFNMAP */
+
+#ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+static inline bool pud_special(pud_t pud)
+{
+ return pud_flags(pud) & _PAGE_SPECIAL;
+}
+
+static inline pud_t pud_mkspecial(pud_t pud)
+{
+ return pud_set_flags(pud, _PAGE_SPECIAL);
+}
+#endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */
+
static inline int pgd_devmap(pgd_t pgd)
{
return 0;
@@ -387,23 +452,7 @@ static inline pte_t pte_wrprotect(pte_t pte)
#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
static inline int pte_uffd_wp(pte_t pte)
{
- bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
-
-#ifdef CONFIG_DEBUG_VM
- /*
- * Having write bit for wr-protect-marked present ptes is fatal,
- * because it means the uffd-wp bit will be ignored and write will
- * just go through.
- *
- * Use any chance of pgtable walking to verify this (e.g., when
- * page swapped out or being migrated for all purposes). It means
- * something is already wrong. Tell the admin even before the
- * process crashes. We also nail it with wrong pgtable setup.
- */
- WARN_ON_ONCE(wp && pte_write(pte));
-#endif
-
- return wp;
+ return pte_flags(pte) & _PAGE_UFFD_WP;
}
static inline pte_t pte_mkuffd_wp(pte_t pte)
@@ -490,20 +539,6 @@ static inline pte_t pte_mkdevmap(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL|_PAGE_DEVMAP);
}
-static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v | set);
-}
-
-static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear)
-{
- pmdval_t v = native_pmd_val(pmd);
-
- return native_make_pmd(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pmd_t pmd_mksaveddirty(pmd_t pmd)
{
@@ -598,20 +633,6 @@ static inline pmd_t pmd_mkwrite_novma(pmd_t pmd)
pmd_t pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
#define pmd_mkwrite pmd_mkwrite
-static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v | set);
-}
-
-static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
-{
- pudval_t v = native_pud_val(pud);
-
- return native_make_pud(v & ~clear);
-}
-
/* See comments above mksaveddirty_shift() */
static inline pud_t pud_mksaveddirty(pud_t pud)
{
@@ -790,6 +811,12 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd)
__pgprot(pmd_flags(pmd) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
}
+static inline pud_t pud_mkinvalid(pud_t pud)
+{
+ return pfn_pud(pud_pfn(pud),
+ __pgprot(pud_flags(pud) & ~(_PAGE_PRESENT|_PAGE_PROTNONE)));
+}
+
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask);
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
@@ -837,14 +864,8 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
pmd_result = __pmd(val);
/*
- * To avoid creating Write=0,Dirty=1 PMDs, pte_modify() needs to avoid:
- * 1. Marking Write=0 PMDs Dirty=1
- * 2. Marking Dirty=1 PMDs Write=0
- *
- * The first case cannot happen because the _PAGE_CHG_MASK will filter
- * out any Dirty bit passed in newprot. Handle the second case by
- * going through the mksaveddirty exercise. Only do this if the old
- * value was Write=1 to avoid doing this on Shadow Stack PTEs.
+ * Avoid creating shadow stack PMD by accident. See comment in
+ * pte_modify().
*/
if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result);
@@ -854,6 +875,29 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd_result;
}
+static inline pud_t pud_modify(pud_t pud, pgprot_t newprot)
+{
+ pudval_t val = pud_val(pud), oldval = val;
+ pud_t pud_result;
+
+ val &= _HPAGE_CHG_MASK;
+ val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
+ val = flip_protnone_guard(oldval, val, PHYSICAL_PUD_PAGE_MASK);
+
+ pud_result = __pud(val);
+
+ /*
+ * Avoid creating shadow stack PUD by accident. See comment in
+ * pte_modify().
+ */
+ if (oldval & _PAGE_RW)
+ pud_result = pud_mksaveddirty(pud_result);
+ else
+ pud_result = pud_clear_saveddirty(pud_result);
+
+ return pud_result;
+}
+
/*
* mprotect needs to preserve PAT and encryption bits when updating
* vm_page_prot
@@ -929,7 +973,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
}
#endif /* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#ifdef CONFIG_X86_32
@@ -938,7 +982,7 @@ static inline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
# include <asm/pgtable_64.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/log2.h>
@@ -1088,8 +1132,7 @@ static inline pmd_t *pud_pgtable(pud_t pud)
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
- return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
- (_PAGE_PSE | _PAGE_PRESENT);
+ return pud_val(pud) & _PAGE_PSE;
}
static inline int pud_bad(pud_t pud)
@@ -1190,17 +1233,16 @@ static inline int pgd_none(pgd_t pgd)
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern int direct_gbpages;
void init_mem_mapping(void);
void early_alloc_pgt_buf(void);
-extern void memblock_find_dma_reserve(void);
void __init poking_init(void);
unsigned long init_memory_mapping(unsigned long start,
unsigned long end, pgprot_t prot);
@@ -1394,10 +1436,28 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
}
#endif
+#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
+static inline pud_t pudp_establish(struct vm_area_struct *vma,
+ unsigned long address, pud_t *pudp, pud_t pud)
+{
+ page_table_check_pud_set(vma->vm_mm, pudp, pud);
+ if (IS_ENABLED(CONFIG_SMP)) {
+ return xchg(pudp, pud);
+ } else {
+ pud_t old = *pudp;
+ WRITE_ONCE(*pudp, pud);
+ return old;
+ }
+}
+#endif
+
#define __HAVE_ARCH_PMDP_INVALIDATE_AD
extern pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp);
+pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address,
+ pud_t *pudp);
+
/*
* Page table pages are page-aligned. The lower half of the top
* level is used for userspace and the top half for the kernel.
@@ -1679,6 +1739,9 @@ void arch_check_zapped_pte(struct vm_area_struct *vma, pte_t pte);
#define arch_check_zapped_pmd arch_check_zapped_pmd
void arch_check_zapped_pmd(struct vm_area_struct *vma, pmd_t pmd);
+#define arch_check_zapped_pud arch_check_zapped_pud
+void arch_check_zapped_pud(struct vm_area_struct *vma, pud_t pud);
+
#ifdef CONFIG_XEN_PV
#define arch_has_hw_nonleaf_pmd_young arch_has_hw_nonleaf_pmd_young
static inline bool arch_has_hw_nonleaf_pmd_young(void)
@@ -1712,6 +1775,43 @@ bool arch_is_platform_page(u64 paddr);
#define arch_is_platform_page arch_is_platform_page
#endif
-#endif /* __ASSEMBLY__ */
+/*
+ * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
+ * TLB flush will be required as a result of the "set". For example, use
+ * in scenarios where it is known ahead of time that the routine is
+ * setting non-present entries, or re-setting an existing entry to the
+ * same value. Otherwise, use the typical "set" helpers and flush the
+ * TLB.
+ */
+#define set_pte_safe(ptep, pte) \
+({ \
+ WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
+ set_pte(ptep, pte); \
+})
+
+#define set_pmd_safe(pmdp, pmd) \
+({ \
+ WARN_ON_ONCE(pmd_present(*pmdp) && !pmd_same(*pmdp, pmd)); \
+ set_pmd(pmdp, pmd); \
+})
+
+#define set_pud_safe(pudp, pud) \
+({ \
+ WARN_ON_ONCE(pud_present(*pudp) && !pud_same(*pudp, pud)); \
+ set_pud(pudp, pud); \
+})
+
+#define set_p4d_safe(p4dp, p4d) \
+({ \
+ WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \
+ set_p4d(p4dp, p4d); \
+})
+
+#define set_pgd_safe(pgdp, pgd) \
+({ \
+ WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \
+ set_pgd(pgdp, pgd); \
+})
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_H */
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 7d4ad8907297..b612cc57a4d3 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -13,7 +13,7 @@
* This file contains the functions and defines necessary to modify and use
* the i386 page table tree.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/processor.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
@@ -45,7 +45,7 @@ do { \
flush_tlb_one_kernel((vaddr)); \
} while (0)
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* This is used to calculate the .brk reservation for initial pagetables.
diff --git a/arch/x86/include/asm/pgtable_32_areas.h b/arch/x86/include/asm/pgtable_32_areas.h
index b6355416a15a..921148b42967 100644
--- a/arch/x86/include/asm/pgtable_32_areas.h
+++ b/arch/x86/include/asm/pgtable_32_areas.h
@@ -13,7 +13,7 @@
*/
#define VMALLOC_OFFSET (8 * 1024 * 1024)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern bool __vmalloc_start_set; /* set once high_memory is set */
#endif
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 7e9db77231ac..b89f8f1194a9 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -5,7 +5,7 @@
#include <linux/const.h>
#include <asm/pgtable_64_types.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* This file contains the functions and defines necessary to modify and use
@@ -270,5 +270,26 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
#include <asm/pgtable-invert.h>
-#endif /* !__ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
+
+#define l4_index(x) (((x) >> 39) & 511)
+#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+
+L4_PAGE_OFFSET = l4_index(__PAGE_OFFSET_BASE_L4)
+L4_START_KERNEL = l4_index(__START_KERNEL_map)
+
+L3_START_KERNEL = pud_index(__START_KERNEL_map)
+
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
+
+/* Automate the creation of 1 to 1 mapping pmd entries */
+#define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+ .rept (COUNT) ; \
+ .quad (START) + (i << PMD_SHIFT) + (PERM) ; \
+ i = i + 1 ; \
+ .endr
+
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_64_H */
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 9053dfe9fa03..5bb782d856f2 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -4,7 +4,7 @@
#include <asm/sparsemem.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/kaslr.h>
@@ -44,7 +44,7 @@ static inline bool pgtable_l5_enabled(void)
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#define SHARED_KERNEL_PMD 0
@@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d;
# define VMEMMAP_START __VMEMMAP_BASE_L4
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
+#ifdef CONFIG_RANDOMIZE_MEMORY
+# define DIRECT_MAP_PHYSMEM_END direct_map_physmem_end
+#endif
+
/*
* End of the region for which vmalloc page tables are pre-allocated.
* For non-KMSAN builds, this is the same as VMALLOC_END.
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0b748ee16b3d..b74ec5c3643b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -33,13 +33,16 @@
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_SOFTW1
#define _PAGE_BIT_UFFD_WP _PAGE_BIT_SOFTW2 /* userfaultfd wrprotected */
#define _PAGE_BIT_SOFT_DIRTY _PAGE_BIT_SOFTW3 /* software dirty tracking */
+#define _PAGE_BIT_KERNEL_4K _PAGE_BIT_SOFTW3 /* page must not be converted to large */
#define _PAGE_BIT_DEVMAP _PAGE_BIT_SOFTW4
#ifdef CONFIG_X86_64
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW5 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW5 /* No PTI shadow (root PGD) */
#else
/* Shared with _PAGE_BIT_UFFD_WP which is not supported on 32 bit */
-#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit */
+#define _PAGE_BIT_SAVED_DIRTY _PAGE_BIT_SOFTW2 /* Saved Dirty bit (leaf) */
+#define _PAGE_BIT_NOPTISHADOW _PAGE_BIT_SOFTW2 /* No PTI shadow (root PGD) */
#endif
/* If _PAGE_BIT_PRESENT is clear, we use these: */
@@ -62,6 +65,7 @@
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
+#define _PAGE_KERNEL_4K (_AT(pteval_t, 1) << _PAGE_BIT_KERNEL_4K)
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
#define _PAGE_PKEY_BIT0 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT0)
#define _PAGE_PKEY_BIT1 (_AT(pteval_t, 1) << _PAGE_BIT_PKEY_BIT1)
@@ -139,6 +143,8 @@
#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#define _PAGE_NOPTISHADOW (_AT(pteval_t, 1) << _PAGE_BIT_NOPTISHADOW)
+
/*
* Set of bits not changed in pte_modify. The pte's
* protection key is treated like _PAGE_RW, for
@@ -148,7 +154,7 @@
#define _COMMON_PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \
_PAGE_SPECIAL | _PAGE_ACCESSED | \
_PAGE_DIRTY_BITS | _PAGE_SOFT_DIRTY | \
- _PAGE_DEVMAP | _PAGE_ENC | _PAGE_UFFD_WP)
+ _PAGE_DEVMAP | _PAGE_CC | _PAGE_UFFD_WP)
#define _PAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PAT)
#define _HPAGE_CHG_MASK (_COMMON_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_PAT_LARGE)
@@ -160,7 +166,7 @@
* to have the WB mode at index 0 (all bits clear). This is the default
* right now and likely would break too much if changed.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
enum page_cache_mode {
_PAGE_CACHE_MODE_WB = 0,
_PAGE_CACHE_MODE_WC = 1,
@@ -173,6 +179,7 @@ enum page_cache_mode {
};
#endif
+#define _PAGE_CC (_AT(pteval_t, cc_get_mask()))
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
#define _PAGE_CACHE_MASK (_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)
@@ -234,7 +241,7 @@ enum page_cache_mode {
#define __PAGE_KERNEL_IO_NOCACHE __PAGE_KERNEL_NOCACHE
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _ENC)
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _ENC)
@@ -257,7 +264,7 @@ enum page_cache_mode {
#define PAGE_KERNEL_IO __pgprot_mask(__PAGE_KERNEL_IO)
#define PAGE_KERNEL_IO_NOCACHE __pgprot_mask(__PAGE_KERNEL_IO_NOCACHE)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/*
* early identity mapping pte attrib macros.
@@ -276,7 +283,7 @@ enum page_cache_mode {
# include <asm/pgtable_64_types.h>
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -516,8 +523,6 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern pteval_t __default_kernel_pte_mask;
-extern void set_nx(void);
-extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
@@ -548,6 +553,7 @@ enum pg_level {
PG_LEVEL_2M,
PG_LEVEL_1G,
PG_LEVEL_512G,
+ PG_LEVEL_256T,
PG_LEVEL_NUM
};
@@ -566,6 +572,8 @@ static inline void update_page_count(int level, unsigned long pages) { }
extern pte_t *lookup_address(unsigned long address, unsigned int *level);
extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
unsigned int *level);
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw);
extern pmd_t *lookup_pmd_address(unsigned long address);
extern phys_addr_t slow_virt_to_phys(void *__address);
extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
@@ -574,6 +582,6 @@ extern int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn,
unsigned long page_flags);
extern int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
unsigned long numpages);
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PGTABLE_DEFS_H */
diff --git a/arch/x86/include/asm/posted_intr.h b/arch/x86/include/asm/posted_intr.h
new file mode 100644
index 000000000000..de788b400fba
--- /dev/null
+++ b/arch/x86/include/asm/posted_intr.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_POSTED_INTR_H
+#define _X86_POSTED_INTR_H
+#include <asm/irq_vectors.h>
+
+#define POSTED_INTR_ON 0
+#define POSTED_INTR_SN 1
+
+#define PID_TABLE_ENTRY_VALID 1
+
+/* Posted-Interrupt Descriptor */
+struct pi_desc {
+ union {
+ u32 pir[8]; /* Posted interrupt requested */
+ u64 pir64[4];
+ };
+ union {
+ struct {
+ u16 notifications; /* Suppress and outstanding bits */
+ u8 nv;
+ u8 rsvd_2;
+ u32 ndst;
+ };
+ u64 control;
+ };
+ u32 rsvd[6];
+} __aligned(64);
+
+static inline bool pi_test_and_set_on(struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_on(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_clear_sn(struct pi_desc *pi_desc)
+{
+ return test_and_clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
+{
+ return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
+}
+
+static inline bool pi_is_pir_empty(struct pi_desc *pi_desc)
+{
+ return bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS);
+}
+
+static inline void pi_set_sn(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_set_on(struct pi_desc *pi_desc)
+{
+ set_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline void pi_clear_sn(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_on(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_ON, (unsigned long *)&pi_desc->control);
+}
+
+static inline bool pi_test_sn(struct pi_desc *pi_desc)
+{
+ return test_bit(POSTED_INTR_SN, (unsigned long *)&pi_desc->control);
+}
+
+/* Non-atomic helpers */
+static inline void __pi_set_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications |= BIT(POSTED_INTR_SN);
+}
+
+static inline void __pi_clear_sn(struct pi_desc *pi_desc)
+{
+ pi_desc->notifications &= ~BIT(POSTED_INTR_SN);
+}
+
+#ifdef CONFIG_X86_POSTED_MSI
+/*
+ * Not all external vectors are subject to interrupt remapping, e.g. IOMMU's
+ * own interrupts. Here we do not distinguish them since those vector bits in
+ * PIR will always be zero.
+ */
+static inline bool pi_pending_this_cpu(unsigned int vector)
+{
+ struct pi_desc *pid = this_cpu_ptr(&posted_msi_pi_desc);
+
+ if (WARN_ON_ONCE(vector > NR_VECTORS || vector < FIRST_EXTERNAL_VECTOR))
+ return false;
+
+ return test_bit(vector, (unsigned long *)pid->pir);
+}
+
+extern void intel_posted_msi_init(void);
+#else
+static inline bool pi_pending_this_cpu(unsigned int vector) { return false; }
+
+static inline void intel_posted_msi_init(void) {};
+#endif /* X86_POSTED_MSI */
+
+#endif /* _X86_POSTED_INTR_H */
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 919909d8cb77..578441db09f0 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -4,10 +4,11 @@
#include <asm/rmwcc.h>
#include <asm/percpu.h>
-#include <asm/current.h>
#include <linux/static_call_types.h>
+DECLARE_PER_CPU_CACHE_HOT(int, __preempt_count);
+
/* We use the MSB mostly because its available */
#define PREEMPT_NEED_RESCHED 0x80000000
@@ -23,18 +24,18 @@
*/
static __always_inline int preempt_count(void)
{
- return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
+ return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
}
static __always_inline void preempt_count_set(int pc)
{
int old, new;
- old = raw_cpu_read_4(pcpu_hot.preempt_count);
+ old = raw_cpu_read_4(__preempt_count);
do {
new = (old & PREEMPT_NEED_RESCHED) |
(pc & ~PREEMPT_NEED_RESCHED);
- } while (!raw_cpu_try_cmpxchg_4(pcpu_hot.preempt_count, &old, new));
+ } while (!raw_cpu_try_cmpxchg_4(__preempt_count, &old, new));
}
/*
@@ -43,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
#define init_task_preempt_count(p) do { } while (0)
#define init_idle_preempt_count(p, cpu) do { \
- per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
+ per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
} while (0)
/*
@@ -57,17 +58,17 @@ static __always_inline void preempt_count_set(int pc)
static __always_inline void set_preempt_need_resched(void)
{
- raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
+ raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
}
static __always_inline void clear_preempt_need_resched(void)
{
- raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
+ raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
}
static __always_inline bool test_preempt_need_resched(void)
{
- return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
+ return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
}
/*
@@ -76,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void)
static __always_inline void __preempt_count_add(int val)
{
- raw_cpu_add_4(pcpu_hot.preempt_count, val);
+ raw_cpu_add_4(__preempt_count, val);
}
static __always_inline void __preempt_count_sub(int val)
{
- raw_cpu_add_4(pcpu_hot.preempt_count, -val);
+ raw_cpu_add_4(__preempt_count, -val);
}
/*
@@ -91,7 +92,7 @@ static __always_inline void __preempt_count_sub(int val)
*/
static __always_inline bool __preempt_count_dec_and_test(void)
{
- return GEN_UNARY_RMWcc("decl", __my_cpu_var(pcpu_hot.preempt_count), e,
+ return GEN_UNARY_RMWcc("decl", __my_cpu_var(__preempt_count), e,
__percpu_arg([var]));
}
@@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
*/
static __always_inline bool should_resched(int preempt_offset)
{
- return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
+ return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
}
#ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 811548f131f4..5d2f7e5aff26 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -60,18 +60,13 @@ struct vm86;
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
-enum tlb_infos {
- ENTRIES,
- NR_INFO
-};
-
-extern u16 __read_mostly tlb_lli_4k[NR_INFO];
-extern u16 __read_mostly tlb_lli_2m[NR_INFO];
-extern u16 __read_mostly tlb_lli_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4k[NR_INFO];
-extern u16 __read_mostly tlb_lld_2m[NR_INFO];
-extern u16 __read_mostly tlb_lld_4m[NR_INFO];
-extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+extern u16 __read_mostly tlb_lli_4k;
+extern u16 __read_mostly tlb_lli_2m;
+extern u16 __read_mostly tlb_lli_4m;
+extern u16 __read_mostly tlb_lld_4k;
+extern u16 __read_mostly tlb_lld_2m;
+extern u16 __read_mostly tlb_lld_4m;
+extern u16 __read_mostly tlb_lld_1g;
/*
* CPU type and hardware bug flags. Kept separately for each CPU.
@@ -98,6 +93,7 @@ struct cpuinfo_topology {
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
+ u32 logical_core_id;
// AMD Node ID and Nodes per Package info
u32 amd_node_id;
@@ -105,12 +101,44 @@ struct cpuinfo_topology {
// Cache level topology IDs
u32 llc_id;
u32 l2c_id;
+
+ // Hardware defined CPU-type
+ union {
+ u32 cpu_type;
+ struct {
+ // CPUID.1A.EAX[23-0]
+ u32 intel_native_model_id :24;
+ // CPUID.1A.EAX[31-24]
+ u32 intel_type :8;
+ };
+ struct {
+ // CPUID 0x80000026.EBX
+ u32 amd_num_processors :16,
+ amd_power_eff_ranking :8,
+ amd_native_model_id :4,
+ amd_type :4;
+ };
+ };
};
struct cpuinfo_x86 {
- __u8 x86; /* CPU family */
- __u8 x86_vendor; /* CPU vendor */
- __u8 x86_model;
+ union {
+ /*
+ * The particular ordering (low-to-high) of (vendor,
+ * family, model) is done in case range of models, like
+ * it is usually done on AMD, need to be compared.
+ */
+ struct {
+ __u8 x86_model;
+ /* CPU family */
+ __u8 x86;
+ /* CPU vendor */
+ __u8 x86_vendor;
+ __u8 x86_reserved;
+ };
+ /* combined vendor, family, model */
+ __u32 x86_vfm;
+ };
__u8 x86_stepping;
#ifdef CONFIG_X86_64
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
@@ -198,8 +226,10 @@ static inline unsigned long long l1tf_pfn_limit(void)
return BIT_ULL(boot_cpu_data.x86_cache_bits - 1 - PAGE_SHIFT);
}
+void init_cpu_devs(void);
+void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
-extern void identify_secondary_cpu(struct cpuinfo_x86 *);
+extern void identify_secondary_cpu(unsigned int cpu);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
@@ -385,37 +415,33 @@ struct irq_stack {
char stack[IRQ_STACK_SIZE];
} __aligned(IRQ_STACK_SIZE);
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr);
#ifdef CONFIG_X86_64
-struct fixed_percpu_data {
- /*
- * GCC hardcodes the stack canary as %gs:40. Since the
- * irq_stack is the object at %gs:0, we reserve the bottom
- * 48 bytes of the irq stack for the canary.
- *
- * Once we are willing to require -mstack-protector-guard-symbol=
- * support for x86_64 stackprotector, we can get rid of this.
- */
- char gs_base[40];
- unsigned long stack_canary;
-};
+DECLARE_PER_CPU_CACHE_HOT(bool, hardirq_stack_inuse);
+#else
+DECLARE_PER_CPU_CACHE_HOT(struct irq_stack *, softirq_stack_ptr);
+#endif
-DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
-DECLARE_INIT_PER_CPU(fixed_percpu_data);
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, cpu_current_top_of_stack);
+/* const-qualified alias provided by the linker. */
+DECLARE_PER_CPU_CACHE_HOT(const unsigned long __percpu_seg_override,
+ const_cpu_current_top_of_stack);
+#ifdef CONFIG_X86_64
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
+#ifdef CONFIG_SMP
+ return per_cpu_offset(cpu);
+#else
+ return 0;
+#endif
}
extern asmlinkage void entry_SYSCALL32_ignore(void);
/* Save actual FS/GS selectors and bases to current->thread */
void current_save_fsgs(void);
-#else /* X86_64 */
-#ifdef CONFIG_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
-#endif
-#endif /* !X86_64 */
+#endif /* X86_64 */
struct perf_event;
@@ -472,7 +498,6 @@ struct thread_struct {
unsigned long iopl_emul;
unsigned int iopl_warn:1;
- unsigned int sig_on_uaccess_err:1;
/*
* Protection Keys Register for Userspace. Loaded immediately on
@@ -527,9 +552,9 @@ static __always_inline unsigned long current_top_of_stack(void)
* entry trampoline.
*/
if (IS_ENABLED(CONFIG_USE_X86_SEG_SUPPORT))
- return this_cpu_read_const(const_pcpu_hot.top_of_stack);
+ return this_cpu_read_const(const_cpu_current_top_of_stack);
- return this_cpu_read_stable(pcpu_hot.top_of_stack);
+ return this_cpu_read_stable(cpu_current_top_of_stack);
}
static __always_inline bool on_thread_stack(void)
@@ -569,7 +594,8 @@ extern void switch_gdt_and_percpu_base(int);
extern void load_direct_gdt(int);
extern void load_fixmap_gdt(int);
extern void cpu_init(void);
-extern void cpu_init_exception_handling(void);
+extern void cpu_init_exception_handling(bool boot_cpu);
+extern void cpu_init_replace_early_idt(void);
extern void cr4_init(void);
extern void set_task_blockstep(struct task_struct *task, bool on);
@@ -587,7 +613,7 @@ extern char ignore_fpu_irq;
# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 %P1"
+# define BASE_PREFETCH "prefetcht0 %1"
#endif
/*
@@ -598,7 +624,7 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchnta %P1",
+ alternative_input(BASE_PREFETCH, "prefetchnta %1",
X86_FEATURE_XMM,
"m" (*(const char *)x));
}
@@ -610,7 +636,7 @@ static inline void prefetch(const void *x)
*/
static __always_inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ alternative_input(BASE_PREFETCH, "prefetchw %1",
X86_FEATURE_3DNOWPREFETCH,
"m" (*(const char *)x));
}
@@ -633,19 +659,13 @@ static __always_inline void prefetchw(const void *x)
.sysenter_cs = __KERNEL_CS, \
}
-#define KSTK_ESP(task) (task_pt_regs(task)->sp)
-
#else
-extern unsigned long __end_init_task[];
+extern unsigned long __top_init_kernel_stack[];
#define INIT_THREAD { \
- .sp = (unsigned long)&__end_init_task - \
- TOP_OF_KERNEL_STACK_PADDING - \
- sizeof(struct pt_regs), \
+ .sp = (unsigned long)&__top_init_kernel_stack, \
}
-extern unsigned long KSTK_ESP(struct task_struct *task);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
@@ -659,6 +679,7 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE_LOW)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
+#define KSTK_ESP(task) (task_pt_regs(task)->sp)
/* Get/set a process' ability to use the timestamp counter instruction */
#define GET_TSC_CTL(adr) get_tsc_mode((adr))
@@ -680,11 +701,18 @@ static inline u32 per_cpu_l2c_id(unsigned int cpu)
}
#ifdef CONFIG_CPU_SUP_AMD
-extern u32 amd_get_highest_perf(void);
-extern void amd_clear_divider(void);
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+static __always_inline void amd_clear_divider(void)
+{
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+}
+
extern void amd_check_microcode(void);
#else
-static inline u32 amd_get_highest_perf(void) { return 0; }
static inline void amd_clear_divider(void) { }
static inline void amd_check_microcode(void) { }
#endif
@@ -717,6 +745,7 @@ extern enum l1tf_mitigations l1tf_mitigation;
enum mds_mitigations {
MDS_MITIGATION_OFF,
+ MDS_MITIGATION_AUTO,
MDS_MITIGATION_FULL,
MDS_MITIGATION_VMWERV,
};
diff --git a/arch/x86/include/asm/prom.h b/arch/x86/include/asm/prom.h
index 043758a2e627..5d0dbab85264 100644
--- a/arch/x86/include/asm/prom.h
+++ b/arch/x86/include/asm/prom.h
@@ -8,7 +8,7 @@
#ifndef _ASM_X86_PROM_H
#define _ASM_X86_PROM_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/of.h>
#include <linux/types.h>
@@ -23,20 +23,15 @@ extern int of_ioapic;
extern u64 initial_dtb;
extern void add_dtb(u64 data);
void x86_of_pci_init(void);
-void x86_dtb_parse_smp_config(void);
+void x86_flattree_get_config(void);
#else
static inline void add_dtb(u64 data) { }
static inline void x86_of_pci_init(void) { }
-static inline void x86_dtb_parse_smp_config(void) { }
+static inline void x86_flattree_get_config(void) { }
#define of_ioapic 0
#endif
-#ifdef CONFIG_OF_EARLY_FLATTREE
-void x86_flattree_get_config(void);
-#else
-static inline void x86_flattree_get_config(void) { }
-#endif
extern char cmd_line[COMMAND_LINE_SIZE];
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 484f4f0131a5..05224a695872 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -15,7 +15,6 @@ void entry_SYSCALL_64(void);
void entry_SYSCALL_64_safe_stack(void);
void entry_SYSRETQ_unsafe_stack(void);
void entry_SYSRETQ_end(void);
-long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
@@ -41,6 +40,6 @@ void x86_configure_nx(void);
extern int reboot_force;
-long do_arch_prctl_common(int option, unsigned long arg2);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/pti.h b/arch/x86/include/asm/pti.h
index ab167c96b9ab..88d0a1ab1f77 100644
--- a/arch/x86/include/asm/pti.h
+++ b/arch/x86/include/asm/pti.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PTI_H
#define _ASM_X86_PTI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
extern void pti_init(void);
@@ -11,5 +11,5 @@ extern void pti_finalize(void);
static inline void pti_check_boottime_disable(void) { }
#endif
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PTI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5a83fbd9bc0b..50f75467f73d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -6,7 +6,7 @@
#include <asm/page_types.h>
#include <uapi/asm/ptrace.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
struct pt_regs {
@@ -469,5 +469,5 @@ extern int do_set_thread_area(struct task_struct *p, int idx,
# define do_set_thread_area_64(p, s, t) (0)
#endif
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 5528e9325049..2fee5e9f1ccc 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -2,10 +2,10 @@
#ifndef _ASM_X86_PURGATORY_H
#define _ASM_X86_PURGATORY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/purgatory.h>
extern void purgatory(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 1436226efe3e..b9fece5fc96d 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PVCLOCK_ABI_H
#define _ASM_X86_PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* These structs MUST NOT be changed.
@@ -44,5 +44,5 @@ struct pvclock_wall_clock {
#define PVCLOCK_GUEST_STOPPED (1 << 1)
/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 0c92db84469d..6e4f8fae3ce9 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -5,6 +5,7 @@
#include <asm/clocksource.h>
#include <asm/pvclock-abi.h>
+struct timespec64;
/* some helper functions for xen and kvm pv clock sources */
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src);
diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index cde8357bb226..68da67df304d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu)
#ifdef CONFIG_PARAVIRT
/*
- * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack.
+ * virt_spin_lock_key - disables by default the virt_spin_lock() hijack.
*
- * Native (and PV wanting native due to vCPU pinning) should disable this key.
- * It is done in this backwards fashion to only have a single direction change,
- * which removes ordering between native_pv_spin_init() and HV setup.
+ * Native (and PV wanting native due to vCPU pinning) should keep this key
+ * disabled. Native does not touch the key.
+ *
+ * When in a guest then native_pv_lock_init() enables the key first and
+ * KVM/XEN might conditionally disable it later in the boot process again.
*/
-DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
/*
* Shortcut for the queued_spin_lock_slowpath() function that allows
@@ -85,6 +87,8 @@ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key);
#define virt_spin_lock virt_spin_lock
static inline bool virt_spin_lock(struct qspinlock *lock)
{
+ int val;
+
if (!static_branch_likely(&virt_spin_lock_key))
return false;
@@ -94,10 +98,13 @@ static inline bool virt_spin_lock(struct qspinlock *lock)
* horrible lock 'holder' preemption issues.
*/
- do {
- while (atomic_read(&lock->val) != 0)
- cpu_relax();
- } while (atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL) != 0);
+ __retry:
+ val = atomic_read(&lock->val);
+
+ if (val || !atomic_try_cmpxchg(&lock->val, &val, _Q_LOCKED_VAL)) {
+ cpu_relax();
+ goto __retry;
+ }
return true;
}
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index ef9697f20129..0a985784be9b 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -25,9 +25,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
*
* void __lockfunc __pv_queued_spin_unlock(struct qspinlock *lock)
* {
- * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = _Q_LOCKED_VAL;
*
- * if (likely(lockval == _Q_LOCKED_VAL))
+ * if (try_cmpxchg(&lock->locked, &lockval, 0))
* return;
* pv_queued_spin_unlock_slowpath(lock, lockval);
* }
@@ -40,10 +40,9 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
#define PV_UNLOCK_ASM \
FRAME_BEGIN \
"push %rdx\n\t" \
- "mov $0x1,%eax\n\t" \
+ "mov $" __stringify(_Q_LOCKED_VAL) ",%eax\n\t" \
"xor %edx,%edx\n\t" \
LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \
- "cmp $0x1,%al\n\t" \
"jne .slowpath\n\t" \
"pop %rdx\n\t" \
FRAME_END \
diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h
index 87e5482acd0d..f607081a022a 100644
--- a/arch/x86/include/asm/realmode.h
+++ b/arch/x86/include/asm/realmode.h
@@ -9,7 +9,7 @@
#define TH_FLAGS_SME_ACTIVE_BIT 0
#define TH_FLAGS_SME_ACTIVE BIT(TH_FLAGS_SME_ACTIVE_BIT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <asm/io.h>
@@ -95,6 +95,6 @@ void reserve_real_mode(void);
void load_trampoline_pgtable(void);
void init_real_mode(void);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ARCH_X86_REALMODE_H */
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h
index 6536873f8fc0..ecd58ea9a837 100644
--- a/arch/x86/include/asm/reboot.h
+++ b/arch/x86/include/asm/reboot.h
@@ -25,14 +25,16 @@ void __noreturn machine_real_restart(unsigned int type);
#define MRR_BIOS 0
#define MRR_APM 1
-#if IS_ENABLED(CONFIG_KVM_INTEL) || IS_ENABLED(CONFIG_KVM_AMD)
typedef void (cpu_emergency_virt_cb)(void);
+#if IS_ENABLED(CONFIG_KVM_X86)
void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback);
void cpu_emergency_disable_virtualization(void);
#else
+static inline void cpu_emergency_register_virt_callback(cpu_emergency_virt_cb *callback) {}
+static inline void cpu_emergency_unregister_virt_callback(cpu_emergency_virt_cb *callback) {}
static inline void cpu_emergency_disable_virtualization(void) {}
-#endif /* CONFIG_KVM_INTEL || CONFIG_KVM_AMD */
+#endif /* CONFIG_KVM_X86 */
typedef void (*nmi_shootdown_cb)(int, struct pt_regs*);
void nmi_shootdown_cpus(nmi_shootdown_cb callback);
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
deleted file mode 100644
index e9187ddd3d1f..000000000000
--- a/arch/x86/include/asm/required-features.h
+++ /dev/null
@@ -1,105 +0,0 @@
-#ifndef _ASM_X86_REQUIRED_FEATURES_H
-#define _ASM_X86_REQUIRED_FEATURES_H
-
-/* Define minimum CPUID feature set for kernel These bits are checked
- really early to actually display a visible error message before the
- kernel dies. Make sure to assign features to the proper mask!
-
- Some requirements that are not in CPUID yet are also in the
- CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
-
- The real information is in arch/x86/Kconfig.cpu, this just converts
- the CONFIGs into a bitmask */
-
-#ifndef CONFIG_MATH_EMULATION
-# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
-#else
-# define NEED_FPU 0
-#endif
-
-#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
-# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
-#else
-# define NEED_PAE 0
-#endif
-
-#ifdef CONFIG_X86_CMPXCHG64
-# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
-#else
-# define NEED_CX8 0
-#endif
-
-#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
-# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
-#else
-# define NEED_CMOV 0
-#endif
-
-# define NEED_3DNOW 0
-
-#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
-# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
-#else
-# define NEED_NOPL 0
-#endif
-
-#ifdef CONFIG_MATOM
-# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
-#else
-# define NEED_MOVBE 0
-#endif
-
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT_XXL
-/* Paravirtualized systems may not have PSE or PGE available */
-#define NEED_PSE 0
-#define NEED_PGE 0
-#else
-#define NEED_PSE (1<<(X86_FEATURE_PSE) & 31)
-#define NEED_PGE (1<<(X86_FEATURE_PGE) & 31)
-#endif
-#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
-#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
-#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
-#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
-#define NEED_LM (1<<(X86_FEATURE_LM & 31))
-#else
-#define NEED_PSE 0
-#define NEED_MSR 0
-#define NEED_PGE 0
-#define NEED_FXSR 0
-#define NEED_XMM 0
-#define NEED_XMM2 0
-#define NEED_LM 0
-#endif
-
-#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
- NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
- NEED_XMM|NEED_XMM2)
-#define SSE_MASK (NEED_XMM|NEED_XMM2)
-
-#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
-
-#define REQUIRED_MASK2 0
-#define REQUIRED_MASK3 (NEED_NOPL)
-#define REQUIRED_MASK4 (NEED_MOVBE)
-#define REQUIRED_MASK5 0
-#define REQUIRED_MASK6 0
-#define REQUIRED_MASK7 0
-#define REQUIRED_MASK8 0
-#define REQUIRED_MASK9 0
-#define REQUIRED_MASK10 0
-#define REQUIRED_MASK11 0
-#define REQUIRED_MASK12 0
-#define REQUIRED_MASK13 0
-#define REQUIRED_MASK14 0
-#define REQUIRED_MASK15 0
-#define REQUIRED_MASK16 0
-#define REQUIRED_MASK17 0
-#define REQUIRED_MASK18 0
-#define REQUIRED_MASK19 0
-#define REQUIRED_MASK20 0
-#define REQUIRED_MASK21 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
-
-#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index 12dbd2588ca7..011bf67a1866 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -4,8 +4,10 @@
#ifdef CONFIG_X86_CPU_RESCTRL
-#include <linux/sched.h>
#include <linux/jump_label.h>
+#include <linux/percpu.h>
+#include <linux/resctrl_types.h>
+#include <linux/sched.h>
/*
* This value can never be a valid CLOSID, and is used when mapping a
@@ -40,6 +42,7 @@ DECLARE_PER_CPU(struct resctrl_pqr_state, pqr_state);
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
+extern unsigned int rdt_mon_features;
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
@@ -79,6 +82,21 @@ static inline void resctrl_arch_disable_mon(void)
static_branch_dec_cpuslocked(&rdt_enable_key);
}
+static inline bool resctrl_arch_is_llc_occupancy_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
+}
+
+static inline bool resctrl_arch_is_mbm_total_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
+}
+
+static inline bool resctrl_arch_is_mbm_local_enabled(void)
+{
+ return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
+}
+
/*
* __resctrl_sched_in() - Writes the task's CLOSid/RMID to IA32_PQR_MSR
*
@@ -96,8 +114,8 @@ static inline void resctrl_arch_disable_mon(void)
static inline void __resctrl_sched_in(struct task_struct *tsk)
{
struct resctrl_pqr_state *state = this_cpu_ptr(&pqr_state);
- u32 closid = state->default_closid;
- u32 rmid = state->default_rmid;
+ u32 closid = READ_ONCE(state->default_closid);
+ u32 rmid = READ_ONCE(state->default_rmid);
u32 tmp;
/*
@@ -132,6 +150,13 @@ static inline unsigned int resctrl_arch_round_mon_val(unsigned int val)
return val * scale;
}
+static inline void resctrl_arch_set_cpu_default_closid_rmid(int cpu, u32 closid,
+ u32 rmid)
+{
+ WRITE_ONCE(per_cpu(pqr_state.default_closid, cpu), closid);
+ WRITE_ONCE(per_cpu(pqr_state.default_rmid, cpu), rmid);
+}
+
static inline void resctrl_arch_set_closid_rmid(struct task_struct *tsk,
u32 closid, u32 rmid)
{
@@ -156,12 +181,6 @@ static inline void resctrl_sched_in(struct task_struct *tsk)
__resctrl_sched_in(tsk);
}
-static inline u32 resctrl_arch_system_num_rmid_idx(void)
-{
- /* RMID are independent numbers for x86. num_rmid_idx == num_rmid */
- return boot_cpu_data.x86_cache_max_rmid + 1;
-}
-
static inline void resctrl_arch_rmid_idx_decode(u32 idx, u32 *closid, u32 *rmid)
{
*rmid = idx;
@@ -184,6 +203,11 @@ static inline void *resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid
static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
void *ctx) { };
+u64 resctrl_arch_get_prefetch_disable_bits(void);
+int resctrl_arch_pseudo_lock_fn(void *_plr);
+int resctrl_arch_measure_cycles_lat_fn(void *_plr);
+int resctrl_arch_measure_l2_residency(void *_plr);
+int resctrl_arch_measure_l3_residency(void *_plr);
void resctrl_cpu_detect(struct cpuinfo_x86 *c);
#else
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 363266cbcada..3821ee3fae35 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@ cc_label: c = true; \
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c; \
- asm volatile (fullop CC_SET(cc) \
+ asm_inline volatile (fullop CC_SET(cc) \
: [var] "+m" (_var), CC_OUT(cc) (c) \
: __VA_ARGS__ : clobbers); \
c; \
diff --git a/arch/x86/include/asm/rqspinlock.h b/arch/x86/include/asm/rqspinlock.h
new file mode 100644
index 000000000000..24a885449ee6
--- /dev/null
+++ b/arch/x86/include/asm/rqspinlock.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_RQSPINLOCK_H
+#define _ASM_X86_RQSPINLOCK_H
+
+#include <asm/paravirt.h>
+
+#ifdef CONFIG_PARAVIRT
+DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key);
+
+#define resilient_virt_spin_lock_enabled resilient_virt_spin_lock_enabled
+static __always_inline bool resilient_virt_spin_lock_enabled(void)
+{
+ return static_branch_likely(&virt_spin_lock_key);
+}
+
+#ifdef CONFIG_QUEUED_SPINLOCKS
+typedef struct qspinlock rqspinlock_t;
+#else
+typedef struct rqspinlock rqspinlock_t;
+#endif
+extern int resilient_tas_spin_lock(rqspinlock_t *lock);
+
+#define resilient_virt_spin_lock resilient_virt_spin_lock
+static inline int resilient_virt_spin_lock(rqspinlock_t *lock)
+{
+ return resilient_tas_spin_lock(lock);
+}
+
+#endif /* CONFIG_PARAVIRT */
+
+#include <asm-generic/rqspinlock.h>
+
+#endif /* _ASM_X86_RQSPINLOCK_H */
diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
new file mode 100644
index 000000000000..8d983cfd06ea
--- /dev/null
+++ b/arch/x86/include/asm/runtime-const.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RUNTIME_CONST_H
+#define _ASM_RUNTIME_CONST_H
+
+#ifdef __ASSEMBLY__
+
+.macro RUNTIME_CONST_PTR sym reg
+ movq $0x0123456789abcdef, %\reg
+ 1:
+ .pushsection runtime_ptr_\sym, "a"
+ .long 1b - 8 - .
+ .popsection
+.endm
+
+#else /* __ASSEMBLY__ */
+
+#define runtime_const_ptr(sym) ({ \
+ typeof(sym) __ret; \
+ asm_inline("mov %1,%0\n1:\n" \
+ ".pushsection runtime_ptr_" #sym ",\"a\"\n\t" \
+ ".long 1b - %c2 - .\n" \
+ ".popsection" \
+ :"=r" (__ret) \
+ :"i" ((unsigned long)0x0123456789abcdefull), \
+ "i" (sizeof(long))); \
+ __ret; })
+
+// The 'typeof' will create at _least_ a 32-bit type, but
+// will happily also take a bigger type and the 'shrl' will
+// clear the upper bits
+#define runtime_const_shift_right_32(val, sym) ({ \
+ typeof(0u+(val)) __ret = (val); \
+ asm_inline("shrl $12,%k0\n1:\n" \
+ ".pushsection runtime_shift_" #sym ",\"a\"\n\t" \
+ ".long 1b - 1 - .\n" \
+ ".popsection" \
+ :"+r" (__ret)); \
+ __ret; })
+
+#define runtime_const_init(type, sym) do { \
+ extern s32 __start_runtime_##type##_##sym[]; \
+ extern s32 __stop_runtime_##type##_##sym[]; \
+ runtime_const_fixup(__runtime_fixup_##type, \
+ (unsigned long)(sym), \
+ __start_runtime_##type##_##sym, \
+ __stop_runtime_##type##_##sym); \
+} while (0)
+
+/*
+ * The text patching is trivial - you can only do this at init time,
+ * when the text section hasn't been marked RO, and before the text
+ * has ever been executed.
+ */
+static inline void __runtime_fixup_ptr(void *where, unsigned long val)
+{
+ *(unsigned long *)where = val;
+}
+
+static inline void __runtime_fixup_shift(void *where, unsigned long val)
+{
+ *(unsigned char *)where = val;
+}
+
+static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
+ unsigned long val, s32 *start, s32 *end)
+{
+ while (start < end) {
+ fn(*start + (void *)start, val);
+ start++;
+ }
+}
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index fef16e398161..42bcd42d70d1 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -9,7 +9,7 @@
#endif
#ifdef CONFIG_COMPAT
-#include <asm/ia32_unistd.h>
+#include <asm/unistd_32_ia32.h>
#define __NR_seccomp_read_32 __NR_ia32_read
#define __NR_seccomp_write_32 __NR_ia32_write
#define __NR_seccomp_exit_32 __NR_ia32_exit
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 3fa87e5e11ab..30e8ee7006f9 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -5,6 +5,7 @@
#include <asm-generic/sections.h>
#include <asm/extable.h>
+extern char __relocate_kernel_start[], __relocate_kernel_end[];
extern char __brk_base[], __brk_limit[];
extern char __end_rodata_aligned[];
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 9d6411c65920..77d8f49b92bd 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -233,7 +233,7 @@
#define VDSO_CPUNODE_BITS 12
#define VDSO_CPUNODE_MASK 0xfff
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Helper functions to store/load CPU and node numbers */
@@ -265,7 +265,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*node = (p >> VDSO_CPUNODE_BITS);
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef __KERNEL__
@@ -286,7 +286,7 @@ static inline void vdso_read_cpunode(unsigned *cpu, unsigned *node)
*/
#define XEN_EARLY_IDT_HANDLER_SIZE (8 + ENDBR_INSN_SIZE)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
extern const char early_idt_handler_array[NUM_EXCEPTION_VECTORS][EARLY_IDT_HANDLER_SIZE];
extern void early_ignore_irq(void);
@@ -350,7 +350,7 @@ static inline void __loadsegment_fs(unsigned short value)
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_SEGMENT_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 9aee31862b4a..8d9f1c9aaa4c 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -2,7 +2,6 @@
#ifndef _ASM_X86_SET_MEMORY_H
#define _ASM_X86_SET_MEMORY_H
-#include <linux/mm.h>
#include <asm/page.h>
#include <asm-generic/set_memory.h>
@@ -38,7 +37,6 @@ int set_memory_rox(unsigned long addr, int numpages);
* The caller is required to take care of these.
*/
-int __set_memory_prot(unsigned long addr, int numpages, pgprot_t prot);
int _set_memory_uc(unsigned long addr, int numpages);
int _set_memory_wc(unsigned long addr, int numpages);
int _set_memory_wt(unsigned long addr, int numpages);
@@ -49,8 +47,11 @@ int set_memory_wb(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_p(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);
+
+bool set_memory_enc_stop_conversion(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
+
int set_memory_np_noalias(unsigned long addr, int numpages);
int set_memory_nonglobal(unsigned long addr, int numpages);
int set_memory_global(unsigned long addr, int numpages);
@@ -86,6 +87,7 @@ int set_pages_rw(struct page *page, int numpages);
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
+int set_direct_map_valid_noflush(struct page *page, unsigned nr, bool valid);
bool kernel_page_present(struct page *page);
extern int kernel_set_to_readonly;
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index e61e68d71cba..ad9212df0ec0 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -27,7 +27,9 @@
#define OLD_CL_ADDRESS 0x020 /* Relative to real mode data */
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
+#include <linux/cache.h>
+
#include <asm/bootparam.h>
#include <asm/x86_init.h>
@@ -44,10 +46,11 @@ void setup_bios_corruption_check(void);
void early_platform_quirks(void);
extern unsigned long saved_video_mode;
+extern unsigned long acpi_realmode_flags;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
-extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
+extern unsigned long __startup_64(unsigned long p2v_offset, struct boot_params *bp);
extern void startup_64_setup_gdt_idt(void);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
@@ -133,7 +136,13 @@ asmlinkage void __init __noreturn x86_64_start_reservations(char *real_mode_data
#endif /* __i386__ */
#endif /* _SETUP */
-#else /* __ASSEMBLY */
+#ifdef CONFIG_CMDLINE_BOOL
+extern bool builtin_cmdline_added __ro_after_init;
+#else
+#define builtin_cmdline_added 0
+#endif
+
+#else /* __ASSEMBLER__ */
.macro __RESERVE_BRK name, size
.pushsection .bss..brk, "aw"
@@ -145,6 +154,6 @@ SYM_DATA_END(__brk_\name)
#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SETUP_H */
diff --git a/arch/x86/include/asm/setup_data.h b/arch/x86/include/asm/setup_data.h
index 77c51111a893..7bb16f843c93 100644
--- a/arch/x86/include/asm/setup_data.h
+++ b/arch/x86/include/asm/setup_data.h
@@ -4,7 +4,7 @@
#include <uapi/asm/setup_data.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct pci_setup_rom {
struct setup_data data;
@@ -27,6 +27,6 @@ struct efi_setup_data {
u64 reserved[8];
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b463fcbd4b90..acb85b9346d8 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -54,8 +54,18 @@
(((unsigned long)fn) << 32))
/* AP Reset Hold */
-#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
-#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_REQ 0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP 0x007
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_POS 12
+#define GHCB_MSR_AP_RESET_HOLD_RESULT_MASK GENMASK_ULL(51, 0)
+
+/* Preferred GHCB GPA Request */
+#define GHCB_MSR_PREF_GPA_REQ 0x010
+#define GHCB_MSR_GPA_VALUE_POS 12
+#define GHCB_MSR_GPA_VALUE_MASK GENMASK_ULL(51, 0)
+
+#define GHCB_MSR_PREF_GPA_RESP 0x011
+#define GHCB_MSR_PREF_GPA_NONE 0xfffffffffffff
/* GHCB GPA Register */
#define GHCB_MSR_REG_GPA_REQ 0x012
@@ -91,28 +101,61 @@ enum psc_op {
/* GHCBData[11:0] */ \
GHCB_MSR_PSC_REQ)
+#define GHCB_MSR_PSC_REQ_TO_GFN(msr) (((msr) & GENMASK_ULL(51, 12)) >> 12)
+#define GHCB_MSR_PSC_REQ_TO_OP(msr) (((msr) & GENMASK_ULL(55, 52)) >> 52)
+
#define GHCB_MSR_PSC_RESP 0x015
#define GHCB_MSR_PSC_RESP_VAL(val) \
/* GHCBData[63:32] */ \
(((u64)(val) & GENMASK_ULL(63, 32)) >> 32)
+/* Set highest bit as a generic error response */
+#define GHCB_MSR_PSC_RESP_ERROR (BIT_ULL(63) | GHCB_MSR_PSC_RESP)
+
+/* GHCB Run at VMPL Request/Response */
+#define GHCB_MSR_VMPL_REQ 0x016
+#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
+ /* GHCBData[39:32] */ \
+ (((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
+ /* GHCBDdata[11:0] */ \
+ GHCB_MSR_VMPL_REQ)
+
+#define GHCB_MSR_VMPL_RESP 0x017
+#define GHCB_MSR_VMPL_RESP_VAL(v) \
+ /* GHCBData[63:32] */ \
+ (((u64)(v) & GENMASK_ULL(63, 32)) >> 32)
+
/* GHCB Hypervisor Feature Request/Response */
#define GHCB_MSR_HV_FT_REQ 0x080
#define GHCB_MSR_HV_FT_RESP 0x081
+#define GHCB_MSR_HV_FT_POS 12
+#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0)
#define GHCB_MSR_HV_FT_RESP_VAL(v) \
/* GHCBData[63:12] */ \
(((u64)(v) & GENMASK_ULL(63, 12)) >> 12)
#define GHCB_HV_FT_SNP BIT_ULL(0)
#define GHCB_HV_FT_SNP_AP_CREATION BIT_ULL(1)
+#define GHCB_HV_FT_SNP_MULTI_VMPL BIT_ULL(5)
/*
* SNP Page State Change NAE event
* The VMGEXIT_PSC_MAX_ENTRY determines the size of the PSC structure, which
* is a local stack variable in set_pages_state(). Do not increase this value
* without evaluating the impact to stack usage.
+ *
+ * Use VMGEXIT_PSC_MAX_COUNT in cases where the actual GHCB-defined max value
+ * is needed, such as when processing GHCB requests on the hypervisor side.
*/
#define VMGEXIT_PSC_MAX_ENTRY 64
+#define VMGEXIT_PSC_MAX_COUNT 253
+
+#define VMGEXIT_PSC_ERROR_GENERIC (0x100UL << 32)
+#define VMGEXIT_PSC_ERROR_INVALID_HDR ((1UL << 32) | 1)
+#define VMGEXIT_PSC_ERROR_INVALID_ENTRY ((1UL << 32) | 2)
+
+#define VMGEXIT_PSC_OP_PRIVATE 1
+#define VMGEXIT_PSC_OP_SHARED 2
struct psc_hdr {
u16 cur_entry;
@@ -159,12 +202,26 @@ struct snp_psc_desc {
#define GHCB_TERM_NOT_VMPL0 3 /* SNP guest is not running at VMPL-0 */
#define GHCB_TERM_CPUID 4 /* CPUID-validation failure */
#define GHCB_TERM_CPUID_HV 5 /* CPUID failure during hypervisor fallback */
+#define GHCB_TERM_SECRETS_PAGE 6 /* Secrets page failure */
+#define GHCB_TERM_NO_SVSM 7 /* SVSM is not advertised in the secrets page */
+#define GHCB_TERM_SVSM_VMPL0 8 /* SVSM is present but has set VMPL to 0 */
+#define GHCB_TERM_SVSM_CAA 9 /* SVSM is present but CAA is not page aligned */
+#define GHCB_TERM_SECURE_TSC 10 /* Secure TSC initialization failed */
+#define GHCB_TERM_SVSM_CA_REMAP_FAIL 11 /* SVSM is present but CA could not be remapped */
#define GHCB_RESP_CODE(v) ((v) & GHCB_MSR_INFO_MASK)
/*
- * Error codes related to GHCB input that can be communicated back to the guest
- * by setting the lower 32-bits of the GHCB SW_EXITINFO1 field to 2.
+ * GHCB-defined return codes that are communicated back to the guest via
+ * SW_EXITINFO1.
+ */
+#define GHCB_HV_RESP_NO_ACTION 0
+#define GHCB_HV_RESP_ISSUE_EXCEPTION 1
+#define GHCB_HV_RESP_MALFORMED_INPUT 2
+
+/*
+ * GHCB-defined sub-error codes for malformed input (see above) that are
+ * communicated back to the guest via SW_EXITINFO2[31:0].
*/
#define GHCB_ERR_NOT_REGISTERED 1
#define GHCB_ERR_INVALID_USAGE 2
@@ -173,4 +230,31 @@ struct snp_psc_desc {
#define GHCB_ERR_INVALID_INPUT 5
#define GHCB_ERR_INVALID_EVENT 6
+struct sev_config {
+ __u64 debug : 1,
+
+ /*
+ * Indicates when the per-CPU GHCB has been created and registered
+ * and thus can be used by the BSP instead of the early boot GHCB.
+ *
+ * For APs, the per-CPU GHCB is created before they are started
+ * and registered upon startup, so this flag can be used globally
+ * for the BSP and APs.
+ */
+ ghcbs_initialized : 1,
+
+ /*
+ * Indicates when the per-CPU SVSM CA is to be used instead of the
+ * boot SVSM CA.
+ *
+ * For APs, the per-CPU SVSM CA is created as part of the AP
+ * bringup, so this flag can be used globally for the BSP and APs.
+ */
+ use_cas : 1,
+
+ __reserved : 61;
+};
+
+extern struct sev_config sev_cfg;
+
#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h
index 7f57382afee4..ba7999f66abe 100644
--- a/arch/x86/include/asm/sev.h
+++ b/arch/x86/include/asm/sev.h
@@ -14,6 +14,7 @@
#include <asm/insn.h>
#include <asm/sev-common.h>
#include <asm/coco.h>
+#include <asm/set_memory.h>
#define GHCB_PROTOCOL_MIN 1ULL
#define GHCB_PROTOCOL_MAX 2ULL
@@ -91,6 +92,9 @@ extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
/* RMUPDATE detected 4K page and 2MB page overlap. */
#define RMPUPDATE_FAIL_OVERLAP 4
+/* PSMASH failed due to concurrent access by another CPU */
+#define PSMASH_FAIL_INUSE 3
+
/* RMP page size */
#define RMP_PG_SIZE_4K 0
#define RMP_PG_SIZE_2M 1
@@ -116,8 +120,92 @@ struct snp_req_data {
unsigned int data_npages;
};
-struct sev_guest_platform_data {
- u64 secrets_gpa;
+#define MAX_AUTHTAG_LEN 32
+#define AUTHTAG_LEN 16
+#define AAD_LEN 48
+#define MSG_HDR_VER 1
+
+#define SNP_REQ_MAX_RETRY_DURATION (60*HZ)
+#define SNP_REQ_RETRY_DELAY (2*HZ)
+
+/* See SNP spec SNP_GUEST_REQUEST section for the structure */
+enum msg_type {
+ SNP_MSG_TYPE_INVALID = 0,
+ SNP_MSG_CPUID_REQ,
+ SNP_MSG_CPUID_RSP,
+ SNP_MSG_KEY_REQ,
+ SNP_MSG_KEY_RSP,
+ SNP_MSG_REPORT_REQ,
+ SNP_MSG_REPORT_RSP,
+ SNP_MSG_EXPORT_REQ,
+ SNP_MSG_EXPORT_RSP,
+ SNP_MSG_IMPORT_REQ,
+ SNP_MSG_IMPORT_RSP,
+ SNP_MSG_ABSORB_REQ,
+ SNP_MSG_ABSORB_RSP,
+ SNP_MSG_VMRK_REQ,
+ SNP_MSG_VMRK_RSP,
+
+ SNP_MSG_TSC_INFO_REQ = 17,
+ SNP_MSG_TSC_INFO_RSP,
+
+ SNP_MSG_TYPE_MAX
+};
+
+enum aead_algo {
+ SNP_AEAD_INVALID,
+ SNP_AEAD_AES_256_GCM,
+};
+
+struct snp_guest_msg_hdr {
+ u8 authtag[MAX_AUTHTAG_LEN];
+ u64 msg_seqno;
+ u8 rsvd1[8];
+ u8 algo;
+ u8 hdr_version;
+ u16 hdr_sz;
+ u8 msg_type;
+ u8 msg_version;
+ u16 msg_sz;
+ u32 rsvd2;
+ u8 msg_vmpck;
+ u8 rsvd3[35];
+} __packed;
+
+struct snp_guest_msg {
+ struct snp_guest_msg_hdr hdr;
+ u8 payload[PAGE_SIZE - sizeof(struct snp_guest_msg_hdr)];
+} __packed;
+
+#define SNP_TSC_INFO_REQ_SZ 128
+
+struct snp_tsc_info_req {
+ u8 rsvd[SNP_TSC_INFO_REQ_SZ];
+} __packed;
+
+struct snp_tsc_info_resp {
+ u32 status;
+ u32 rsvd1;
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u32 tsc_factor;
+ u8 rsvd2[100];
+} __packed;
+
+struct snp_guest_req {
+ void *req_buf;
+ size_t req_sz;
+
+ void *resp_buf;
+ size_t resp_sz;
+
+ u64 exit_code;
+ unsigned int vmpck_id;
+ u8 msg_version;
+ u8 msg_type;
+
+ struct snp_req_data input;
+ void *certs_data;
};
/*
@@ -140,7 +228,7 @@ struct secrets_os_area {
#define VMPCK_KEY_LEN 32
/* See the SNP spec version 0.9 for secrets page format */
-struct snp_secrets_page_layout {
+struct snp_secrets_page {
u32 version;
u32 imien : 1,
rsvd1 : 31;
@@ -152,10 +240,154 @@ struct snp_secrets_page_layout {
u8 vmpck2[VMPCK_KEY_LEN];
u8 vmpck3[VMPCK_KEY_LEN];
struct secrets_os_area os_area;
- u8 rsvd3[3840];
+
+ u8 vmsa_tweak_bitmap[64];
+
+ /* SVSM fields */
+ u64 svsm_base;
+ u64 svsm_size;
+ u64 svsm_caa;
+ u32 svsm_max_version;
+ u8 svsm_guest_vmpl;
+ u8 rsvd3[3];
+
+ /* Remainder of page */
+ u8 rsvd4[3744];
} __packed;
+struct snp_msg_desc {
+ /* request and response are in unencrypted memory */
+ struct snp_guest_msg *request, *response;
+
+ /*
+ * Avoid information leakage by double-buffering shared messages
+ * in fields that are in regular encrypted memory.
+ */
+ struct snp_guest_msg secret_request, secret_response;
+
+ struct snp_secrets_page *secrets;
+
+ struct aesgcm_ctx *ctx;
+
+ u32 *os_area_msg_seqno;
+ u8 *vmpck;
+ int vmpck_id;
+};
+
+/*
+ * The SVSM Calling Area (CA) related structures.
+ */
+struct svsm_ca {
+ u8 call_pending;
+ u8 mem_available;
+ u8 rsvd1[6];
+
+ u8 svsm_buffer[PAGE_SIZE - 8];
+};
+
+#define SVSM_SUCCESS 0
+#define SVSM_ERR_INCOMPLETE 0x80000000
+#define SVSM_ERR_UNSUPPORTED_PROTOCOL 0x80000001
+#define SVSM_ERR_UNSUPPORTED_CALL 0x80000002
+#define SVSM_ERR_INVALID_ADDRESS 0x80000003
+#define SVSM_ERR_INVALID_FORMAT 0x80000004
+#define SVSM_ERR_INVALID_PARAMETER 0x80000005
+#define SVSM_ERR_INVALID_REQUEST 0x80000006
+#define SVSM_ERR_BUSY 0x80000007
+#define SVSM_PVALIDATE_FAIL_SIZEMISMATCH 0x80001006
+
+/*
+ * The SVSM PVALIDATE related structures
+ */
+struct svsm_pvalidate_entry {
+ u64 page_size : 2,
+ action : 1,
+ ignore_cf : 1,
+ rsvd : 8,
+ pfn : 52;
+};
+
+struct svsm_pvalidate_call {
+ u16 num_entries;
+ u16 cur_index;
+
+ u8 rsvd1[4];
+
+ struct svsm_pvalidate_entry entry[];
+};
+
+#define SVSM_PVALIDATE_MAX_COUNT ((sizeof_field(struct svsm_ca, svsm_buffer) - \
+ offsetof(struct svsm_pvalidate_call, entry)) / \
+ sizeof(struct svsm_pvalidate_entry))
+
+/*
+ * The SVSM Attestation related structures
+ */
+struct svsm_loc_entry {
+ u64 pa;
+ u32 len;
+ u8 rsvd[4];
+};
+
+struct svsm_attest_call {
+ struct svsm_loc_entry report_buf;
+ struct svsm_loc_entry nonce;
+ struct svsm_loc_entry manifest_buf;
+ struct svsm_loc_entry certificates_buf;
+
+ /* For attesting a single service */
+ u8 service_guid[16];
+ u32 service_manifest_ver;
+ u8 rsvd[4];
+};
+
+/* PTE descriptor used for the prepare_pte_enc() operations. */
+struct pte_enc_desc {
+ pte_t *kpte;
+ int pte_level;
+ bool encrypt;
+ /* pfn of the kpte above */
+ unsigned long pfn;
+ /* physical address of @pfn */
+ unsigned long pa;
+ /* virtual address of @pfn */
+ void *va;
+ /* memory covered by the pte */
+ unsigned long size;
+ pgprot_t new_pgprot;
+};
+
+/*
+ * SVSM protocol structure
+ */
+struct svsm_call {
+ struct svsm_ca *caa;
+ u64 rax;
+ u64 rcx;
+ u64 rdx;
+ u64 r8;
+ u64 r9;
+ u64 rax_out;
+ u64 rcx_out;
+ u64 rdx_out;
+ u64 r8_out;
+ u64 r9_out;
+};
+
+#define SVSM_CORE_CALL(x) ((0ULL << 32) | (x))
+#define SVSM_CORE_REMAP_CA 0
+#define SVSM_CORE_PVALIDATE 1
+#define SVSM_CORE_CREATE_VCPU 2
+#define SVSM_CORE_DELETE_VCPU 3
+
+#define SVSM_ATTEST_CALL(x) ((1ULL << 32) | (x))
+#define SVSM_ATTEST_SERVICES 0
+#define SVSM_ATTEST_SINGLE_SERVICE 1
+
#ifdef CONFIG_AMD_MEM_ENCRYPT
+
+extern u8 snp_vmpl;
+
extern void __sev_es_ist_enter(struct pt_regs *regs);
extern void __sev_es_ist_exit(void);
static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
@@ -181,6 +413,14 @@ static __always_inline void sev_es_nmi_complete(void)
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
extern void sev_enable(struct boot_params *bp);
+/*
+ * RMPADJUST modifies the RMP permissions of a page of a lesser-
+ * privileged (numerically higher) VMPL.
+ *
+ * If the guest is running at a higher-privilege than the privilege
+ * level the instruction is targeting, the instruction will succeed,
+ * otherwise, it will fail.
+ */
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
@@ -224,12 +464,29 @@ void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __noreturn snp_abort(void);
void snp_dmi_setup(void);
-int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
+int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
void sev_show_status(void);
-#else
+void snp_update_svsm_ca(void);
+int prepare_pte_enc(struct pte_enc_desc *d);
+void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot);
+void snp_kexec_finish(void);
+void snp_kexec_begin(void);
+
+int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id);
+struct snp_msg_desc *snp_msg_alloc(void);
+void snp_msg_free(struct snp_msg_desc *mdesc);
+int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
+ struct snp_guest_request_ioctl *rio);
+
+void __init snp_secure_tsc_prepare(void);
+void __init snp_secure_tsc_init(void);
+
+#else /* !CONFIG_AMD_MEM_ENCRYPT */
+
+#define snp_vmpl 0
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
@@ -249,19 +506,32 @@ static inline void snp_set_wakeup_secondary_cpu(void) { }
static inline bool snp_init(struct boot_params *bp) { return false; }
static inline void snp_abort(void) { }
static inline void snp_dmi_setup(void) { }
-static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio)
+static inline int snp_issue_svsm_attest_req(u64 call_id, struct svsm_call *call, struct svsm_attest_call *input)
{
return -ENOTTY;
}
-
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
static inline void sev_show_status(void) { }
-#endif
+static inline void snp_update_svsm_ca(void) { }
+static inline int prepare_pte_enc(struct pte_enc_desc *d) { return 0; }
+static inline void set_pte_enc_mask(pte_t *kpte, unsigned long pfn, pgprot_t new_prot) { }
+static inline void snp_kexec_finish(void) { }
+static inline void snp_kexec_begin(void) { }
+static inline int snp_msg_init(struct snp_msg_desc *mdesc, int vmpck_id) { return -1; }
+static inline struct snp_msg_desc *snp_msg_alloc(void) { return NULL; }
+static inline void snp_msg_free(struct snp_msg_desc *mdesc) { }
+static inline int snp_send_guest_request(struct snp_msg_desc *mdesc, struct snp_guest_req *req,
+ struct snp_guest_request_ioctl *rio) { return -ENODEV; }
+static inline void __init snp_secure_tsc_prepare(void) { }
+static inline void __init snp_secure_tsc_init(void) { }
+
+#endif /* CONFIG_AMD_MEM_ENCRYPT */
#ifdef CONFIG_KVM_AMD_SEV
bool snp_probe_rmptable_info(void);
+int snp_rmptable_init(void);
int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
void snp_dump_hva_rmpentry(unsigned long address);
int psmash(u64 pfn);
@@ -269,8 +539,10 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut
int rmp_make_shared(u64 pfn, enum pg_level level);
void snp_leak_pages(u64 pfn, unsigned int npages);
void kdump_sev_callback(void);
+void snp_fixup_e820_tables(void);
#else
static inline bool snp_probe_rmptable_info(void) { return false; }
+static inline int snp_rmptable_init(void) { return -ENOSYS; }
static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; }
static inline void snp_dump_hva_rmpentry(unsigned long address) {}
static inline int psmash(u64 pfn) { return -ENODEV; }
@@ -282,6 +554,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as
static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; }
static inline void snp_leak_pages(u64 pfn, unsigned int npages) {}
static inline void kdump_sev_callback(void) { }
+static inline void snp_fixup_e820_tables(void) {}
#endif
#endif
diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h
index fdfd41511b02..a28ff6b14145 100644
--- a/arch/x86/include/asm/shared/tdx.h
+++ b/arch/x86/include/asm/shared/tdx.h
@@ -16,10 +16,55 @@
#define TDG_VP_VEINFO_GET 3
#define TDG_MR_REPORT 4
#define TDG_MEM_PAGE_ACCEPT 6
+#define TDG_VM_RD 7
#define TDG_VM_WR 8
-/* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */
+/* TDX attributes */
+#define TDX_ATTR_DEBUG_BIT 0
+#define TDX_ATTR_DEBUG BIT_ULL(TDX_ATTR_DEBUG_BIT)
+#define TDX_ATTR_HGS_PLUS_PROF_BIT 4
+#define TDX_ATTR_HGS_PLUS_PROF BIT_ULL(TDX_ATTR_HGS_PLUS_PROF_BIT)
+#define TDX_ATTR_PERF_PROF_BIT 5
+#define TDX_ATTR_PERF_PROF BIT_ULL(TDX_ATTR_PERF_PROF_BIT)
+#define TDX_ATTR_PMT_PROF_BIT 6
+#define TDX_ATTR_PMT_PROF BIT_ULL(TDX_ATTR_PMT_PROF_BIT)
+#define TDX_ATTR_ICSSD_BIT 16
+#define TDX_ATTR_ICSSD BIT_ULL(TDX_ATTR_ICSSD_BIT)
+#define TDX_ATTR_LASS_BIT 27
+#define TDX_ATTR_LASS BIT_ULL(TDX_ATTR_LASS_BIT)
+#define TDX_ATTR_SEPT_VE_DISABLE_BIT 28
+#define TDX_ATTR_SEPT_VE_DISABLE BIT_ULL(TDX_ATTR_SEPT_VE_DISABLE_BIT)
+#define TDX_ATTR_MIGRTABLE_BIT 29
+#define TDX_ATTR_MIGRTABLE BIT_ULL(TDX_ATTR_MIGRTABLE_BIT)
+#define TDX_ATTR_PKS_BIT 30
+#define TDX_ATTR_PKS BIT_ULL(TDX_ATTR_PKS_BIT)
+#define TDX_ATTR_KL_BIT 31
+#define TDX_ATTR_KL BIT_ULL(TDX_ATTR_KL_BIT)
+#define TDX_ATTR_TPA_BIT 62
+#define TDX_ATTR_TPA BIT_ULL(TDX_ATTR_TPA_BIT)
+#define TDX_ATTR_PERFMON_BIT 63
+#define TDX_ATTR_PERFMON BIT_ULL(TDX_ATTR_PERFMON_BIT)
+
+/* TDX TD-Scope Metadata. To be used by TDG.VM.WR and TDG.VM.RD */
+#define TDCS_CONFIG_FLAGS 0x1110000300000016
+#define TDCS_TD_CTLS 0x1110000300000017
#define TDCS_NOTIFY_ENABLES 0x9100000000000010
+#define TDCS_TOPOLOGY_ENUM_CONFIGURED 0x9100000000000019
+
+/* TDCS_CONFIG_FLAGS bits */
+#define TDCS_CONFIG_FLEXIBLE_PENDING_VE BIT_ULL(1)
+
+/* TDCS_TD_CTLS bits */
+#define TD_CTLS_PENDING_VE_DISABLE_BIT 0
+#define TD_CTLS_PENDING_VE_DISABLE BIT_ULL(TD_CTLS_PENDING_VE_DISABLE_BIT)
+#define TD_CTLS_ENUM_TOPOLOGY_BIT 1
+#define TD_CTLS_ENUM_TOPOLOGY BIT_ULL(TD_CTLS_ENUM_TOPOLOGY_BIT)
+#define TD_CTLS_VIRT_CPUID2_BIT 2
+#define TD_CTLS_VIRT_CPUID2 BIT_ULL(TD_CTLS_VIRT_CPUID2_BIT)
+#define TD_CTLS_REDUCE_VE_BIT 3
+#define TD_CTLS_REDUCE_VE BIT_ULL(TD_CTLS_REDUCE_VE_BIT)
+#define TD_CTLS_LOCK_BIT 63
+#define TD_CTLS_LOCK BIT_ULL(TD_CTLS_LOCK_BIT)
/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA 0x10001
@@ -61,7 +106,7 @@
#define TDX_PS_1G 2
#define TDX_PS_NR (TDX_PS_1G + 1)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/compiler_attributes.h>
@@ -132,5 +177,5 @@ static __always_inline u64 hcall_func(u64 exit_reason)
return exit_reason;
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_SHARED_TDX_H */
diff --git a/arch/x86/include/asm/shstk.h b/arch/x86/include/asm/shstk.h
index 42fee8959df7..ba6f2fe43848 100644
--- a/arch/x86/include/asm/shstk.h
+++ b/arch/x86/include/asm/shstk.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_SHSTK_H
#define _ASM_X86_SHSTK_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct task_struct;
@@ -21,6 +21,8 @@ unsigned long shstk_alloc_thread_stack(struct task_struct *p, unsigned long clon
void shstk_free(struct task_struct *p);
int setup_signal_shadow_stack(struct ksignal *ksig);
int restore_signal_shadow_stack(void);
+int shstk_update_last_frame(unsigned long val);
+bool shstk_is_enabled(void);
#else
static inline long shstk_prctl(struct task_struct *task, int option,
unsigned long arg2) { return -EINVAL; }
@@ -31,8 +33,10 @@ static inline unsigned long shstk_alloc_thread_stack(struct task_struct *p,
static inline void shstk_free(struct task_struct *p) {}
static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
static inline int restore_signal_shadow_stack(void) { return 0; }
+static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline bool shstk_is_enabled(void) { return false; }
#endif /* CONFIG_X86_USER_SHADOW_STACK */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SHSTK_H */
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index 4a4043ca6493..c72d46175374 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_SIGNAL_H
#define _ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/linkage.h>
/* Most things should be clean enough to redefine this at will, if care
@@ -28,9 +28,9 @@ typedef struct {
#define SA_IA32_ABI 0x02000000u
#define SA_X32_ABI 0x01000000u
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#include <uapi/asm/signal.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define __ARCH_HAS_SA_RESTORER
@@ -101,5 +101,5 @@ struct pt_regs;
#endif /* !__i386__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index bab490379c65..4f84d421d1cf 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,30 +13,26 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
-/* "Raw" instruction opcodes */
-#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
-#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
-
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define ASM_CLAC \
- ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
+ ALTERNATIVE "", "clac", X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
+ ALTERNATIVE "", "stac", X86_FEATURE_SMAP
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
+ alternative("", "clac", X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __ASM_STAC, X86_FEATURE_SMAP);
+ alternative("", "stac", X86_FEATURE_SMAP);
}
static __always_inline unsigned long smap_save(void)
@@ -44,7 +40,8 @@ static __always_inline unsigned long smap_save(void)
unsigned long flags;
asm volatile ("# smap_save\n\t"
- ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ "", "pushf; pop %0; clac",
X86_FEATURE_SMAP)
: "=rm" (flags) : : "memory", "cc");
@@ -54,17 +51,23 @@ static __always_inline unsigned long smap_save(void)
static __always_inline void smap_restore(unsigned long flags)
{
asm volatile ("# smap_restore\n\t"
- ALTERNATIVE("", "push %0; popf\n\t",
+ ALTERNATIVE(ANNOTATE_IGNORE_ALTERNATIVE
+ "", "push %0; popf",
X86_FEATURE_SMAP)
: : "g" (flags) : "memory", "cc");
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
+ ALTERNATIVE("", "clac", X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
+ ALTERNATIVE("", "stac", X86_FEATURE_SMAP)
+
+#define ASM_CLAC_UNSAFE \
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "clac", X86_FEATURE_SMAP)
+#define ASM_STAC_UNSAFE \
+ ALTERNATIVE("", ANNOTATE_IGNORE_ALTERNATIVE "stac", X86_FEATURE_SMAP)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_SMAP_H */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a35936b512fe..0c1c68039d6f 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -1,12 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_SMP_H
#define _ASM_X86_SMP_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/cpumask.h>
+#include <linux/thread_info.h>
#include <asm/cpumask.h>
-#include <asm/current.h>
-#include <asm/thread_info.h>
+
+DECLARE_PER_CPU_CACHE_HOT(int, cpu_number);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_map);
@@ -35,6 +36,7 @@ struct smp_ops {
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
void (*play_dead)(void);
+ void (*stop_this_cpu)(void);
void (*send_call_func_ipi)(const struct cpumask *mask);
void (*send_call_func_single_ipi)(int cpu);
@@ -113,13 +115,12 @@ void wbinvd_on_cpu(int cpu);
int wbinvd_on_all_cpus(void);
void smp_kick_mwait_play_dead(void);
+void __noreturn mwait_play_dead(unsigned int eax_hint);
void native_smp_send_reschedule(int cpu);
void native_send_call_func_ipi(const struct cpumask *mask);
void native_send_call_func_single_ipi(int cpu);
-void smp_store_cpu_info(int id);
-
asmlinkage __visible void smp_reboot_interrupt(void);
__visible void smp_reschedule_interrupt(struct pt_regs *regs);
__visible void smp_call_function_interrupt(struct pt_regs *regs);
@@ -132,14 +133,8 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
* This function is needed by all SMP systems. It must _always_ be valid
* from the initial startup.
*/
-#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number)
-#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number)
-
-#ifdef CONFIG_X86_32
-extern int safe_smp_processor_id(void);
-#else
-# define safe_smp_processor_id() smp_processor_id()
-#endif
+#define raw_smp_processor_id() this_cpu_read(cpu_number)
+#define __smp_processor_id() __this_cpu_read(cpu_number)
static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
@@ -163,6 +158,8 @@ static inline struct cpumask *cpu_llc_shared_mask(int cpu)
{
return (struct cpumask *)cpumask_of(0);
}
+
+static inline void __noreturn mwait_play_dead(unsigned int eax_hint) { BUG(); }
#endif /* CONFIG_SMP */
#ifdef CONFIG_DEBUG_NMI_SELFTEST
@@ -174,7 +171,7 @@ extern void nmi_selftest(void);
extern unsigned int smpboot_control;
extern unsigned long apic_mmio_base;
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/* Control bits for startup_64 */
#define STARTUP_READ_APICID 0x80000000
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h
index 1be13b2dfe8b..3918c7a434f5 100644
--- a/arch/x86/include/asm/sparsemem.h
+++ b/arch/x86/include/asm/sparsemem.h
@@ -31,15 +31,4 @@
#endif /* CONFIG_SPARSEMEM */
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_NUMA_KEEP_MEMINFO
-extern int phys_to_target_node(phys_addr_t start);
-#define phys_to_target_node phys_to_target_node
-extern int memory_add_physaddr_to_nid(u64 start);
-#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
-extern int numa_fill_memblks(u64 start, u64 end);
-#define numa_fill_memblks numa_fill_memblks
-#endif
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_SPARSEMEM_H */
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 2e9fc5c400cd..6266d6b9e0b8 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -42,14 +42,14 @@ static __always_inline void native_write_cr2(unsigned long val)
asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
}
-static inline unsigned long __native_read_cr3(void)
+static __always_inline unsigned long __native_read_cr3(void)
{
unsigned long val;
asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
return val;
}
-static inline void native_write_cr3(unsigned long val)
+static __always_inline void native_write_cr3(unsigned long val)
{
asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
}
@@ -115,7 +115,7 @@ static inline void wrpkru(u32 pkru)
}
#endif
-static __always_inline void native_wbinvd(void)
+static __always_inline void wbinvd(void)
{
asm volatile("wbinvd": : :"memory");
}
@@ -167,12 +167,6 @@ static inline void __write_cr4(unsigned long x)
{
native_write_cr4(x);
}
-
-static __always_inline void wbinvd(void)
-{
- native_wbinvd();
-}
-
#endif /* CONFIG_PARAVIRT_XXL */
static __always_inline void clflush(volatile void *__p)
@@ -182,9 +176,8 @@ static __always_inline void clflush(volatile void *__p)
static inline void clflushopt(volatile void *__p)
{
- alternative_io(".byte 0x3e; clflush %P0",
- ".byte 0x66; clflush %P0",
- X86_FEATURE_CLFLUSHOPT,
+ alternative_io("ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
"+m" (*(volatile char __force *)__p));
}
@@ -192,22 +185,19 @@ static inline void clwb(volatile void *__p)
{
volatile struct { char x[64]; } *p = __p;
- asm volatile(ALTERNATIVE_2(
- ".byte 0x3e; clflush (%[pax])",
- ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
- X86_FEATURE_CLFLUSHOPT,
- ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
- X86_FEATURE_CLWB)
- : [p] "+m" (*p)
- : [pax] "a" (p));
+ asm_inline volatile(ALTERNATIVE_2(
+ "ds clflush %0",
+ "clflushopt %0", X86_FEATURE_CLFLUSHOPT,
+ "clwb %0", X86_FEATURE_CLWB)
+ : "+m" (*p));
}
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], %[addr]\n"
_ASM_EXTABLE(1b, %l[fail])
- :: [addr] "r" (addr), [val] "r" (val)
+ :: [addr] "m" (*addr), [val] "r" (val)
:: fail);
return 0;
fail:
@@ -217,7 +207,7 @@ fail:
#define nop() asm volatile ("nop")
-static inline void serialize(void)
+static __always_inline void serialize(void)
{
/* Instruction opcode for SERIALIZE; supported in binutils >= 2.35. */
asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
diff --git a/arch/x86/include/asm/sta2x11.h b/arch/x86/include/asm/sta2x11.h
deleted file mode 100644
index e0975e9c4f47..000000000000
--- a/arch/x86/include/asm/sta2x11.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Header file for STMicroelectronics ConneXt (STA2X11) IOHub
- */
-#ifndef __ASM_STA2X11_H
-#define __ASM_STA2X11_H
-
-#include <linux/pci.h>
-
-/* This needs to be called from the MFD to configure its sub-devices */
-struct sta2x11_instance *sta2x11_get_instance(struct pci_dev *pdev);
-
-#endif /* __ASM_STA2X11_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 00473a650f51..cd761b14eb02 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -2,26 +2,10 @@
/*
* GCC stack protector support.
*
- * Stack protector works by putting predefined pattern at the start of
+ * Stack protector works by putting a predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
- * returning from the function. The pattern is called stack canary
- * and unfortunately gcc historically required it to be at a fixed offset
- * from the percpu segment base. On x86_64, the offset is 40 bytes.
- *
- * The same segment is shared by percpu area and stack canary. On
- * x86_64, percpu symbols are zero based and %gs (64-bit) points to the
- * base of percpu area. The first occupant of the percpu area is always
- * fixed_percpu_data which contains stack_canary at the appropriate
- * offset. On x86_32, the stack canary is just a regular percpu
- * variable.
- *
- * Putting percpu data in %fs on 32-bit is a minor optimization compared to
- * using %gs. Since 32-bit userspace normally has %fs == 0, we are likely
- * to load 0 into %fs on exit to usermode, whereas with percpu data in
- * %gs, we are likely to load a non-null %gs on return to user mode.
- *
- * Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
- * support, we can remove some of this complexity.
+ * returning from the function. The pattern is called the stack canary
+ * and is a unique value for each task.
*/
#ifndef _ASM_STACKPROTECTOR_H
@@ -36,6 +20,8 @@
#include <linux/sched.h>
+DECLARE_PER_CPU_CACHE_HOT(unsigned long, __stack_chk_guard);
+
/*
* Initialize the stackprotector canary value.
*
@@ -51,25 +37,13 @@ static __always_inline void boot_init_stack_canary(void)
{
unsigned long canary = get_random_canary();
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
-#endif
-
current->stack_canary = canary;
-#ifdef CONFIG_X86_64
- this_cpu_write(fixed_percpu_data.stack_canary, canary);
-#else
this_cpu_write(__stack_chk_guard, canary);
-#endif
}
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{
-#ifdef CONFIG_X86_64
- per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
-#else
per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
-#endif
}
#else /* STACKPROTECTOR */
diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h
index 125c407e2abe..41502bd2afd6 100644
--- a/arch/x86/include/asm/static_call.h
+++ b/arch/x86/include/asm/static_call.h
@@ -65,4 +65,19 @@
extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+extern void __static_call_update_early(void *tramp, void *func);
+
+#define static_call_update_early(name, _func) \
+({ \
+ typeof(&STATIC_CALL_TRAMP(name)) __F = (_func); \
+ if (static_call_initialized) { \
+ __static_call_update(&STATIC_CALL_KEY(name), \
+ STATIC_CALL_TRAMP_ADDR(name), __F);\
+ } else { \
+ WRITE_ONCE(STATIC_CALL_KEY(name).func, _func); \
+ __static_call_update_early(STATIC_CALL_TRAMP_ADDR(name),\
+ __F); \
+ } \
+})
+
#endif /* _ASM_STATIC_CALL_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 857d364b9888..79e9695dc13e 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -21,6 +21,7 @@ extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t n);
void *__memset(void *s, int c, size_t n);
+KCFI_REFERENCE(__memset);
/*
* KMSAN needs to instrument as much code as possible. Use C versions of
@@ -30,43 +31,47 @@ void *__memset(void *s, int c, size_t n);
#define __HAVE_ARCH_MEMSET16
static inline void *memset16(uint16_t *s, uint16_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosw"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosw"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET32
static inline void *memset32(uint32_t *s, uint32_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosl"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosl"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#define __HAVE_ARCH_MEMSET64
static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
{
- long d0, d1;
- asm volatile("rep\n\t"
- "stosq"
- : "=&c" (d0), "=&D" (d1)
- : "a" (v), "1" (s), "0" (n)
- : "memory");
- return s;
+ const __auto_type s0 = s;
+ asm volatile (
+ "rep stosq"
+ : "+D" (s), "+c" (n)
+ : "a" (v)
+ : "memory"
+ );
+ return s0;
}
#endif
#define __HAVE_ARCH_MEMMOVE
void *memmove(void *dest, const void *src, size_t count);
void *__memmove(void *dest, const void *src, size_t count);
+KCFI_REFERENCE(__memmove);
int memcmp(const void *cs, const void *ct, size_t count);
size_t strlen(const char *s);
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 728c98175b9c..9b7fa99ae951 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -5,7 +5,7 @@
#include <uapi/asm/svm.h>
#include <uapi/asm/kvm.h>
-#include <asm/hyperv-tlfs.h>
+#include <hyperv/hvhdk.h>
/*
* 32-bit intercept words in the VMCB Control Area, starting
@@ -116,6 +116,7 @@ enum {
INTERCEPT_INVPCID,
INTERCEPT_MCOMMIT,
INTERCEPT_TLBSYNC,
+ INTERCEPT_IDLE_HLT = 166,
};
@@ -285,7 +286,10 @@ static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_
#define AVIC_HPA_MASK ~((0xFFFULL << 52) | 0xFFF)
-#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
+#define SVM_SEV_FEAT_SNP_ACTIVE BIT(0)
+#define SVM_SEV_FEAT_RESTRICTED_INJECTION BIT(3)
+#define SVM_SEV_FEAT_ALTERNATE_INJECTION BIT(4)
+#define SVM_SEV_FEAT_DEBUG_SWAP BIT(5)
struct vmcb_seg {
u16 selector;
@@ -410,7 +414,9 @@ struct sev_es_save_area {
u8 reserved_0x298[80];
u32 pkru;
u32 tsc_aux;
- u8 reserved_0x2f0[24];
+ u64 tsc_scale;
+ u64 tsc_offset;
+ u8 reserved_0x300[8];
u64 rcx;
u64 rdx;
u64 rbx;
@@ -509,6 +515,20 @@ struct ghcb {
u32 ghcb_usage;
} __packed;
+struct vmcb {
+ struct vmcb_control_area control;
+ union {
+ struct vmcb_save_area save;
+
+ /*
+ * For SEV-ES VMs, the save area in the VMCB is used only to
+ * save/load host state. Guest state resides in a separate
+ * page, the aptly named VM Save Area (VMSA), that is encrypted
+ * with the guest's private key.
+ */
+ struct sev_es_save_area host_sev_es_save;
+ };
+} __packed;
#define EXPECTED_VMCB_SAVE_AREA_SIZE 744
#define EXPECTED_GHCB_SAVE_AREA_SIZE 1032
@@ -525,6 +545,7 @@ static inline void __unused_size_checks(void)
BUILD_BUG_ON(sizeof(struct ghcb_save_area) != EXPECTED_GHCB_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
+ BUILD_BUG_ON(offsetof(struct vmcb, save) != EXPECTED_VMCB_CONTROL_AREA_SIZE);
BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE);
/* Check offsets of reserved fields */
@@ -542,7 +563,7 @@ static inline void __unused_size_checks(void)
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298);
- BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0);
+ BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x300);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380);
BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0);
@@ -561,11 +582,6 @@ static inline void __unused_size_checks(void)
BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
}
-struct vmcb {
- struct vmcb_control_area control;
- struct vmcb_save_area save;
-} __packed;
-
#define SVM_CPUID_FUNC 0x8000000a
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index c3bd0c0758c9..75248546403d 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
#ifdef CONFIG_X86_32
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
#else
- if (cpu_feature_enabled(X86_FEATURE_FRED)) {
- /* WRMSRNS is a baseline feature for FRED. */
- wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
- } else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
+ if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
/* Xen PV enters the kernel on the thread stack. */
load_sp0(task_top_of_stack(task));
- }
#endif
}
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 6d8d6bc183b7..cd21a0405ac5 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -31,7 +31,7 @@
*/
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(bts) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -49,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(btr) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -66,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
+ asm volatile("lock " __ASM_SIZE(btc) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -82,7 +82,7 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
*/
static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -95,7 +95,7 @@ static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -108,7 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
- return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
+ return GEN_BINARY_RMWcc("lock " __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
#define sync_test_bit(nr, addr) test_bit(nr, addr)
diff --git a/arch/x86/include/asm/sync_core.h b/arch/x86/include/asm/sync_core.h
index ab7382f92aff..96bda43538ee 100644
--- a/arch/x86/include/asm/sync_core.h
+++ b/arch/x86/include/asm/sync_core.h
@@ -8,7 +8,7 @@
#include <asm/special_insns.h>
#ifdef CONFIG_X86_32
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
asm volatile (
"pushfl\n\t"
@@ -19,7 +19,7 @@ static inline void iret_to_self(void)
: ASM_CALL_CONSTRAINT : : "memory");
}
#else
-static inline void iret_to_self(void)
+static __always_inline void iret_to_self(void)
{
unsigned int tmp;
@@ -55,7 +55,7 @@ static inline void iret_to_self(void)
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/
-static inline void sync_core(void)
+static __always_inline void sync_core(void)
{
/*
* The SERIALIZE instruction is the most straightforward way to
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 2fc7bc3863ff..7c488ff0c764 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -82,7 +82,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
struct pt_regs *regs,
unsigned long *args)
{
- memcpy(args, &regs->bx, 6 * sizeof(args[0]));
+ args[0] = regs->bx;
+ args[1] = regs->cx;
+ args[2] = regs->dx;
+ args[3] = regs->si;
+ args[4] = regs->di;
+ args[5] = regs->bp;
}
static inline int syscall_get_arch(struct task_struct *task)
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index eba178996d84..4a1922ec80cf 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -30,7 +30,7 @@
#define TDX_SUCCESS 0ULL
#define TDX_RND_NO_ENTROPY 0x8000020300000000ULL
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <uapi/asm/mce.h>
@@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve);
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve);
-void tdx_safe_halt(void);
+void tdx_halt(void);
bool tdx_early_handle_ve(struct pt_regs *regs);
@@ -66,10 +66,13 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport);
u64 tdx_hcall_get_quote(u8 *buf, size_t size);
+void __init tdx_dump_attributes(u64 td_attr);
+void __init tdx_dump_td_ctls(u64 td_ctls);
+
#else
static inline void tdx_early_init(void) { };
-static inline void tdx_safe_halt(void) { };
+static inline void tdx_halt(void) { };
static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; }
@@ -123,5 +126,5 @@ static inline int tdx_enable(void) { return -ENODEV; }
static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
#endif /* CONFIG_INTEL_TDX_HOST */
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_TDX_H */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 345aafbc1964..ab9e143ec9fe 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -15,7 +15,7 @@
extern void text_poke_early(void *addr, const void *opcode, size_t len);
-extern void apply_relocation(u8 *buf, size_t len, u8 *dest, u8 *src, size_t src_len);
+extern void apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
@@ -35,6 +35,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void text_poke_sync(void);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+#define text_poke_copy text_poke_copy
extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
extern void *text_poke_set(void *addr, int c, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 12da7dfd5ef1..9282465eea21 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -54,7 +54,7 @@
* - this struct should fit entirely inside of one cache line
* - this struct shares the supervisor stack pages
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct task_struct;
#include <asm/cpufeature.h>
#include <linux/atomic.h>
@@ -73,7 +73,7 @@ struct thread_info {
.flags = 0, \
}
-#else /* !__ASSEMBLY__ */
+#else /* !__ASSEMBLER__ */
#include <asm/asm-offsets.h>
@@ -87,8 +87,9 @@ struct thread_info {
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
-#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
-#define TIF_SSBD 5 /* Speculative store bypass disable */
+#define TIF_NEED_RESCHED_LAZY 4 /* Lazy rescheduling needed */
+#define TIF_SINGLESTEP 5 /* reenable singlestep on user return*/
+#define TIF_SSBD 6 /* Speculative store bypass disable */
#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */
#define TIF_SPEC_L1D_FLUSH 10 /* Flush L1D on mm switches (processes) */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
@@ -110,6 +111,7 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_NEED_RESCHED_LAZY (1 << TIF_NEED_RESCHED_LAZY)
#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SPEC_IB (1 << TIF_SPEC_IB)
@@ -159,7 +161,7 @@ struct thread_info {
*
* preempt_count needs to be 1 initially, until the scheduler is functional.
*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Walks up the stack frames to make sure that the specified object is
@@ -211,7 +213,7 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
}
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Thread-synchronous status.
@@ -222,7 +224,7 @@ static inline int arch_within_stack_frames(const void * const stack,
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
@@ -240,6 +242,6 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_setup_new_exec(void);
#define arch_setup_new_exec arch_setup_new_exec
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h
index 7365dd4acffb..23baf8c9b34c 100644
--- a/arch/x86/include/asm/timer.h
+++ b/arch/x86/include/asm/timer.h
@@ -6,8 +6,6 @@
#include <linux/interrupt.h>
#include <linux/math64.h>
-#define TICK_SIZE (tick_nsec / 1000)
-
unsigned long long native_sched_clock(void);
extern void recalibrate_cpu_khz(void);
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 580636cdc257..866ea78ba156 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,9 @@
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
+#include <linux/kernel.h>
+#include <vdso/bits.h>
+#include <vdso/page.h>
static inline void tlb_flush(struct mmu_gather *tlb)
{
@@ -20,18 +23,144 @@ static inline void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm_range(tlb->mm, start, end, stride_shift, tlb->freed_tables);
}
+static inline void invlpg(unsigned long addr)
+{
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+}
+
+enum addr_stride {
+ PTE_STRIDE = 0,
+ PMD_STRIDE = 1
+};
+
+/*
+ * INVLPGB can be targeted by virtual address, PCID, ASID, or any combination
+ * of the three. For example:
+ * - FLAG_VA | FLAG_INCLUDE_GLOBAL: invalidate all TLB entries at the address
+ * - FLAG_PCID: invalidate all TLB entries matching the PCID
+ *
+ * The first is used to invalidate (kernel) mappings at a particular
+ * address across all processes.
+ *
+ * The latter invalidates all TLB entries matching a PCID.
+ */
+#define INVLPGB_FLAG_VA BIT(0)
+#define INVLPGB_FLAG_PCID BIT(1)
+#define INVLPGB_FLAG_ASID BIT(2)
+#define INVLPGB_FLAG_INCLUDE_GLOBAL BIT(3)
+#define INVLPGB_FLAG_FINAL_ONLY BIT(4)
+#define INVLPGB_FLAG_INCLUDE_NESTED BIT(5)
+
+/* The implied mode when all bits are clear: */
+#define INVLPGB_MODE_ALL_NONGLOBALS 0UL
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
/*
- * While x86 architecture in general requires an IPI to perform TLB
- * shootdown, enablement code for several hypervisors overrides
- * .flush_tlb_others hook in pv_mmu_ops and implements it by issuing
- * a hypercall. To keep software pagetable walkers safe in this case we
- * switch to RCU based table free (MMU_GATHER_RCU_TABLE_FREE). See the comment
- * below 'ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE' in include/asm-generic/tlb.h
- * for more details.
+ * INVLPGB does broadcast TLB invalidation across all the CPUs in the system.
+ *
+ * The INVLPGB instruction is weakly ordered, and a batch of invalidations can
+ * be done in a parallel fashion.
+ *
+ * The instruction takes the number of extra pages to invalidate, beyond the
+ * first page, while __invlpgb gets the more human readable number of pages to
+ * invalidate.
+ *
+ * The bits in rax[0:2] determine respectively which components of the address
+ * (VA, PCID, ASID) get compared when flushing. If neither bits are set, *any*
+ * address in the specified range matches.
+ *
+ * Since it is desired to only flush TLB entries for the ASID that is executing
+ * the instruction (a host/hypervisor or a guest), the ASID valid bit should
+ * always be set. On a host/hypervisor, the hardware will use the ASID value
+ * specified in EDX[15:0] (which should be 0). On a guest, the hardware will
+ * use the actual ASID value of the guest.
+ *
+ * TLBSYNC is used to ensure that pending INVLPGB invalidations initiated from
+ * this CPU have completed.
*/
-static inline void __tlb_remove_table(void *table)
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride stride, u8 flags)
+{
+ u64 rax = addr | flags | INVLPGB_FLAG_ASID;
+ u32 ecx = (stride << 31) | (nr_pages - 1);
+ u32 edx = (pcid << 16) | asid;
+
+ /* The low bits in rax are for flags. Verify addr is clean. */
+ VM_WARN_ON_ONCE(addr & ~PAGE_MASK);
+
+ /* INVLPGB; supported in binutils >= 2.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xfe" :: "a" (rax), "c" (ecx), "d" (edx));
+}
+
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags)
+{
+ __invlpgb(asid, pcid, 0, 1, 0, flags);
+}
+
+static inline void __tlbsync(void)
{
- free_page_and_swap_cache(table);
+ /*
+ * TLBSYNC waits for INVLPGB instructions originating on the same CPU
+ * to have completed. Print a warning if the task has been migrated,
+ * and might not be waiting on all the INVLPGBs issued during this TLB
+ * invalidation sequence.
+ */
+ cant_migrate();
+
+ /* TLBSYNC: supported in binutils >= 0.36. */
+ asm volatile(".byte 0x0f, 0x01, 0xff" ::: "memory");
+}
+#else
+/* Some compilers (I'm looking at you clang!) simply can't do DCE */
+static inline void __invlpgb(unsigned long asid, unsigned long pcid,
+ unsigned long addr, u16 nr_pages,
+ enum addr_stride s, u8 flags) { }
+static inline void __invlpgb_all(unsigned long asid, unsigned long pcid, u8 flags) { }
+static inline void __tlbsync(void) { }
+#endif
+
+static inline void invlpgb_flush_user_nr_nosync(unsigned long pcid,
+ unsigned long addr,
+ u16 nr, bool stride)
+{
+ enum addr_stride str = stride ? PMD_STRIDE : PTE_STRIDE;
+ u8 flags = INVLPGB_FLAG_PCID | INVLPGB_FLAG_VA;
+
+ __invlpgb(0, pcid, addr, nr, str, flags);
+}
+
+/* Flush all mappings for a given PCID, not including globals. */
+static inline void invlpgb_flush_single_pcid_nosync(unsigned long pcid)
+{
+ __invlpgb_all(0, pcid, INVLPGB_FLAG_PCID);
}
+/* Flush all mappings, including globals, for all PCIDs. */
+static inline void invlpgb_flush_all(void)
+{
+ /*
+ * TLBSYNC at the end needs to make sure all flushes done on the
+ * current CPU have been executed system-wide. Therefore, make
+ * sure nothing gets migrated in-between but disable preemption
+ * as it is cheaper.
+ */
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_FLAG_INCLUDE_GLOBAL);
+ __tlbsync();
+}
+
+/* Flush addr, including globals, for all PCIDs. */
+static inline void invlpgb_flush_addr_nosync(unsigned long addr, u16 nr)
+{
+ __invlpgb(0, 0, addr, nr, PTE_STRIDE, INVLPGB_FLAG_INCLUDE_GLOBAL);
+}
+
+/* Flush all mappings for all PCIDs except globals. */
+static inline void invlpgb_flush_all_nonglobals(void)
+{
+ guard(preempt)();
+ __invlpgb_all(0, 0, INVLPGB_MODE_ALL_NONGLOBALS);
+ __tlbsync();
+}
#endif /* _ASM_X86_TLB_H */
diff --git a/arch/x86/include/asm/tlbbatch.h b/arch/x86/include/asm/tlbbatch.h
index 1ad56eb3e8a8..80aaf64ff25f 100644
--- a/arch/x86/include/asm/tlbbatch.h
+++ b/arch/x86/include/asm/tlbbatch.h
@@ -10,6 +10,11 @@ struct arch_tlbflush_unmap_batch {
* the PFNs being flushed..
*/
struct cpumask cpumask;
+ /*
+ * Set if pages were unmapped from any MM, even one that does not
+ * have active CPUs in its cpumask.
+ */
+ bool unmapped_pages;
};
#endif /* _ARCH_X86_TLBBATCH_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 25726893c6f4..e9b81876ebe4 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -6,6 +6,7 @@
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
+#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/special_insns.h>
@@ -183,6 +184,9 @@ static inline void cr4_init_shadow(void)
extern unsigned long mmu_cr4_features;
extern u32 *trampoline_cr4_features;
+/* How many pages can be invalidated with one INVLPGB. */
+extern u16 invlpgb_count_max;
+
extern void initialize_tlbstate_and_flush(void);
/*
@@ -222,6 +226,7 @@ struct flush_tlb_info {
unsigned int initiating_cpu;
u8 stride_shift;
u8 freed_tables;
+ u8 trim_cpumask;
};
void flush_tlb_local(void);
@@ -230,6 +235,71 @@ void flush_tlb_one_kernel(unsigned long addr);
void flush_tlb_multi(const struct cpumask *cpumask,
const struct flush_tlb_info *info);
+static inline bool is_dyn_asid(u16 asid)
+{
+ return asid < TLB_NR_DYN_ASIDS;
+}
+
+static inline bool is_global_asid(u16 asid)
+{
+ return !is_dyn_asid(asid);
+}
+
+#ifdef CONFIG_BROADCAST_TLB_FLUSH
+static inline u16 mm_global_asid(struct mm_struct *mm)
+{
+ u16 asid;
+
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return 0;
+
+ asid = smp_load_acquire(&mm->context.global_asid);
+
+ /* mm->context.global_asid is either 0, or a global ASID */
+ VM_WARN_ON_ONCE(asid && is_dyn_asid(asid));
+
+ return asid;
+}
+
+static inline void mm_init_global_asid(struct mm_struct *mm)
+{
+ if (cpu_feature_enabled(X86_FEATURE_INVLPGB)) {
+ mm->context.global_asid = 0;
+ mm->context.asid_transition = false;
+ }
+}
+
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid)
+{
+ /*
+ * Notably flush_tlb_mm_range() -> broadcast_tlb_flush() ->
+ * finish_asid_transition() needs to observe asid_transition = true
+ * once it observes global_asid.
+ */
+ mm->context.asid_transition = true;
+ smp_store_release(&mm->context.global_asid, asid);
+}
+
+static inline void mm_clear_asid_transition(struct mm_struct *mm)
+{
+ WRITE_ONCE(mm->context.asid_transition, false);
+}
+
+static inline bool mm_in_asid_transition(struct mm_struct *mm)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_INVLPGB))
+ return false;
+
+ return mm && READ_ONCE(mm->context.asid_transition);
+}
+#else
+static inline u16 mm_global_asid(struct mm_struct *mm) { return 0; }
+static inline void mm_init_global_asid(struct mm_struct *mm) { }
+static inline void mm_assign_global_asid(struct mm_struct *mm, u16 asid) { }
+static inline void mm_clear_asid_transition(struct mm_struct *mm) { }
+static inline bool mm_in_asid_transition(struct mm_struct *mm) { return false; }
+#endif /* CONFIG_BROADCAST_TLB_FLUSH */
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif
@@ -241,7 +311,7 @@ void flush_tlb_multi(const struct cpumask *cpumask,
flush_tlb_mm_range((vma)->vm_mm, start, end, \
((vma)->vm_flags & VM_HUGETLB) \
? huge_page_shift(hstate_vma(vma)) \
- : PAGE_SHIFT, false)
+ : PAGE_SHIFT, true)
extern void flush_tlb_all(void);
extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
@@ -278,11 +348,11 @@ static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
}
static inline void arch_tlbbatch_add_pending(struct arch_tlbflush_unmap_batch *batch,
- struct mm_struct *mm,
- unsigned long uaddr)
+ struct mm_struct *mm, unsigned long start, unsigned long end)
{
inc_mm_tlb_gen(mm);
cpumask_or(&batch->cpumask, &batch->cpumask, mm_cpumask(mm));
+ batch->unmapped_pages = true;
mmu_notifier_arch_invalidate_secondary_tlbs(mm, 0, -1UL);
}
@@ -399,11 +469,10 @@ static inline u64 tlbstate_lam_cr3_mask(void)
return lam << X86_CR3_LAM_U57_BIT;
}
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
- this_cpu_write(cpu_tlbstate.lam,
- mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
- this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
+ this_cpu_write(cpu_tlbstate.lam, lam >> X86_CR3_LAM_U57_BIT);
+ this_cpu_write(tlbstate_untag_mask, untag_mask);
}
#else
@@ -413,7 +482,7 @@ static inline u64 tlbstate_lam_cr3_mask(void)
return 0;
}
-static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
+static inline void cpu_tlbstate_update_lam(unsigned long lam, u64 untag_mask)
{
}
#endif
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index abe3a8f22cbd..6c79ee7c0957 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -114,6 +114,12 @@ enum x86_topology_domains {
TOPO_MAX_DOMAIN,
};
+enum x86_topology_cpu_type {
+ TOPO_CPU_TYPE_PERFORMANCE,
+ TOPO_CPU_TYPE_EFFICIENCY,
+ TOPO_CPU_TYPE_UNKNOWN,
+};
+
struct x86_topology_system {
unsigned int dom_shifts[TOPO_MAX_DOMAIN];
unsigned int dom_size[TOPO_MAX_DOMAIN];
@@ -137,6 +143,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu);
#define topology_logical_package_id(cpu) (cpu_data(cpu).topo.logical_pkg_id)
#define topology_physical_package_id(cpu) (cpu_data(cpu).topo.pkg_id)
#define topology_logical_die_id(cpu) (cpu_data(cpu).topo.logical_die_id)
+#define topology_logical_core_id(cpu) (cpu_data(cpu).topo.logical_core_id)
#define topology_die_id(cpu) (cpu_data(cpu).topo.die_id)
#define topology_core_id(cpu) (cpu_data(cpu).topo.core_id)
#define topology_ppin(cpu) (cpu_data(cpu).ppin)
@@ -149,6 +156,9 @@ extern unsigned int __max_threads_per_core;
extern unsigned int __num_threads_per_package;
extern unsigned int __num_cores_per_package;
+const char *get_topology_cpu_type_name(struct cpuinfo_x86 *c);
+enum x86_topology_cpu_type get_topology_cpu_type(struct cpuinfo_x86 *c);
+
static inline unsigned int topology_max_packages(void)
{
return __max_logical_packages;
@@ -219,11 +229,11 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
{
return cpumask_test_cpu(cpu, cpu_primary_thread_mask);
}
+#define topology_is_primary_thread topology_is_primary_thread
#else /* CONFIG_SMP */
static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; }
static inline int topology_max_smt_threads(void) { return 1; }
-static inline bool topology_is_primary_thread(unsigned int cpu) { return true; }
static inline unsigned int topology_amd_nodes_per_pkg(void) { return 1; }
#endif /* !CONFIG_SMP */
@@ -241,7 +251,7 @@ extern bool x86_topology_update;
#include <asm/percpu.h>
DECLARE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
-extern unsigned int __read_mostly sysctl_sched_itmt_enabled;
+extern bool __read_mostly sysctl_sched_itmt_enabled;
/* Interface to set priority of a cpu */
void sched_set_itmt_core_prio(int prio, int core_cpu);
@@ -254,7 +264,7 @@ void sched_clear_itmt_support(void);
#else /* CONFIG_SCHED_MC_PRIO */
-#define sysctl_sched_itmt_enabled 0
+#define sysctl_sched_itmt_enabled false
static inline void sched_set_itmt_core_prio(int prio, int core_cpu)
{
}
@@ -282,9 +292,22 @@ static inline long arch_scale_freq_capacity(int cpu)
}
#define arch_scale_freq_capacity arch_scale_freq_capacity
+bool arch_enable_hybrid_capacity_scale(void);
+void arch_set_cpu_capacity(int cpu, unsigned long cap, unsigned long max_cap,
+ unsigned long cap_freq, unsigned long base_freq);
+
+unsigned long arch_scale_cpu_capacity(int cpu);
+#define arch_scale_cpu_capacity arch_scale_cpu_capacity
+
extern void arch_set_max_freq_ratio(bool turbo_disabled);
extern void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled);
#else
+static inline bool arch_enable_hybrid_capacity_scale(void) { return false; }
+static inline void arch_set_cpu_capacity(int cpu, unsigned long cap,
+ unsigned long max_cap,
+ unsigned long cap_freq,
+ unsigned long base_freq) { }
+
static inline void arch_set_max_freq_ratio(bool turbo_disabled) { }
static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled) { }
#endif
@@ -292,9 +315,4 @@ static inline void freq_invariance_set_perf_ratio(u64 ratio, bool turbo_disabled
extern void arch_scale_freq_tick(void);
#define arch_scale_freq_tick arch_scale_freq_tick
-#ifdef CONFIG_ACPI_CPPC_LIB
-void init_freq_invariance_cppc(void);
-#define arch_init_invariance_cppc init_freq_invariance_cppc
-#endif
-
#endif /* _ASM_X86_TOPOLOGY_H */
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 1f1deaecd364..869b88061801 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -35,8 +35,6 @@ static inline int get_si_code(unsigned long condition)
return TRAP_BRKPT;
}
-extern int panic_on_unrecovered_nmi;
-
void math_emulate(struct math_emu_info *);
bool fault_in_kernel_space(unsigned long address);
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 405efb3e4996..94408a784c8e 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -28,9 +28,6 @@ static inline cycles_t get_cycles(void)
}
#define get_cycles get_cycles
-extern struct system_counterval_t convert_art_to_tsc(u64 art);
-extern struct system_counterval_t convert_art_ns_to_tsc(u64 art_ns);
-
extern void tsc_early_init(void);
extern void tsc_init(void);
extern void mark_tsc_unstable(char *reason);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 237dc8cdd12b..3a7755c1a441 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -78,10 +78,10 @@ extern int __get_user_bad(void);
int __ret_gu; \
register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \
__chk_user_ptr(ptr); \
- asm volatile("call __" #fn "_%P4" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=a" (__ret_gu), "=r" (__val_gu), \
ASM_CALL_CONSTRAINT \
- : "0" (ptr), "i" (sizeof(*(ptr)))); \
+ : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \
instrument_get_user(__val_gu); \
(x) = (__force __typeof__(*(ptr))) __val_gu; \
__builtin_expect(__ret_gu, 0); \
@@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void);
__chk_user_ptr(__ptr); \
__ptr_pu = __ptr; \
__val_pu = __x; \
- asm volatile("call __" #fn "_%P[size]" \
+ asm volatile("call __" #fn "_%c[size]" \
: "=c" (__ret_pu), \
ASM_CALL_CONSTRAINT \
: "0" (__ptr_pu), \
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 04789f45ab2b..c52f0133425b 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,6 +12,13 @@
#include <asm/cpufeatures.h>
#include <asm/page.h>
#include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
#ifdef CONFIG_ADDRESS_MASKING
/*
@@ -46,35 +53,44 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
#endif
+#define valid_user_address(x) \
+ ((__force unsigned long)(x) <= runtime_const_ptr(USER_PTR_MAX))
+
/*
- * The virtual address space space is logically divided into a kernel
- * half and a user half. When cast to a signed type, user pointers
- * are positive and kernel pointers are negative.
+ * Masking the user address is an alternative to a conditional
+ * user_access_begin that can avoid the fencing. This only works
+ * for dense accesses starting at the address.
*/
-#define valid_user_address(x) ((__force long)(x) >= 0)
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+ void __user *ret;
+ asm("cmp %1,%0\n\t"
+ "cmova %1,%0"
+ :"=r" (ret)
+ :"r" (runtime_const_ptr(USER_PTR_MAX)),
+ "0" (ptr));
+ return ret;
+}
+#define masked_user_access_begin(x) ({ \
+ __auto_type __masked_ptr = (x); \
+ __masked_ptr = mask_user_address(__masked_ptr); \
+ __uaccess_begin(); __masked_ptr; })
/*
* User pointers can have tag bits on x86-64. This scheme tolerates
* arbitrary values in those bits rather then masking them off.
*
* Enforce two rules:
- * 1. 'ptr' must be in the user half of the address space
+ * 1. 'ptr' must be in the user part of the address space
* 2. 'ptr+size' must not overflow into kernel addresses
*
- * Note that addresses around the sign change are not valid addresses,
- * and will GP-fault even with LAM enabled if the sign bit is set (see
- * "CR3.LAM_SUP" that can narrow the canonicality check if we ever
- * enable it, but not remove it entirely).
- *
- * So the "overflow into kernel addresses" does not imply some sudden
- * exact boundary at the sign bit, and we can allow a lot of slop on the
- * size check.
+ * Note that we always have at least one guard page between the
+ * max user address and the non-canonical gap, allowing us to
+ * ignore small sizes entirely.
*
* In fact, we could probably remove the size check entirely, since
* any kernel accesses will be in increasing address order starting
- * at 'ptr', and even if the end might be in kernel space, we'll
- * hit the GP faults for non-canonical accesses before we ever get
- * there.
+ * at 'ptr'.
*
* That's a separate optimization, for now just handle the small
* constant case.
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 761173ccc33c..6c9e5bdd3916 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -56,6 +56,5 @@
# define __ARCH_WANT_SYS_FORK
# define __ARCH_WANT_SYS_VFORK
# define __ARCH_WANT_SYS_CLONE
-# define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_X86_UNISTD_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 85cc57cb6539..8f4579c5a6f8 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -5,7 +5,7 @@
#include "orc_types.h"
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro UNWIND_HINT_END_OF_STACK
UNWIND_HINT type=UNWIND_HINT_TYPE_END_OF_STACK
@@ -88,6 +88,6 @@
#define UNWIND_HINT_RESTORE \
UNWIND_HINT(UNWIND_HINT_TYPE_RESTORE, 0, 0, 0)
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_UNWIND_HINTS_H */
diff --git a/arch/x86/include/asm/uv/uv_irq.h b/arch/x86/include/asm/uv/uv_irq.h
index d6b17c760622..1876b5edd142 100644
--- a/arch/x86/include/asm/uv/uv_irq.h
+++ b/arch/x86/include/asm/uv/uv_irq.h
@@ -31,7 +31,6 @@ enum {
UV_AFFINITY_CPU
};
-extern int uv_irq_2_mmr_info(int, unsigned long *, int *);
extern int uv_setup_irq(char *, int, int, unsigned long, int);
extern void uv_teardown_irq(unsigned int);
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h
index d7f6592b74a9..80be0da733df 100644
--- a/arch/x86/include/asm/vdso.h
+++ b/arch/x86/include/asm/vdso.h
@@ -18,12 +18,6 @@ struct vdso_image {
unsigned long extable_base, extable_len;
const void *extable;
- long sym_vvar_start; /* Negative offset to the vvar area */
-
- long sym_vvar_page;
- long sym_pvclock_page;
- long sym_hvclock_page;
- long sym_timens_page;
long sym_VDSO32_NOTE_MASK;
long sym___kernel_sigreturn;
long sym___kernel_rt_sigreturn;
diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h
new file mode 100644
index 000000000000..ff1c11b9fa27
--- /dev/null
+++ b/arch/x86/include/asm/vdso/getrandom.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+#ifndef __ASM_VDSO_GETRANDOM_H
+#define __ASM_VDSO_GETRANDOM_H
+
+#ifndef __ASSEMBLER__
+
+#include <asm/unistd.h>
+
+/**
+ * getrandom_syscall - Invoke the getrandom() syscall.
+ * @buffer: Destination buffer to fill with random bytes.
+ * @len: Size of @buffer in bytes.
+ * @flags: Zero or more GRND_* flags.
+ * Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
+ */
+static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
+{
+ long ret;
+
+ asm ("syscall" : "=a" (ret) :
+ "0" (__NR_getrandom), "D" (buffer), "S" (len), "d" (flags) :
+ "rcx", "r11", "memory");
+
+ return ret;
+}
+
+#endif /* !__ASSEMBLER__ */
+
+#endif /* __ASM_VDSO_GETRANDOM_H */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 8e048ca980df..73b2e7ee8f0f 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -10,19 +10,15 @@
#ifndef __ASM_VDSO_GETTIMEOFDAY_H
#define __ASM_VDSO_GETTIMEOFDAY_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <uapi/linux/time.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <clocksource/hyperv_timer.h>
-#define __vdso_data (VVAR(_vdso_data))
-#define __timens_vdso_data (TIMENS(_vdso_data))
-
#define VDSO_HAS_TIME 1
#define VDSO_HAS_CLOCK_GETRES 1
@@ -57,14 +53,6 @@ extern struct ms_hyperv_tsc_page hvclock_page
__attribute__((visibility("hidden")));
#endif
-#ifdef CONFIG_TIME_NS
-static __always_inline
-const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
-{
- return __timens_vdso_data;
-}
-#endif
-
#ifndef BUILD_VDSO32
static __always_inline
@@ -248,7 +236,7 @@ static u64 vread_hvclock(void)
#endif
static inline u64 __arch_get_hw_counter(s32 clock_mode,
- const struct vdso_data *vd)
+ const struct vdso_time_data *vd)
{
if (likely(clock_mode == VDSO_CLOCKMODE_TSC))
return (u64)rdtsc_ordered() & S64_MAX;
@@ -273,12 +261,7 @@ static inline u64 __arch_get_hw_counter(s32 clock_mode,
return U64_MAX;
}
-static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
-{
- return __vdso_data;
-}
-
-static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
+static inline bool arch_vdso_clocksource_ok(const struct vdso_clock *vc)
{
return true;
}
@@ -300,7 +283,7 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
#define vdso_cycles_ok arch_vdso_cycles_ok
/*
- * x86 specific delta calculation.
+ * x86 specific calculation of nanoseconds for the current cycle count
*
* The regular implementation assumes that clocksource reads are globally
* monotonic. The TSC can be slightly off across sockets which can cause
@@ -308,8 +291,8 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* jump.
*
* Therefore it needs to be verified that @cycles are greater than
- * @last. If not then use @last, which is the base time of the current
- * conversion period.
+ * @vd->cycles_last. If not then use @vd->cycles_last, which is the base
+ * time of the current conversion period.
*
* This variant also uses a custom mask because while the clocksource mask of
* all the VDSO capable clocksources on x86 is U64_MAX, the above code uses
@@ -317,26 +300,37 @@ static inline bool arch_vdso_cycles_ok(u64 cycles)
* declares everything with the MSB/Sign-bit set as invalid. Therefore the
* effective mask is S64_MAX.
*/
-static __always_inline
-u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
+static __always_inline u64 vdso_calc_ns(const struct vdso_clock *vc, u64 cycles, u64 base)
{
- /*
- * Due to the MSB/Sign-bit being used as invalid marker (see
- * arch_vdso_cycles_valid() above), the effective mask is S64_MAX.
- */
- u64 delta = (cycles - last) & S64_MAX;
+ u64 delta = cycles - vc->cycle_last;
/*
- * Due to the above mentioned TSC wobbles, filter out negative motion.
- * Per the above masking, the effective sign bit is now bit 62.
+ * Negative motion and deltas which can cause multiplication
+ * overflow require special treatment. This check covers both as
+ * negative motion is guaranteed to be greater than @vc::max_cycles
+ * due to unsigned comparison.
+ *
+ * Due to the MSB/Sign-bit being used as invalid marker (see
+ * arch_vdso_cycles_ok() above), the effective mask is S64_MAX, but that
+ * case is also unlikely and will also take the unlikely path here.
*/
- if (unlikely(delta & (1ULL << 62)))
- return 0;
+ if (unlikely(delta > vc->max_cycles)) {
+ /*
+ * Due to the above mentioned TSC wobbles, filter out
+ * negative motion. Per the above masking, the effective
+ * sign bit is now bit 62.
+ */
+ if (delta & (1ULL << 62))
+ return base >> vc->shift;
+
+ /* Handle multiplication overflow gracefully */
+ return mul_u64_u32_add_u64_shr(delta & S64_MAX, vc->mult, base, vc->shift);
+ }
- return delta * mult;
+ return ((delta * vc->mult) + base) >> vc->shift;
}
-#define vdso_calc_delta vdso_calc_delta
+#define vdso_calc_ns vdso_calc_ns
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_GETTIMEOFDAY_H */
diff --git a/arch/x86/include/asm/vdso/processor.h b/arch/x86/include/asm/vdso/processor.h
index 2cbce97d29ea..c9b2ba7a9ec4 100644
--- a/arch/x86/include/asm/vdso/processor.h
+++ b/arch/x86/include/asm/vdso/processor.h
@@ -5,7 +5,7 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
@@ -22,6 +22,6 @@ struct getcpu_cache;
notrace long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused);
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h
index be199a9b2676..4aa311a923f2 100644
--- a/arch/x86/include/asm/vdso/vsyscall.h
+++ b/arch/x86/include/asm/vdso/vsyscall.h
@@ -2,28 +2,21 @@
#ifndef __ASM_VDSO_VSYSCALL_H
#define __ASM_VDSO_VSYSCALL_H
-#ifndef __ASSEMBLY__
+#define __VDSO_PAGES 6
+
+#define VDSO_NR_VCLOCK_PAGES 2
+#define VDSO_VCLOCK_PAGES_START(_b) ((_b) + (__VDSO_PAGES - VDSO_NR_VCLOCK_PAGES) * PAGE_SIZE)
+#define VDSO_PAGE_PVCLOCK_OFFSET 0
+#define VDSO_PAGE_HVCLOCK_OFFSET 1
+
+#ifndef __ASSEMBLER__
-#include <linux/hrtimer.h>
-#include <linux/timekeeper_internal.h>
#include <vdso/datapage.h>
#include <asm/vgtod.h>
-#include <asm/vvar.h>
-
-DEFINE_VVAR(struct vdso_data, _vdso_data);
-/*
- * Update the vDSO data page to keep in sync with kernel timekeeping.
- */
-static __always_inline
-struct vdso_data *__x86_get_k_vdso_data(void)
-{
- return _vdso_data;
-}
-#define __arch_get_k_vdso_data __x86_get_k_vdso_data
/* The asm-generic header needs to be included after the definitions above */
#include <asm-generic/vdso/vsyscall.h>
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* __ASM_VDSO_VSYSCALL_H */
diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h
index 75884d2cdec3..5d471253c755 100644
--- a/arch/x86/include/asm/vermagic.h
+++ b/arch/x86/include/asm/vermagic.h
@@ -15,8 +15,6 @@
#define MODULE_PROC_FAMILY "586TSC "
#elif defined CONFIG_M586MMX
#define MODULE_PROC_FAMILY "586MMX "
-#elif defined CONFIG_MCORE2
-#define MODULE_PROC_FAMILY "CORE2 "
#elif defined CONFIG_MATOM
#define MODULE_PROC_FAMILY "ATOM "
#elif defined CONFIG_M686
@@ -33,8 +31,6 @@
#define MODULE_PROC_FAMILY "K6 "
#elif defined CONFIG_MK7
#define MODULE_PROC_FAMILY "K7 "
-#elif defined CONFIG_MK8
-#define MODULE_PROC_FAMILY "K8 "
#elif defined CONFIG_MELAN
#define MODULE_PROC_FAMILY "ELAN "
#elif defined CONFIG_MCRUSOE
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 7aa38b2ad8a9..a0ce291abcae 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -14,11 +14,6 @@
#include <uapi/linux/time.h>
-#ifdef BUILD_VDSO32_64
-typedef u64 gtod_long_t;
-#else
-typedef unsigned long gtod_long_t;
-#endif
#endif /* CONFIG_GENERIC_GETTIMEOFDAY */
#endif /* _ASM_X86_VGTOD_H */
diff --git a/arch/x86/include/asm/video.h b/arch/x86/include/asm/video.h
new file mode 100644
index 000000000000..0950c9535fae
--- /dev/null
+++ b/arch/x86/include/asm/video.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_VIDEO_H
+#define _ASM_X86_VIDEO_H
+
+#include <linux/types.h>
+
+#include <asm/page.h>
+
+struct device;
+
+pgprot_t pgprot_framebuffer(pgprot_t prot,
+ unsigned long vm_start, unsigned long vm_end,
+ unsigned long offset);
+#define pgprot_framebuffer pgprot_framebuffer
+
+bool video_is_primary_device(struct device *dev);
+#define video_is_primary_device video_is_primary_device
+
+#include <asm-generic/video.h>
+
+#endif /* _ASM_X86_VIDEO_H */
diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h
index 9e8ac5073ecb..62ee19909903 100644
--- a/arch/x86/include/asm/vm86.h
+++ b/arch/x86/include/asm/vm86.h
@@ -84,7 +84,7 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c)
static inline void save_v86_state(struct kernel_vm86_regs *a, int b) { }
-#define free_vm86(t) do { } while(0)
+#define free_vm86(task) do { (void)(task); } while(0)
#endif /* CONFIG_VM86 */
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h
index ac9fc51e2b18..c9cf43d5ef23 100644
--- a/arch/x86/include/asm/vmware.h
+++ b/arch/x86/include/asm/vmware.h
@@ -7,51 +7,321 @@
#include <linux/stringify.h>
/*
- * The hypercall definitions differ in the low word of the %edx argument
- * in the following way: the old port base interface uses the port
- * number to distinguish between high- and low bandwidth versions.
+ * VMware hypercall ABI.
+ *
+ * - Low bandwidth (LB) hypercalls (I/O port based, vmcall and vmmcall)
+ * have up to 6 input and 6 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg2 - Hypercall command
+ * arg3 bits [15:0] - Port number, LB and direction flags
+ *
+ * - Low bandwidth TDX hypercalls (x86_64 only) are similar to LB
+ * hypercalls. They also have up to 6 input and 6 output on registers
+ * arguments, with different argument to register mapping:
+ * %r12 (arg0), %rbx (arg1), %r13 (arg2), %rdx (arg3),
+ * %rsi (arg4), %rdi (arg5).
+ *
+ * - High bandwidth (HB) hypercalls are I/O port based only. They have
+ * up to 7 input and 7 output arguments passed and returned using
+ * registers: %eax (arg0), %ebx (arg1), %ecx (arg2), %edx (arg3),
+ * %esi (arg4), %edi (arg5), %ebp (arg6).
+ * The following input arguments must be initialized by the caller:
+ * arg0 - VMWARE_HYPERVISOR_MAGIC
+ * arg1 - Hypercall command
+ * arg3 bits [15:0] - Port number, HB and direction flags
+ *
+ * For compatibility purposes, x86_64 systems use only lower 32 bits
+ * for input and output arguments.
+ *
+ * The hypercall definitions differ in the low word of the %edx (arg3)
+ * in the following way: the old I/O port based interface uses the port
+ * number to distinguish between high- and low bandwidth versions, and
+ * uses IN/OUT instructions to define transfer direction.
*
* The new vmcall interface instead uses a set of flags to select
* bandwidth mode and transfer direction. The flags should be loaded
- * into %dx by any user and are automatically replaced by the port
- * number if the VMWARE_HYPERVISOR_PORT method is used.
- *
- * In short, new driver code should strictly use the new definition of
- * %dx content.
+ * into arg3 by any user and are automatically replaced by the port
+ * number if the I/O port method is used.
*/
-/* Old port-based version */
-#define VMWARE_HYPERVISOR_PORT 0x5658
-#define VMWARE_HYPERVISOR_PORT_HB 0x5659
+#define VMWARE_HYPERVISOR_HB BIT(0)
+#define VMWARE_HYPERVISOR_OUT BIT(1)
-/* Current vmcall / vmmcall version */
-#define VMWARE_HYPERVISOR_HB BIT(0)
-#define VMWARE_HYPERVISOR_OUT BIT(1)
+#define VMWARE_HYPERVISOR_PORT 0x5658
+#define VMWARE_HYPERVISOR_PORT_HB (VMWARE_HYPERVISOR_PORT | \
+ VMWARE_HYPERVISOR_HB)
-/* The low bandwidth call. The low word of edx is presumed clear. */
-#define VMWARE_HYPERCALL \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT) ", %%dx; " \
- "inl (%%dx), %%eax", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_HYPERVISOR_MAGIC 0x564d5868U
+#define VMWARE_CMD_GETVERSION 10
+#define VMWARE_CMD_GETHZ 45
+#define VMWARE_CMD_GETVCPU_INFO 68
+#define VMWARE_CMD_STEALCLOCK 91
/*
- * The high bandwidth out call. The low word of edx is presumed to have the
- * HB and OUT bits set.
+ * Hypercall command mask:
+ * bits [6:0] command, range [0, 127]
+ * bits [19:16] sub-command, range [0, 15]
*/
-#define VMWARE_HYPERCALL_HB_OUT \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep outsb", \
- "vmcall", X86_FEATURE_VMCALL, \
- "vmmcall", X86_FEATURE_VMW_VMMCALL)
+#define VMWARE_CMD_MASK 0xf007fU
+
+#define CPUID_VMWARE_FEATURES_ECX_VMMCALL BIT(0)
+#define CPUID_VMWARE_FEATURES_ECX_VMCALL BIT(1)
+
+extern unsigned long vmware_hypercall_slow(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
+
+#define VMWARE_TDX_VENDOR_LEAF 0x1af7e4909ULL
+#define VMWARE_TDX_HCALL_FUNC 1
+
+extern unsigned long vmware_tdx_hypercall(unsigned long cmd,
+ unsigned long in1, unsigned long in3,
+ unsigned long in4, unsigned long in5,
+ u32 *out1, u32 *out2, u32 *out3,
+ u32 *out4, u32 *out5);
/*
- * The high bandwidth in call. The low word of edx is presumed to have the
- * HB bit set.
+ * The low bandwidth call. The low word of %edx is presumed to have OUT bit
+ * set. The high word of %edx may contain input data from the caller.
*/
-#define VMWARE_HYPERCALL_HB_IN \
- ALTERNATIVE_2("movw $" __stringify(VMWARE_HYPERVISOR_PORT_HB) ", %%dx; " \
- "rep insb", \
- "vmcall", X86_FEATURE_VMCALL, \
+#define VMWARE_HYPERCALL \
+ ALTERNATIVE_2("movw %[port], %%dx\n\t" \
+ "inl (%%dx), %%eax", \
+ "vmcall", X86_FEATURE_VMCALL, \
"vmmcall", X86_FEATURE_VMW_VMMCALL)
+
+static inline
+unsigned long vmware_hypercall1(unsigned long cmd, unsigned long in1)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ NULL, NULL, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall3(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall4(unsigned long cmd, unsigned long in1,
+ u32 *out1, u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, 0, 0, 0,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (0)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall5(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out2)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ NULL, out2, NULL, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall6(unsigned long cmd, unsigned long in1,
+ unsigned long in3, u32 *out2,
+ u32 *out3, u32 *out4, u32 *out5)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, 0, 0,
+ NULL, out2, out3, out4, out5);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=c" (*out2), "=d" (*out3), "=S" (*out4),
+ "=D" (*out5)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall7(unsigned long cmd, unsigned long in1,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, u32 *out1,
+ u32 *out2, u32 *out3)
+{
+ unsigned long out0;
+
+ if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST))
+ return vmware_tdx_hypercall(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ if (unlikely(!alternatives_patched) && !__is_defined(MODULE))
+ return vmware_hypercall_slow(cmd, in1, in3, in4, in5,
+ out1, out2, out3, NULL, NULL);
+
+ asm_inline volatile (VMWARE_HYPERCALL
+ : "=a" (out0), "=b" (*out1), "=c" (*out2), "=d" (*out3)
+ : [port] "i" (VMWARE_HYPERVISOR_PORT),
+ "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (in1),
+ "c" (cmd),
+ "d" (in3),
+ "S" (in4),
+ "D" (in5)
+ : "cc", "memory");
+ return out0;
+}
+
+#ifdef CONFIG_X86_64
+#define VMW_BP_CONSTRAINT "r"
+#else
+#define VMW_BP_CONSTRAINT "m"
+#endif
+
+/*
+ * High bandwidth calls are not supported on encrypted memory guests.
+ * The caller should check cc_platform_has(CC_ATTR_MEM_ENCRYPT) and use
+ * low bandwidth hypercall if memory encryption is set.
+ * This assumption simplifies HB hypercall implementation to just I/O port
+ * based approach without alternative patching.
+ */
+static inline
+unsigned long vmware_hypercall_hb_out(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep outsb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+
+static inline
+unsigned long vmware_hypercall_hb_in(unsigned long cmd, unsigned long in2,
+ unsigned long in3, unsigned long in4,
+ unsigned long in5, unsigned long in6,
+ u32 *out1)
+{
+ unsigned long out0;
+
+ asm_inline volatile (
+ UNWIND_HINT_SAVE
+ "push %%" _ASM_BP "\n\t"
+ UNWIND_HINT_UNDEFINED
+ "mov %[in6], %%" _ASM_BP "\n\t"
+ "rep insb\n\t"
+ "pop %%" _ASM_BP "\n\t"
+ UNWIND_HINT_RESTORE
+ : "=a" (out0), "=b" (*out1)
+ : "a" (VMWARE_HYPERVISOR_MAGIC),
+ "b" (cmd),
+ "c" (in2),
+ "d" (in3 | VMWARE_HYPERVISOR_PORT_HB),
+ "S" (in4),
+ "D" (in5),
+ [in6] VMW_BP_CONSTRAINT (in6)
+ : "cc", "memory");
+ return out0;
+}
+#undef VMW_BP_CONSTRAINT
+#undef VMWARE_HYPERCALL
+
#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 4dba17363008..8707361b24da 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -71,6 +71,7 @@
#define SECONDARY_EXEC_ENCLS_EXITING VMCS_CONTROL_BIT(ENCLS_EXITING)
#define SECONDARY_EXEC_RDSEED_EXITING VMCS_CONTROL_BIT(RDSEED_EXITING)
#define SECONDARY_EXEC_ENABLE_PML VMCS_CONTROL_BIT(PAGE_MOD_LOGGING)
+#define SECONDARY_EXEC_EPT_VIOLATION_VE VMCS_CONTROL_BIT(EPT_VIOLATION_VE)
#define SECONDARY_EXEC_PT_CONCEAL_VMX VMCS_CONTROL_BIT(PT_CONCEAL_VMX)
#define SECONDARY_EXEC_ENABLE_XSAVES VMCS_CONTROL_BIT(XSAVES)
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC VMCS_CONTROL_BIT(MODE_BASED_EPT_EXEC)
@@ -121,19 +122,17 @@
#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
-#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
-#define VMX_MISC_SAVE_EFER_LMA 0x00000020
-#define VMX_MISC_ACTIVITY_HLT 0x00000040
-#define VMX_MISC_ACTIVITY_WAIT_SIPI 0x00000100
-#define VMX_MISC_ZERO_LEN_INS 0x40000000
-#define VMX_MISC_MSR_LIST_MULTIPLIER 512
-
/* VMFUNC functions */
#define VMFUNC_CONTROL_BIT(x) BIT((VMX_FEATURE_##x & 0x1f) - 28)
#define VMX_VMFUNC_EPTP_SWITCHING VMFUNC_CONTROL_BIT(EPTP_SWITCHING)
#define VMFUNC_EPTP_ENTRIES 512
+#define VMX_BASIC_32BIT_PHYS_ADDR_ONLY BIT_ULL(48)
+#define VMX_BASIC_DUAL_MONITOR_TREATMENT BIT_ULL(49)
+#define VMX_BASIC_INOUT BIT_ULL(54)
+#define VMX_BASIC_TRUE_CTLS BIT_ULL(55)
+
static inline u32 vmx_basic_vmcs_revision_id(u64 vmx_basic)
{
return vmx_basic & GENMASK_ULL(30, 0);
@@ -144,9 +143,30 @@ static inline u32 vmx_basic_vmcs_size(u64 vmx_basic)
return (vmx_basic & GENMASK_ULL(44, 32)) >> 32;
}
+static inline u32 vmx_basic_vmcs_mem_type(u64 vmx_basic)
+{
+ return (vmx_basic & GENMASK_ULL(53, 50)) >> 50;
+}
+
+static inline u64 vmx_basic_encode_vmcs_info(u32 revision, u16 size, u8 memtype)
+{
+ return revision | ((u64)size << 32) | ((u64)memtype << 50);
+}
+
+#define VMX_MISC_SAVE_EFER_LMA BIT_ULL(5)
+#define VMX_MISC_ACTIVITY_HLT BIT_ULL(6)
+#define VMX_MISC_ACTIVITY_SHUTDOWN BIT_ULL(7)
+#define VMX_MISC_ACTIVITY_WAIT_SIPI BIT_ULL(8)
+#define VMX_MISC_INTEL_PT BIT_ULL(14)
+#define VMX_MISC_RDMSR_IN_SMM BIT_ULL(15)
+#define VMX_MISC_VMXOFF_BLOCK_SMI BIT_ULL(28)
+#define VMX_MISC_VMWRITE_SHADOW_RO_FIELDS BIT_ULL(29)
+#define VMX_MISC_ZERO_LEN_INS BIT_ULL(30)
+#define VMX_MISC_MSR_LIST_MULTIPLIER 512
+
static inline int vmx_misc_preemption_timer_rate(u64 vmx_misc)
{
- return vmx_misc & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
+ return vmx_misc & GENMASK_ULL(4, 0);
}
static inline int vmx_misc_cr3_count(u64 vmx_misc)
@@ -226,6 +246,8 @@ enum vmcs_field {
VMREAD_BITMAP_HIGH = 0x00002027,
VMWRITE_BITMAP = 0x00002028,
VMWRITE_BITMAP_HIGH = 0x00002029,
+ VE_INFORMATION_ADDRESS = 0x0000202A,
+ VE_INFORMATION_ADDRESS_HIGH = 0x0000202B,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
ENCLS_EXITING_BITMAP = 0x0000202E,
@@ -505,15 +527,17 @@ enum vmcs_field {
#define VMX_EPTP_PWL_4 0x18ull
#define VMX_EPTP_PWL_5 0x20ull
#define VMX_EPTP_AD_ENABLE_BIT (1ull << 6)
+/* The EPTP memtype is encoded in bits 2:0, i.e. doesn't need to be shifted. */
#define VMX_EPTP_MT_MASK 0x7ull
-#define VMX_EPTP_MT_WB 0x6ull
-#define VMX_EPTP_MT_UC 0x0ull
+#define VMX_EPTP_MT_WB X86_MEMTYPE_WB
+#define VMX_EPTP_MT_UC X86_MEMTYPE_UC
#define VMX_EPT_READABLE_MASK 0x1ull
#define VMX_EPT_WRITABLE_MASK 0x2ull
#define VMX_EPT_EXECUTABLE_MASK 0x4ull
#define VMX_EPT_IPAT_BIT (1ull << 6)
#define VMX_EPT_ACCESS_BIT (1ull << 8)
#define VMX_EPT_DIRTY_BIT (1ull << 9)
+#define VMX_EPT_SUPPRESS_VE_BIT (1ull << 63)
#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \
VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK)
@@ -556,18 +580,22 @@ enum vm_entry_failure_code {
/*
* Exit Qualifications for EPT Violations
*/
-#define EPT_VIOLATION_ACC_READ_BIT 0
-#define EPT_VIOLATION_ACC_WRITE_BIT 1
-#define EPT_VIOLATION_ACC_INSTR_BIT 2
-#define EPT_VIOLATION_RWX_SHIFT 3
-#define EPT_VIOLATION_GVA_IS_VALID_BIT 7
-#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
-#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
-#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
-#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
-#define EPT_VIOLATION_RWX_MASK (VMX_EPT_RWX_MASK << EPT_VIOLATION_RWX_SHIFT)
-#define EPT_VIOLATION_GVA_IS_VALID (1 << EPT_VIOLATION_GVA_IS_VALID_BIT)
-#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
+#define EPT_VIOLATION_ACC_READ BIT(0)
+#define EPT_VIOLATION_ACC_WRITE BIT(1)
+#define EPT_VIOLATION_ACC_INSTR BIT(2)
+#define EPT_VIOLATION_PROT_READ BIT(3)
+#define EPT_VIOLATION_PROT_WRITE BIT(4)
+#define EPT_VIOLATION_PROT_EXEC BIT(5)
+#define EPT_VIOLATION_PROT_MASK (EPT_VIOLATION_PROT_READ | \
+ EPT_VIOLATION_PROT_WRITE | \
+ EPT_VIOLATION_PROT_EXEC)
+#define EPT_VIOLATION_GVA_IS_VALID BIT(7)
+#define EPT_VIOLATION_GVA_TRANSLATED BIT(8)
+
+#define EPT_VIOLATION_RWX_TO_PROT(__epte) (((__epte) & VMX_EPT_RWX_MASK) << 3)
+
+static_assert(EPT_VIOLATION_RWX_TO_PROT(VMX_EPT_RWX_MASK) ==
+ (EPT_VIOLATION_PROT_READ | EPT_VIOLATION_PROT_WRITE | EPT_VIOLATION_PROT_EXEC));
/*
* Exit Qualifications for NOTIFY VM EXIT
@@ -630,4 +658,13 @@ enum vmx_l1d_flush_state {
extern enum vmx_l1d_flush_state l1tf_vmx_mitigation;
+struct vmx_ve_information {
+ u32 exit_reason;
+ u32 delivery;
+ u64 exit_qualification;
+ u64 guest_linear_address;
+ u64 guest_physical_address;
+ u16 eptp_index;
+};
+
#endif
diff --git a/arch/x86/include/asm/vmxfeatures.h b/arch/x86/include/asm/vmxfeatures.h
index 266daf5b5b84..09b1d7e607c1 100644
--- a/arch/x86/include/asm/vmxfeatures.h
+++ b/arch/x86/include/asm/vmxfeatures.h
@@ -9,85 +9,85 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature bit
+ * is not displayed in /proc/cpuinfo at all.
*/
/* Pin-Based VM-Execution Controls, EPT/VPID, APIC and VM-Functions, word 0 */
-#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* "" VM-Exit on vectored interrupts */
-#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* "" VM-Exit on NMIs */
+#define VMX_FEATURE_INTR_EXITING ( 0*32+ 0) /* VM-Exit on vectored interrupts */
+#define VMX_FEATURE_NMI_EXITING ( 0*32+ 3) /* VM-Exit on NMIs */
#define VMX_FEATURE_VIRTUAL_NMIS ( 0*32+ 5) /* "vnmi" NMI virtualization */
-#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* VMX Preemption Timer */
-#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* Posted Interrupts */
+#define VMX_FEATURE_PREEMPTION_TIMER ( 0*32+ 6) /* "preemption_timer" VMX Preemption Timer */
+#define VMX_FEATURE_POSTED_INTR ( 0*32+ 7) /* "posted_intr" Posted Interrupts */
/* EPT/VPID features, scattered to bits 16-23 */
-#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* INVVPID is supported */
+#define VMX_FEATURE_INVVPID ( 0*32+ 16) /* "invvpid" INVVPID is supported */
#define VMX_FEATURE_EPT_EXECUTE_ONLY ( 0*32+ 17) /* "ept_x_only" EPT entries can be execute only */
-#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* EPT Accessed/Dirty bits */
-#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* 1GB EPT pages */
-#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* 5-level EPT paging */
+#define VMX_FEATURE_EPT_AD ( 0*32+ 18) /* "ept_ad" EPT Accessed/Dirty bits */
+#define VMX_FEATURE_EPT_1GB ( 0*32+ 19) /* "ept_1gb" 1GB EPT pages */
+#define VMX_FEATURE_EPT_5LEVEL ( 0*32+ 20) /* "ept_5level" 5-level EPT paging */
/* Aggregated APIC features 24-27 */
-#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* TPR shadow + virt APIC */
-#define VMX_FEATURE_APICV ( 0*32+ 25) /* TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
+#define VMX_FEATURE_FLEXPRIORITY ( 0*32+ 24) /* "flexpriority" TPR shadow + virt APIC */
+#define VMX_FEATURE_APICV ( 0*32+ 25) /* "apicv" TPR shadow + APIC reg virt + virt intr delivery + posted interrupts */
/* VM-Functions, shifted to bits 28-31 */
-#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* EPTP switching (in guest) */
+#define VMX_FEATURE_EPTP_SWITCHING ( 0*32+ 28) /* "eptp_switching" EPTP switching (in guest) */
/* Primary Processor-Based VM-Execution Controls, word 1 */
-#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* "" VM-Exit if INTRs are unblocked in guest */
+#define VMX_FEATURE_INTR_WINDOW_EXITING ( 1*32+ 2) /* VM-Exit if INTRs are unblocked in guest */
#define VMX_FEATURE_USE_TSC_OFFSETTING ( 1*32+ 3) /* "tsc_offset" Offset hardware TSC when read in guest */
-#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* "" VM-Exit on HLT */
-#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* "" VM-Exit on INVLPG */
-#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* "" VM-Exit on MWAIT */
-#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* "" VM-Exit on RDPMC */
-#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* "" VM-Exit on RDTSC */
-#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* "" VM-Exit on writes to CR3 */
-#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* "" VM-Exit on reads from CR3 */
-#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* "" Enable Tertiary VM-Execution Controls */
-#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* "" VM-Exit on writes to CR8 */
-#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* "" VM-Exit on reads from CR8 */
+#define VMX_FEATURE_HLT_EXITING ( 1*32+ 7) /* VM-Exit on HLT */
+#define VMX_FEATURE_INVLPG_EXITING ( 1*32+ 9) /* VM-Exit on INVLPG */
+#define VMX_FEATURE_MWAIT_EXITING ( 1*32+ 10) /* VM-Exit on MWAIT */
+#define VMX_FEATURE_RDPMC_EXITING ( 1*32+ 11) /* VM-Exit on RDPMC */
+#define VMX_FEATURE_RDTSC_EXITING ( 1*32+ 12) /* VM-Exit on RDTSC */
+#define VMX_FEATURE_CR3_LOAD_EXITING ( 1*32+ 15) /* VM-Exit on writes to CR3 */
+#define VMX_FEATURE_CR3_STORE_EXITING ( 1*32+ 16) /* VM-Exit on reads from CR3 */
+#define VMX_FEATURE_TERTIARY_CONTROLS ( 1*32+ 17) /* Enable Tertiary VM-Execution Controls */
+#define VMX_FEATURE_CR8_LOAD_EXITING ( 1*32+ 19) /* VM-Exit on writes to CR8 */
+#define VMX_FEATURE_CR8_STORE_EXITING ( 1*32+ 20) /* VM-Exit on reads from CR8 */
#define VMX_FEATURE_VIRTUAL_TPR ( 1*32+ 21) /* "vtpr" TPR virtualization, a.k.a. TPR shadow */
-#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* "" VM-Exit if NMIs are unblocked in guest */
-#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* "" VM-Exit on accesses to debug registers */
-#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* "" VM-Exit on *all* IN{S} and OUT{S}*/
-#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* "" VM-Exit based on I/O port */
+#define VMX_FEATURE_NMI_WINDOW_EXITING ( 1*32+ 22) /* VM-Exit if NMIs are unblocked in guest */
+#define VMX_FEATURE_MOV_DR_EXITING ( 1*32+ 23) /* VM-Exit on accesses to debug registers */
+#define VMX_FEATURE_UNCOND_IO_EXITING ( 1*32+ 24) /* VM-Exit on *all* IN{S} and OUT{S}*/
+#define VMX_FEATURE_USE_IO_BITMAPS ( 1*32+ 25) /* VM-Exit based on I/O port */
#define VMX_FEATURE_MONITOR_TRAP_FLAG ( 1*32+ 27) /* "mtf" VMX single-step VM-Exits */
-#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* "" VM-Exit based on MSR index */
-#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* "" VM-Exit on MONITOR (MWAIT's accomplice) */
-#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* "" VM-Exit on PAUSE (unconditionally) */
-#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* "" Enable Secondary VM-Execution Controls */
+#define VMX_FEATURE_USE_MSR_BITMAPS ( 1*32+ 28) /* VM-Exit based on MSR index */
+#define VMX_FEATURE_MONITOR_EXITING ( 1*32+ 29) /* VM-Exit on MONITOR (MWAIT's accomplice) */
+#define VMX_FEATURE_PAUSE_EXITING ( 1*32+ 30) /* VM-Exit on PAUSE (unconditionally) */
+#define VMX_FEATURE_SEC_CONTROLS ( 1*32+ 31) /* Enable Secondary VM-Execution Controls */
/* Secondary Processor-Based VM-Execution Controls, word 2 */
#define VMX_FEATURE_VIRT_APIC_ACCESSES ( 2*32+ 0) /* "vapic" Virtualize memory mapped APIC accesses */
-#define VMX_FEATURE_EPT ( 2*32+ 1) /* Extended Page Tables, a.k.a. Two-Dimensional Paging */
-#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* "" VM-Exit on {S,L}*DT instructions */
-#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* "" Enable RDTSCP in guest */
-#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* "" Virtualize X2APIC for the guest */
-#define VMX_FEATURE_VPID ( 2*32+ 5) /* Virtual Processor ID (TLB ASID modifier) */
-#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* "" VM-Exit on WBINVD */
-#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* Allow Big Real Mode and other "invalid" states */
+#define VMX_FEATURE_EPT ( 2*32+ 1) /* "ept" Extended Page Tables, a.k.a. Two-Dimensional Paging */
+#define VMX_FEATURE_DESC_EXITING ( 2*32+ 2) /* VM-Exit on {S,L}*DT instructions */
+#define VMX_FEATURE_RDTSCP ( 2*32+ 3) /* Enable RDTSCP in guest */
+#define VMX_FEATURE_VIRTUAL_X2APIC ( 2*32+ 4) /* Virtualize X2APIC for the guest */
+#define VMX_FEATURE_VPID ( 2*32+ 5) /* "vpid" Virtual Processor ID (TLB ASID modifier) */
+#define VMX_FEATURE_WBINVD_EXITING ( 2*32+ 6) /* VM-Exit on WBINVD */
+#define VMX_FEATURE_UNRESTRICTED_GUEST ( 2*32+ 7) /* "unrestricted_guest" Allow Big Real Mode and other "invalid" states */
#define VMX_FEATURE_APIC_REGISTER_VIRT ( 2*32+ 8) /* "vapic_reg" Hardware emulation of reads to the virtual-APIC */
#define VMX_FEATURE_VIRT_INTR_DELIVERY ( 2*32+ 9) /* "vid" Evaluation and delivery of pending virtual interrupts */
#define VMX_FEATURE_PAUSE_LOOP_EXITING ( 2*32+ 10) /* "ple" Conditionally VM-Exit on PAUSE at CPL0 */
-#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* "" VM-Exit on RDRAND*/
-#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* "" Enable INVPCID in guest */
-#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* "" Enable VM-Functions (leaf dependent) */
-#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* VMREAD/VMWRITE in guest can access shadow VMCS */
-#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* "" VM-Exit on ENCLS (leaf dependent) */
-#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* "" VM-Exit on RDSEED */
+#define VMX_FEATURE_RDRAND_EXITING ( 2*32+ 11) /* VM-Exit on RDRAND*/
+#define VMX_FEATURE_INVPCID ( 2*32+ 12) /* Enable INVPCID in guest */
+#define VMX_FEATURE_VMFUNC ( 2*32+ 13) /* Enable VM-Functions (leaf dependent) */
+#define VMX_FEATURE_SHADOW_VMCS ( 2*32+ 14) /* "shadow_vmcs" VMREAD/VMWRITE in guest can access shadow VMCS */
+#define VMX_FEATURE_ENCLS_EXITING ( 2*32+ 15) /* VM-Exit on ENCLS (leaf dependent) */
+#define VMX_FEATURE_RDSEED_EXITING ( 2*32+ 16) /* VM-Exit on RDSEED */
#define VMX_FEATURE_PAGE_MOD_LOGGING ( 2*32+ 17) /* "pml" Log dirty pages into buffer */
-#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "" Conditionally reflect EPT violations as #VE exceptions */
-#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* "" Suppress VMX indicators in Processor Trace */
-#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* "" Enable XSAVES and XRSTORS in guest */
+#define VMX_FEATURE_EPT_VIOLATION_VE ( 2*32+ 18) /* "ept_violation_ve" Conditionally reflect EPT violations as #VE exceptions */
+#define VMX_FEATURE_PT_CONCEAL_VMX ( 2*32+ 19) /* Suppress VMX indicators in Processor Trace */
+#define VMX_FEATURE_XSAVES ( 2*32+ 20) /* Enable XSAVES and XRSTORS in guest */
#define VMX_FEATURE_MODE_BASED_EPT_EXEC ( 2*32+ 22) /* "ept_mode_based_exec" Enable separate EPT EXEC bits for supervisor vs. user */
-#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* "" Processor Trace logs GPAs */
-#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* Scale hardware TSC when read in guest */
-#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* Enable TPAUSE, UMONITOR, UMWAIT in guest */
-#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* "" VM-Exit on ENCLV (leaf dependent) */
-#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* "" VM-Exit when bus lock caused */
-#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* VM-Exit when no event windows after notify window */
+#define VMX_FEATURE_PT_USE_GPA ( 2*32+ 24) /* Processor Trace logs GPAs */
+#define VMX_FEATURE_TSC_SCALING ( 2*32+ 25) /* "tsc_scaling" Scale hardware TSC when read in guest */
+#define VMX_FEATURE_USR_WAIT_PAUSE ( 2*32+ 26) /* "usr_wait_pause" Enable TPAUSE, UMONITOR, UMWAIT in guest */
+#define VMX_FEATURE_ENCLV_EXITING ( 2*32+ 28) /* VM-Exit on ENCLV (leaf dependent) */
+#define VMX_FEATURE_BUS_LOCK_DETECTION ( 2*32+ 30) /* VM-Exit when bus lock caused */
+#define VMX_FEATURE_NOTIFY_VM_EXITING ( 2*32+ 31) /* "notify_vm_exiting" VM-Exit when no event windows after notify window */
/* Tertiary Processor-Based VM-Execution Controls, word 3 */
-#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* Enable IPI virtualization */
+#define VMX_FEATURE_IPI_VIRT ( 3*32+ 4) /* "ipi_virt" Enable IPI virtualization */
#endif /* _ASM_X86_VMXFEATURES_H */
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h
deleted file mode 100644
index 183e98e49ab9..000000000000
--- a/arch/x86/include/asm/vvar.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * vvar.h: Shared vDSO/kernel variable declarations
- * Copyright (c) 2011 Andy Lutomirski
- *
- * A handful of variables are accessible (read-only) from userspace
- * code in the vsyscall page and the vdso. They are declared here.
- * Some other file must define them with DEFINE_VVAR.
- *
- * In normal kernel code, they are used like any other variable.
- * In user code, they are accessed through the VVAR macro.
- *
- * These variables live in a page of kernel data that has an extra RO
- * mapping for userspace. Each variable needs a unique offset within
- * that page; specify that offset with the DECLARE_VVAR macro. (If
- * you mess up, the linker will catch it.)
- */
-
-#ifndef _ASM_X86_VVAR_H
-#define _ASM_X86_VVAR_H
-
-#ifdef EMIT_VVAR
-/*
- * EMIT_VVAR() is used by the kernel linker script to put vvars in the
- * right place. Also, it's used by kernel code to import offsets values.
- */
-#define DECLARE_VVAR(offset, type, name) \
- EMIT_VVAR(name, offset)
-
-#else
-
-extern char __vvar_page;
-
-#define DECLARE_VVAR(offset, type, name) \
- extern type vvar_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
- extern type timens_ ## name[CS_BASES] \
- __attribute__((visibility("hidden"))); \
-
-#define VVAR(name) (vvar_ ## name)
-#define TIMENS(name) (timens_ ## name)
-
-#define DEFINE_VVAR(type, name) \
- type name[CS_BASES] \
- __attribute__((section(".vvar_" #name), aligned(16))) __visible
-
-#endif
-
-/* DECLARE_VVAR(offset, type, name) */
-
-DECLARE_VVAR(128, struct vdso_data, _vdso_data)
-
-#undef DECLARE_VVAR
-
-#endif
diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h
index e8d7d4941c4c..422a47746657 100644
--- a/arch/x86/include/asm/word-at-a-time.h
+++ b/arch/x86/include/asm/word-at-a-time.h
@@ -5,45 +5,12 @@
#include <linux/bitops.h>
#include <linux/wordpart.h>
-/*
- * This is largely generic for little-endian machines, but the
- * optimal byte mask counting is probably going to be something
- * that is architecture-specific. If you have a reliably fast
- * bit count instruction, that might be better than the multiply
- * and shift, for example.
- */
struct word_at_a_time {
const unsigned long one_bits, high_bits;
};
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
-#ifdef CONFIG_64BIT
-
-/*
- * Jan Achrenius on G+: microoptimized version of
- * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
- * that works for the bytemasks without having to
- * mask them first.
- */
-static inline long count_masked_bytes(unsigned long mask)
-{
- return mask*0x0001020304050608ul >> 56;
-}
-
-#else /* 32-bit case */
-
-/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
-static inline long count_masked_bytes(long mask)
-{
- /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
- long a = (0x0ff0001+mask) >> 23;
- /* Fix the 1 for 00 case */
- return a & mask;
-}
-
-#endif
-
/* Return nonzero if it has a zero */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
@@ -57,6 +24,22 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
return bits;
}
+#ifdef CONFIG_64BIT
+
+/* Keep the initial has_zero() value for both bitmask and size calc */
+#define create_zero_mask(bits) (bits)
+
+static inline unsigned long zero_bytemask(unsigned long bits)
+{
+ bits = (bits - 1) & ~bits;
+ return bits >> 7;
+}
+
+#define find_zero(bits) (__ffs(bits) >> 3)
+
+#else
+
+/* Create the final mask for both bytemask and size */
static inline unsigned long create_zero_mask(unsigned long bits)
{
bits = (bits - 1) & ~bits;
@@ -66,11 +49,17 @@ static inline unsigned long create_zero_mask(unsigned long bits)
/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline unsigned long find_zero(unsigned long mask)
{
- return count_masked_bytes(mask);
+ /* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+ long a = (0x0ff0001+mask) >> 23;
+ /* Fix the 1 for 00 case */
+ return a & mask;
}
+#endif
+
/*
* Load an unaligned word from kernel space.
*
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 6149eabe200f..213cf5379a5a 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -149,12 +149,22 @@ struct x86_init_acpi {
* @enc_status_change_finish Notify HV after the encryption status of a range is changed
* @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status
* @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status
+ * @enc_kexec_begin Begin the two-step process of converting shared memory back
+ * to private. It stops the new conversions from being started
+ * and waits in-flight conversions to finish, if possible.
+ * @enc_kexec_finish Finish the two-step process of converting shared memory to
+ * private. All memory is private after the call when
+ * the function returns.
+ * It is called on only one CPU while the others are shut down
+ * and with interrupts disabled.
*/
struct x86_guest {
- bool (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
- bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc);
+ int (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc);
bool (*enc_tlb_flush_required)(bool enc);
bool (*enc_cache_flush_required)(void);
+ void (*enc_kexec_begin)(void);
+ void (*enc_kexec_finish)(void);
};
/**
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a2dd24947eb8..59a62c3780a2 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -39,9 +39,11 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/pgtable.h>
+#include <linux/instrumentation.h>
#include <trace/events/xen.h>
+#include <asm/alternative.h>
#include <asm/page.h>
#include <asm/smap.h>
#include <asm/nospec-branch.h>
@@ -86,11 +88,20 @@ struct xen_dm_op_buf;
* there aren't more than 5 arguments...)
*/
-extern struct { char _entry[32]; } hypercall_page[];
+void xen_hypercall_func(void);
+DECLARE_STATIC_CALL(xen_hypercall, xen_hypercall_func);
-#define __HYPERCALL "call hypercall_page+%c[offset]"
-#define __HYPERCALL_ENTRY(x) \
- [offset] "i" (__HYPERVISOR_##x * sizeof(hypercall_page[0]))
+#ifdef MODULE
+#define __ADDRESSABLE_xen_hypercall
+#else
+#define __ADDRESSABLE_xen_hypercall __ADDRESSABLE_ASM_STR(__SCK__xen_hypercall)
+#endif
+
+#define __HYPERCALL \
+ __ADDRESSABLE_xen_hypercall \
+ "call __SCT__xen_hypercall"
+
+#define __HYPERCALL_ENTRY(x) "a" (x)
#ifdef CONFIG_X86_32
#define __HYPERCALL_RETREG "eax"
@@ -148,7 +159,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_0ARG(); \
asm volatile (__HYPERCALL \
: __HYPERCALL_0PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER0); \
(type)__res; \
})
@@ -159,7 +170,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_1ARG(a1); \
asm volatile (__HYPERCALL \
: __HYPERCALL_1PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER1); \
(type)__res; \
})
@@ -170,7 +181,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_2ARG(a1, a2); \
asm volatile (__HYPERCALL \
: __HYPERCALL_2PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER2); \
(type)__res; \
})
@@ -181,7 +192,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_3ARG(a1, a2, a3); \
asm volatile (__HYPERCALL \
: __HYPERCALL_3PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER3); \
(type)__res; \
})
@@ -192,7 +203,7 @@ extern struct { char _entry[32]; } hypercall_page[];
__HYPERCALL_4ARG(a1, a2, a3, a4); \
asm volatile (__HYPERCALL \
: __HYPERCALL_4PARAM \
- : __HYPERCALL_ENTRY(name) \
+ : __HYPERCALL_ENTRY(__HYPERVISOR_ ## name) \
: __HYPERCALL_CLOBBER4); \
(type)__res; \
})
@@ -206,12 +217,9 @@ xen_single_call(unsigned int call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
- if (call >= PAGE_SIZE / sizeof(hypercall_page[0]))
- return -EINVAL;
-
- asm volatile(CALL_NOSPEC
+ asm volatile(__HYPERCALL
: __HYPERCALL_5PARAM
- : [thunk_target] "a" (&hypercall_page[call])
+ : __HYPERCALL_ENTRY(call)
: __HYPERCALL_CLOBBER5);
return (long)__res;
@@ -223,14 +231,12 @@ static __always_inline void __xen_stac(void)
* Suppress objtool seeing the STAC/CLAC and getting confused about it
* calling random code with AC=1.
*/
- asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
- ASM_STAC ::: "memory", "flags");
+ asm volatile(ASM_STAC_UNSAFE ::: "memory", "flags");
}
static __always_inline void __xen_clac(void)
{
- asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
- ASM_CLAC ::: "memory", "flags");
+ asm volatile(ASM_CLAC_UNSAFE ::: "memory", "flags");
}
static inline long
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 64fbd2dbc5b7..bd0fc69a10a7 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,11 +62,6 @@ void xen_arch_unregister_cpu(int num);
#ifdef CONFIG_PVH
void __init xen_pvh_init(struct boot_params *boot_params);
void __init mem_map_via_hcall(struct boot_params *boot_params_p);
-#ifdef CONFIG_XEN_PVH
-void __init xen_reserve_extra_memory(struct boot_params *bootp);
-#else
-static inline void xen_reserve_extra_memory(struct boot_params *bootp) { }
-#endif
#endif
/* Lazy mode for batching updates / context switch */
@@ -77,18 +72,10 @@ enum xen_lazy_mode {
};
DECLARE_PER_CPU(enum xen_lazy_mode, xen_lazy_mode);
-DECLARE_PER_CPU(unsigned int, xen_lazy_nesting);
static inline void enter_lazy(enum xen_lazy_mode mode)
{
- enum xen_lazy_mode old_mode = this_cpu_read(xen_lazy_mode);
-
- if (mode == old_mode) {
- this_cpu_inc(xen_lazy_nesting);
- return;
- }
-
- BUG_ON(old_mode != XEN_LAZY_NONE);
+ BUG_ON(this_cpu_read(xen_lazy_mode) != XEN_LAZY_NONE);
this_cpu_write(xen_lazy_mode, mode);
}
@@ -97,10 +84,7 @@ static inline void leave_lazy(enum xen_lazy_mode mode)
{
BUG_ON(this_cpu_read(xen_lazy_mode) != mode);
- if (this_cpu_read(xen_lazy_nesting) == 0)
- this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE);
- else
- this_cpu_dec(xen_lazy_nesting);
+ this_cpu_write(xen_lazy_mode, XEN_LAZY_NONE);
}
enum xen_lazy_mode xen_get_lazy_mode(void);
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index baca0b00ef76..a078a2b0f032 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -72,7 +72,7 @@
#endif
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/* Explicitly size integers that represent pfns in the public interface
* with Xen so that on ARM we can have one ABI that works for 32 and 64
* bit guests. */
@@ -137,7 +137,7 @@ DEFINE_GUEST_HANDLE(xen_ulong_t);
#define TI_SET_DPL(_ti, _dpl) ((_ti)->flags |= (_dpl))
#define TI_SET_IF(_ti, _if) ((_ti)->flags |= ((!!(_if))<<2))
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct trap_info {
uint8_t vector; /* exception vector */
uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */
@@ -186,7 +186,7 @@ struct arch_shared_info {
uint32_t wc_sec_hi;
#endif
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#ifdef CONFIG_X86_32
#include <asm/xen/interface_32.h>
@@ -196,7 +196,7 @@ struct arch_shared_info {
#include <asm/pvclock-abi.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* The following is all CPU context. Note that the fpu_ctxt block is filled
* in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
@@ -376,7 +376,7 @@ struct xen_pmu_arch {
} c;
};
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
* Prefix forces emulation of some non-trapping instructions.
diff --git a/arch/x86/include/asm/xen/interface_32.h b/arch/x86/include/asm/xen/interface_32.h
index dc40578abded..74d9768a9cf7 100644
--- a/arch/x86/include/asm/xen/interface_32.h
+++ b/arch/x86/include/asm/xen/interface_32.h
@@ -44,7 +44,7 @@
*/
#define __HYPERVISOR_VIRT_START 0xF5800000
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct cpu_user_regs {
uint32_t ebx;
@@ -85,7 +85,7 @@ typedef struct xen_callback xen_callback_t;
#define XEN_CALLBACK(__cs, __eip) \
((struct xen_callback){ .cs = (__cs), .eip = (unsigned long)(__eip) })
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
/*
diff --git a/arch/x86/include/asm/xen/interface_64.h b/arch/x86/include/asm/xen/interface_64.h
index c10f279aae93..38a19edb81a3 100644
--- a/arch/x86/include/asm/xen/interface_64.h
+++ b/arch/x86/include/asm/xen/interface_64.h
@@ -77,7 +77,7 @@
#define VGCF_in_syscall (1<<_VGCF_in_syscall)
#define VGCF_IN_SYSCALL VGCF_in_syscall
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
struct iret_context {
/* Top of stack (%rsp at point of hypercall). */
@@ -143,7 +143,7 @@ typedef unsigned long xen_callback_t;
#define XEN_CALLBACK(__cs, __rip) \
((unsigned long)(__rip))
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_XEN_INTERFACE_64_H */
diff --git a/arch/x86/include/uapi/asm/amd_hsmp.h b/arch/x86/include/uapi/asm/amd_hsmp.h
index e5d182c7373c..92d8f256d096 100644
--- a/arch/x86/include/uapi/asm/amd_hsmp.h
+++ b/arch/x86/include/uapi/asm/amd_hsmp.h
@@ -50,6 +50,12 @@ enum hsmp_message_ids {
HSMP_GET_METRIC_TABLE_VER, /* 23h Get metrics table version */
HSMP_GET_METRIC_TABLE, /* 24h Get metrics table */
HSMP_GET_METRIC_TABLE_DRAM_ADDR,/* 25h Get metrics table dram address */
+ HSMP_SET_XGMI_PSTATE_RANGE, /* 26h Set xGMI P-state range */
+ HSMP_CPU_RAIL_ISO_FREQ_POLICY, /* 27h Get/Set Cpu Iso frequency policy */
+ HSMP_DFC_ENABLE_CTRL, /* 28h Enable/Disable DF C-state */
+ HSMP_GET_RAPL_UNITS = 0x30, /* 30h Get scaling factor for energy */
+ HSMP_GET_RAPL_CORE_COUNTER, /* 31h Get core energy counter value */
+ HSMP_GET_RAPL_PACKAGE_COUNTER, /* 32h Get package energy counter value */
HSMP_MSG_ID_MAX,
};
@@ -65,6 +71,7 @@ enum hsmp_msg_type {
HSMP_RSVD = -1,
HSMP_SET = 0,
HSMP_GET = 1,
+ HSMP_SET_GET = 2,
};
enum hsmp_proto_versions {
@@ -72,7 +79,8 @@ enum hsmp_proto_versions {
HSMP_PROTO_VER3,
HSMP_PROTO_VER4,
HSMP_PROTO_VER5,
- HSMP_PROTO_VER6
+ HSMP_PROTO_VER6,
+ HSMP_PROTO_VER7
};
struct hsmp_msg_desc {
@@ -88,7 +96,8 @@ struct hsmp_msg_desc {
*
* Not supported messages would return -ENOMSG.
*/
-static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
+static const struct hsmp_msg_desc hsmp_msg_desc_table[]
+ __attribute__((unused)) = {
/* RESERVED */
{0, 0, HSMP_RSVD},
@@ -299,7 +308,7 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
* HSMP_SET_POWER_MODE, num_args = 1, response_sz = 0
* input: args[0] = power efficiency mode[2:0]
*/
- {1, 0, HSMP_SET},
+ {1, 1, HSMP_SET_GET},
/*
* HSMP_SET_PSTATE_MAX_MIN, num_args = 1, response_sz = 0
@@ -324,6 +333,58 @@ static const struct hsmp_msg_desc hsmp_msg_desc_table[] = {
* output: args[1] = upper 32 bits of the address
*/
{0, 2, HSMP_GET},
+
+ /*
+ * HSMP_SET_XGMI_PSTATE_RANGE, num_args = 1, response_sz = 0
+ * input: args[0] = min xGMI p-state[15:8] + max xGMI p-state[7:0]
+ */
+ {1, 0, HSMP_SET},
+
+ /*
+ * HSMP_CPU_RAIL_ISO_FREQ_POLICY, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] +
+ * disable/enable independent control[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /*
+ * HSMP_DFC_ENABLE_CTRL, num_args = 1, response_sz = 1
+ * input: args[0] = set/get policy[31] + enable/disable DFC[0]
+ * output: args[0] = current policy[0]
+ */
+ {1, 1, HSMP_SET_GET},
+
+ /* RESERVED(0x29-0x2f) */
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+ {0, 0, HSMP_RSVD},
+
+ /*
+ * HSMP_GET_RAPL_UNITS, response_sz = 1
+ * output: args[0] = tu value[19:16] + esu value[12:8]
+ */
+ {0, 1, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_CORE_COUNTER, num_args = 1, response_sz = 1
+ * input: args[0] = apic id[15:0]
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {1, 2, HSMP_GET},
+
+ /*
+ * HSMP_GET_RAPL_PACKAGE_COUNTER, num_args = 0, response_sz = 1
+ * output: args[0] = lower 32 bits of energy
+ * output: args[1] = upper 32 bits of energy
+ */
+ {0, 2, HSMP_GET},
+
};
/* Metrics table (supported only with proto version 6) */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 9b82eebd7add..dafbf581c515 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -26,7 +26,7 @@
#define XLF_5LEVEL_ENABLED (1<<6)
#define XLF_MEM_ENCRYPTION (1<<7)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/screen_info.h>
@@ -210,6 +210,6 @@ enum x86_hardware_subarch {
X86_NR_SUBARCHS,
};
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_X86_BOOTPARAM_H */
diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h
index 2f491efe3a12..55bc66867156 100644
--- a/arch/x86/include/uapi/asm/e820.h
+++ b/arch/x86/include/uapi/asm/e820.h
@@ -54,7 +54,7 @@
*/
#define E820_RESERVED_KERN 128
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
struct e820entry {
__u64 addr; /* start of memory segment */
@@ -76,7 +76,7 @@ struct e820map {
#define BIOS_ROM_BASE 0xffe00000
#define BIOS_ROM_END 0xffffffff
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_E820_H */
diff --git a/arch/x86/include/uapi/asm/elf.h b/arch/x86/include/uapi/asm/elf.h
new file mode 100644
index 000000000000..468e135fa285
--- /dev/null
+++ b/arch/x86/include/uapi/asm/elf.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_ELF_H
+#define _UAPI_ASM_X86_ELF_H
+
+#include <linux/types.h>
+
+struct x86_xfeat_component {
+ __u32 type;
+ __u32 size;
+ __u32 offset;
+ __u32 flags;
+} __packed;
+
+_Static_assert(sizeof(struct x86_xfeat_component) % 4 == 0, "x86_xfeat_component is not aligned");
+
+#endif /* _UAPI_ASM_X86_ELF_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ef11aa4cab42..460306b35a4b 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {
#define KVM_RUN_X86_SMM (1 << 0)
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -438,6 +439,8 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
+#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
+#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
@@ -457,8 +460,13 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
-/* attributes for system fd (group 0) */
-#define KVM_X86_XCOMP_GUEST_SUPP 0
+/* vendor-independent attributes for system fd (group 0) */
+#define KVM_X86_GRP_SYSTEM 0
+# define KVM_X86_XCOMP_GUEST_SUPP 0
+
+/* vendor-specific groups and attributes for system fd */
+#define KVM_X86_GRP_SEV 1
+# define KVM_X86_SEV_VMSA_FEATURES 0
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@@ -551,6 +559,9 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8)
+#define KVM_XEN_MSR_MIN_INDEX 0x40000000u
+#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
@@ -689,6 +700,14 @@ enum sev_cmd_id {
/* Guest Migration Extension */
KVM_SEV_SEND_CANCEL,
+ /* Second time is the charm; improved versions of the above ioctls. */
+ KVM_SEV_INIT2,
+
+ /* SNP-specific commands */
+ KVM_SEV_SNP_LAUNCH_START = 100,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ KVM_SEV_SNP_LAUNCH_FINISH,
+
KVM_SEV_NR_MAX,
};
@@ -700,6 +719,14 @@ struct kvm_sev_cmd {
__u32 sev_fd;
};
+struct kvm_sev_init {
+ __u64 vmsa_features;
+ __u32 flags;
+ __u16 ghcb_version;
+ __u16 pad1;
+ __u32 pad2[8];
+};
+
struct kvm_sev_launch_start {
__u32 handle;
__u32 policy;
@@ -808,6 +835,48 @@ struct kvm_sev_receive_update_data {
__u32 pad2;
};
+struct kvm_sev_snp_launch_start {
+ __u64 policy;
+ __u8 gosvw[16];
+ __u16 flags;
+ __u8 pad0[6];
+ __u64 pad1[4];
+};
+
+/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start;
+ __u64 uaddr;
+ __u64 len;
+ __u8 type;
+ __u8 pad0;
+ __u16 flags;
+ __u32 pad1;
+ __u64 pad2[4];
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+
+struct kvm_sev_snp_launch_finish {
+ __u64 id_block_uaddr;
+ __u64 id_auth_uaddr;
+ __u8 id_block_en;
+ __u8 auth_key_en;
+ __u8 vcek_disabled;
+ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+ __u8 pad0[3];
+ __u16 flags;
+ __u64 pad1[4];
+};
+
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
@@ -856,5 +925,9 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
+#define KVM_X86_SEV_VM 2
+#define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
+#define KVM_X86_TDX_VM 5
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index d62ac5db093b..a82c039d8e6a 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h
@@ -12,7 +12,7 @@
/* The size of each LDT entry. */
#define LDT_ENTRY_SIZE 8
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
/*
* Note on 64bit base and limit is ignored and you cannot set DS/ES/CS
* not to the default values if you still want to do syscalls. This
@@ -44,5 +44,5 @@ struct user_desc {
#define MODIFY_LDT_CONTENTS_STACK 1
#define MODIFY_LDT_CONTENTS_CODE 2
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _ASM_X86_LDT_H */
diff --git a/arch/x86/include/uapi/asm/mce.h b/arch/x86/include/uapi/asm/mce.h
index db9adc081c5a..cb6b48a7c22b 100644
--- a/arch/x86/include/uapi/asm/mce.h
+++ b/arch/x86/include/uapi/asm/mce.h
@@ -8,7 +8,8 @@
/*
* Fields are zero when not available. Also, this struct is shared with
* userspace mcelog and thus must keep existing fields at current offsets.
- * Only add new fields to the end of the structure
+ * Only add new, shared fields to the end of the structure.
+ * Do not add vendor-specific fields.
*/
struct mce {
__u64 status; /* Bank's MCi_STATUS MSR */
diff --git a/arch/x86/include/uapi/asm/mman.h b/arch/x86/include/uapi/asm/mman.h
index 46cdc941f958..ac1e6277212b 100644
--- a/arch/x86/include/uapi/asm/mman.h
+++ b/arch/x86/include/uapi/asm/mman.h
@@ -5,9 +5,6 @@
#define MAP_32BIT 0x40 /* only give out 32bit addresses */
#define MAP_ABOVE4G 0x80 /* only map above 4GB */
-/* Flags for map_shadow_stack(2) */
-#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */
-
#include <asm-generic/mman.h>
#endif /* _ASM_X86_MMAN_H */
diff --git a/arch/x86/include/uapi/asm/msr.h b/arch/x86/include/uapi/asm/msr.h
index e7516b402a00..4b8917ca28fe 100644
--- a/arch/x86/include/uapi/asm/msr.h
+++ b/arch/x86/include/uapi/asm/msr.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_MSR_H
#define _UAPI_ASM_X86_MSR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/ioctl.h>
@@ -10,5 +10,5 @@
#define X86_IOC_RDMSR_REGS _IOWR('c', 0xA0, __u32[8])
#define X86_IOC_WRMSR_REGS _IOWR('c', 0xA1, __u32[8])
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_MSR_H */
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 16074b9c93bb..5823584dea13 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,7 +25,7 @@
#else /* __i386__ */
-#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+#if defined(__ASSEMBLER__) || defined(__FRAME_OFFSETS)
/*
* C ABI says these regs are callee-preserved. They aren't saved on kernel entry
* unless syscall needs a complete, fully filled "struct pt_regs".
@@ -57,7 +57,7 @@
#define EFLAGS 144
#define RSP 152
#define SS 160
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
/* top of stack page */
#define FRAME_SIZE 168
@@ -87,7 +87,7 @@
#define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#endif
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index 85165c0edafc..e0b5b4f6226b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -7,7 +7,7 @@
#include <asm/processor-flags.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#ifdef __i386__
/* this struct defines the way the registers are stored on the
@@ -81,6 +81,6 @@ struct pt_regs {
-#endif /* !__ASSEMBLY__ */
+#endif /* !__ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/uapi/asm/setup_data.h b/arch/x86/include/uapi/asm/setup_data.h
index b111b0c18544..50c45ead4e7c 100644
--- a/arch/x86/include/uapi/asm/setup_data.h
+++ b/arch/x86/include/uapi/asm/setup_data.h
@@ -18,7 +18,7 @@
#define SETUP_INDIRECT (1<<31)
#define SETUP_TYPE_MAX (SETUP_ENUM_MAX | SETUP_INDIRECT)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
@@ -78,6 +78,6 @@ struct ima_setup_data {
__u64 size;
} __attribute__((packed));
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SETUP_DATA_H */
diff --git a/arch/x86/include/uapi/asm/signal.h b/arch/x86/include/uapi/asm/signal.h
index f777346450ec..1067efabf18b 100644
--- a/arch/x86/include/uapi/asm/signal.h
+++ b/arch/x86/include/uapi/asm/signal.h
@@ -2,7 +2,7 @@
#ifndef _UAPI_ASM_X86_SIGNAL_H
#define _UAPI_ASM_X86_SIGNAL_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/types.h>
#include <linux/compiler.h>
@@ -16,7 +16,7 @@ struct siginfo;
typedef unsigned long sigset_t;
#endif /* __KERNEL__ */
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#define SIGHUP 1
@@ -68,7 +68,7 @@ typedef unsigned long sigset_t;
#include <asm-generic/signal-defs.h>
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
# ifndef __KERNEL__
@@ -106,6 +106,6 @@ typedef struct sigaltstack {
__kernel_size_t ss_size;
} stack_t;
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _UAPI_ASM_X86_SIGNAL_H */
diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..ec1321248dac 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -95,6 +95,7 @@
#define SVM_EXIT_CR14_WRITE_TRAP 0x09e
#define SVM_EXIT_CR15_WRITE_TRAP 0x09f
#define SVM_EXIT_INVPCID 0x0a2
+#define SVM_EXIT_IDLE_HLT 0x0a6
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
@@ -115,6 +116,7 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
@@ -223,6 +225,7 @@
{ SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \
{ SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \
{ SVM_EXIT_INVPCID, "invpcid" }, \
+ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \