summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/Kconfig.cpu2
-rw-r--r--arch/x86/Makefile10
-rw-r--r--arch/x86/boot/header.S14
-rw-r--r--arch/x86/boot/setup.ld6
-rw-r--r--arch/x86/entry/entry.S23
-rw-r--r--arch/x86/entry/entry_32.S3
-rw-r--r--arch/x86/entry/entry_64.S11
-rw-r--r--arch/x86/entry/entry_64_compat.S1
-rw-r--r--arch/x86/include/asm/coco.h5
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h2
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/hardirq.h2
-rw-r--r--arch/x86/include/asm/idtentry.h2
-rw-r--r--arch/x86/include/asm/irq.h2
-rw-r--r--arch/x86/include/asm/irq_vectors.h2
-rw-r--r--arch/x86/include/asm/jump_label.h6
-rw-r--r--arch/x86/include/asm/kmsan.h17
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/nospec-branch.h25
-rw-r--r--arch/x86/include/asm/rmwcc.h2
-rw-r--r--arch/x86/include/asm/special_insns.h2
-rw-r--r--arch/x86/include/asm/uaccess.h10
-rw-r--r--arch/x86/include/asm/vsyscall.h10
-rw-r--r--arch/x86/include/uapi/asm/kvm.h278
-rw-r--r--arch/x86/include/uapi/asm/kvm_para.h2
-rw-r--r--arch/x86/kernel/cpu/bugs.c15
-rw-r--r--arch/x86/kernel/cpu/common.c4
-rw-r--r--arch/x86/kernel/cpu/intel.c178
-rw-r--r--arch/x86/kernel/e820.c8
-rw-r--r--arch/x86/kernel/fpu/signal.c13
-rw-r--r--arch/x86/kernel/idt.c2
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/nmi.c3
-rw-r--r--arch/x86/kvm/Kconfig11
-rw-r--r--arch/x86/kvm/debugfs.c3
-rw-r--r--arch/x86/kvm/hyperv.c50
-rw-r--r--arch/x86/kvm/hyperv.h3
-rw-r--r--arch/x86/kvm/svm/svm_ops.h6
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx/run_flags.h7
-rw-r--r--arch/x86/kvm/vmx/vmenter.S9
-rw-r--r--arch/x86/kvm/vmx/vmx.c24
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h6
-rw-r--r--arch/x86/kvm/x86.c29
-rw-r--r--arch/x86/lib/getuser.S24
-rw-r--r--arch/x86/lib/putuser.S20
-rw-r--r--arch/x86/mm/fault.c9
-rw-r--r--arch/x86/mm/ident_map.c23
-rw-r--r--arch/x86/mm/maccess.c10
-rw-r--r--arch/x86/mm/numa.c21
-rw-r--r--arch/x86/xen/smp.c12
54 files changed, 678 insertions, 264 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..da140c3aed84 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -244,7 +244,6 @@ config X86
select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_KRETPROBES
select HAVE_RETHOOK
- select HAVE_KVM
select HAVE_LIVEPATCH if X86_64
select HAVE_MIXED_BREAKPOINTS_REGS
select HAVE_MOD_ARCH_SPECIFIC
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index b9224cf2ee4d..2a7279d80460 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -379,7 +379,7 @@ config X86_CMOV
config X86_MINIMUM_CPU_FAMILY
int
default "64" if X86_64
- default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8)
+ default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCORE2 || MK7 || MK8)
default "5" if X86_32 && X86_CMPXCHG64
default "4"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1a068de12a56..da8f3caf2781 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -112,13 +112,13 @@ ifeq ($(CONFIG_X86_32),y)
# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding
- ifeq ($(CONFIG_STACKPROTECTOR),y)
- ifeq ($(CONFIG_SMP),y)
+ ifeq ($(CONFIG_STACKPROTECTOR),y)
+ ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
- else
+ else
KBUILD_CFLAGS += -mstack-protector-guard=global
- endif
- endif
+ endif
+ endif
else
BITS := 64
UTS_MACHINE := x86_64
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b2771710ed98..a1bbedd989e4 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -106,8 +106,7 @@ extra_header_fields:
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
- .long setup_size + ZO__end + pecompat_vsize
- # SizeOfImage
+ .long setup_size + ZO__end # SizeOfImage
.long salign # SizeOfHeaders
.long 0 # CheckSum
@@ -143,7 +142,7 @@ section_table:
.ascii ".setup"
.byte 0
.byte 0
- .long setup_size - salign # VirtualSize
+ .long pecompat_fstart - salign # VirtualSize
.long salign # VirtualAddress
.long pecompat_fstart - salign # SizeOfRawData
.long salign # PointerToRawData
@@ -156,8 +155,8 @@ section_table:
#ifdef CONFIG_EFI_MIXED
.asciz ".compat"
- .long 8 # VirtualSize
- .long setup_size + ZO__end # VirtualAddress
+ .long pecompat_fsize # VirtualSize
+ .long pecompat_fstart # VirtualAddress
.long pecompat_fsize # SizeOfRawData
.long pecompat_fstart # PointerToRawData
@@ -172,17 +171,16 @@ section_table:
* modes this image supports.
*/
.pushsection ".pecompat", "a", @progbits
- .balign falign
- .set pecompat_vsize, salign
+ .balign salign
.globl pecompat_fstart
pecompat_fstart:
.byte 0x1 # Version
.byte 8 # Size
.word IMAGE_FILE_MACHINE_I386 # PE machine type
.long setup_size + ZO_efi32_pe_entry # Entrypoint
+ .byte 0x0 # Sentinel
.popsection
#else
- .set pecompat_vsize, 0
.set pecompat_fstart, setup_size
#endif
.ascii ".text"
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld
index 83bb7efad8ae..3a2d1360abb0 100644
--- a/arch/x86/boot/setup.ld
+++ b/arch/x86/boot/setup.ld
@@ -24,6 +24,9 @@ SECTIONS
.text : { *(.text .text.*) }
.text32 : { *(.text32) }
+ .pecompat : { *(.pecompat) }
+ PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
+
. = ALIGN(16);
.rodata : { *(.rodata*) }
@@ -36,9 +39,6 @@ SECTIONS
. = ALIGN(16);
.data : { *(.data*) }
- .pecompat : { *(.pecompat) }
- PROVIDE(pecompat_fsize = setup_size - pecompat_fstart);
-
.signature : {
setup_sig = .;
LONG(0x5a5aaa55)
diff --git a/arch/x86/entry/entry.S b/arch/x86/entry/entry.S
index 8c8d38f0cb1d..003379049924 100644
--- a/arch/x86/entry/entry.S
+++ b/arch/x86/entry/entry.S
@@ -6,6 +6,9 @@
#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/msr-index.h>
+#include <asm/unwind_hints.h>
+#include <asm/segment.h>
+#include <asm/cache.h>
.pushsection .noinstr.text, "ax"
@@ -20,3 +23,23 @@ SYM_FUNC_END(entry_ibpb)
EXPORT_SYMBOL_GPL(entry_ibpb);
.popsection
+
+/*
+ * Define the VERW operand that is disguised as entry code so that
+ * it can be referenced with KPTI enabled. This ensure VERW can be
+ * used late in exit-to-user path after page tables are switched.
+ */
+.pushsection .entry.text, "ax"
+
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_START_NOALIGN(mds_verw_sel)
+ UNWIND_HINT_UNDEFINED
+ ANNOTATE_NOENDBR
+ .word __KERNEL_DS
+.align L1_CACHE_BYTES, 0xcc
+SYM_CODE_END(mds_verw_sel);
+/* For KVM */
+EXPORT_SYMBOL_GPL(mds_verw_sel);
+
+.popsection
+
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index c73047bf9f4b..fba427646805 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -885,6 +885,7 @@ SYM_FUNC_START(entry_SYSENTER_32)
BUG_IF_WRONG_CR3 no_user_check=1
popfl
popl %eax
+ CLEAR_CPU_BUFFERS
/*
* Return back to the vDSO, which will pop ecx and edx.
@@ -954,6 +955,7 @@ restore_all_switch_stack:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
+ CLEAR_CPU_BUFFERS
.Lirq_return:
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
@@ -1146,6 +1148,7 @@ SYM_CODE_START(asm_exc_nmi)
/* Not on SYSENTER stack. */
call exc_nmi
+ CLEAR_CPU_BUFFERS
jmp .Lnmi_return
.Lnmi_from_sysenter_stack:
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index c40f89ab1b4c..9bb485977629 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -161,6 +161,7 @@ syscall_return_via_sysret:
SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
swapgs
+ CLEAR_CPU_BUFFERS
sysretq
SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
@@ -573,6 +574,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
.Lswapgs_and_iret:
swapgs
+ CLEAR_CPU_BUFFERS
/* Assert that the IRET frame indicates user mode. */
testb $3, 8(%rsp)
jnz .Lnative_iret
@@ -723,6 +725,8 @@ native_irq_return_ldt:
*/
popq %rax /* Restore user RAX */
+ CLEAR_CPU_BUFFERS
+
/*
* RSP now points to an ordinary IRET frame, except that the page
* is read-only and RSP[31:16] are preloaded with the userspace
@@ -1450,6 +1454,12 @@ nmi_restore:
movq $0, 5*8(%rsp) /* clear "NMI executing" */
/*
+ * Skip CLEAR_CPU_BUFFERS here, since it only helps in rare cases like
+ * NMI in kernel after user state is restored. For an unprivileged user
+ * these conditions are hard to meet.
+ */
+
+ /*
* iretq reads the "iret" frame and exits the NMI stack in a
* single instruction. We are returning to kernel mode, so this
* cannot result in a fault. Similarly, we don't need to worry
@@ -1466,6 +1476,7 @@ SYM_CODE_START(entry_SYSCALL32_ignore)
UNWIND_HINT_END_OF_STACK
ENDBR
mov $-ENOSYS, %eax
+ CLEAR_CPU_BUFFERS
sysretl
SYM_CODE_END(entry_SYSCALL32_ignore)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index de94e2e84ecc..eabf48c4d4b4 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -270,6 +270,7 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
xorl %r9d, %r9d
xorl %r10d, %r10d
swapgs
+ CLEAR_CPU_BUFFERS
sysretl
SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h
index 6ae2d16a7613..76c310b19b11 100644
--- a/arch/x86/include/asm/coco.h
+++ b/arch/x86/include/asm/coco.h
@@ -10,13 +10,14 @@ enum cc_vendor {
CC_VENDOR_INTEL,
};
-extern enum cc_vendor cc_vendor;
-
#ifdef CONFIG_ARCH_HAS_CC_PLATFORM
+extern enum cc_vendor cc_vendor;
void cc_set_mask(u64 mask);
u64 cc_mkenc(u64 val);
u64 cc_mkdec(u64 val);
#else
+#define cc_vendor (CC_VENDOR_NONE)
+
static inline u64 cc_mkenc(u64 val)
{
return val;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a26bebbdff87..a1273698fc43 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -168,7 +168,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
*/
static __always_inline bool _static_cpu_has(u16 bit)
{
- asm_volatile_goto(
+ asm goto(
ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]")
".pushsection .altinstr_aux,\"ax\"\n"
"6:\n"
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index fdf723b6f6d0..2b62cdd8dd12 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -95,7 +95,7 @@
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-/* FREE, was #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index ce8f50192ae3..7e523bb3d2d3 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -91,7 +91,6 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
- mds_user_clear_cpu_buffers();
amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 66837b8c67f1..fbc7722b87d1 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -15,7 +15,7 @@ typedef struct {
unsigned int irq_spurious_count;
unsigned int icr_read_retry_count;
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
unsigned int kvm_posted_intr_ipis;
unsigned int kvm_posted_intr_wakeup_ipis;
unsigned int kvm_posted_intr_nested_ipis;
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 13639e57e1f8..d9c86733d0db 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -675,7 +675,7 @@ DECLARE_IDTENTRY_SYSVEC(IRQ_WORK_VECTOR, sysvec_irq_work);
# endif
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_VECTOR, sysvec_kvm_posted_intr_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_WAKEUP_VECTOR, sysvec_kvm_posted_intr_wakeup_ipi);
DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested_ipi);
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 836c170d3087..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -29,7 +29,7 @@ struct irq_desc;
extern void fixup_irqs(void);
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
extern void kvm_set_posted_intr_wakeup_handler(void (*handler)(void));
#endif
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 3a19904c2db6..d18bfb238f66 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -84,11 +84,9 @@
#define HYPERVISOR_CALLBACK_VECTOR 0xf3
/* Vector for KVM to deliver posted interrupt IPI */
-#ifdef CONFIG_HAVE_KVM
#define POSTED_INTR_VECTOR 0xf2
#define POSTED_INTR_WAKEUP_VECTOR 0xf1
#define POSTED_INTR_NESTED_VECTOR 0xf0
-#endif
#define MANAGED_IRQ_SHUTDOWN_VECTOR 0xef
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 071572e23d3a..cbbef32517f0 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -24,7 +24,7 @@
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes] # objtool NOPs this \n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (2 | branch) : : l_yes);
@@ -38,7 +38,7 @@ l_yes:
static __always_inline bool arch_static_branch(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
".byte " __stringify(BYTES_NOP5) "\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
@@ -52,7 +52,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key * const key, const bool branch)
{
- asm_volatile_goto("1:"
+ asm goto("1:"
"jmp %l[l_yes]\n\t"
JUMP_TABLE_ENTRY
: : "i" (key), "i" (branch) : : l_yes);
diff --git a/arch/x86/include/asm/kmsan.h b/arch/x86/include/asm/kmsan.h
index 8fa6ac0e2d76..d91b37f5b4bb 100644
--- a/arch/x86/include/asm/kmsan.h
+++ b/arch/x86/include/asm/kmsan.h
@@ -64,6 +64,7 @@ static inline bool kmsan_virt_addr_valid(void *addr)
{
unsigned long x = (unsigned long)addr;
unsigned long y = x - __START_KERNEL_map;
+ bool ret;
/* use the carry flag to determine if x was < __START_KERNEL_map */
if (unlikely(x > y)) {
@@ -79,7 +80,21 @@ static inline bool kmsan_virt_addr_valid(void *addr)
return false;
}
- return pfn_valid(x >> PAGE_SHIFT);
+ /*
+ * pfn_valid() relies on RCU, and may call into the scheduler on exiting
+ * the critical section. However, this would result in recursion with
+ * KMSAN. Therefore, disable preemption here, and re-enable preemption
+ * below while suppressing reschedules to avoid recursion.
+ *
+ * Note, this sacrifices occasionally breaking scheduling guarantees.
+ * Although, a kernel compiled with KMSAN has already given up on any
+ * performance guarantees due to being heavily instrumented.
+ */
+ preempt_disable();
+ ret = pfn_valid(x >> PAGE_SHIFT);
+ preempt_enable_no_resched();
+
+ return ret;
}
#endif /* !MODULE */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b5b2d0fde579..d271ba20a0b2 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1145,6 +1145,8 @@ struct kvm_hv {
unsigned int synic_auto_eoi_used;
struct kvm_hv_syndbg hv_syndbg;
+
+ bool xsaves_xsavec_checked;
};
#endif
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 262e65539f83..2aa52cab1e46 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -315,6 +315,17 @@
#endif
.endm
+/*
+ * Macro to execute VERW instruction that mitigate transient data sampling
+ * attacks such as MDS. On affected systems a microcode update overloaded VERW
+ * instruction to also clear the CPU buffers. VERW clobbers CFLAGS.ZF.
+ *
+ * Note: Only the memory operand variant of VERW clears the CPU buffers.
+ */
+.macro CLEAR_CPU_BUFFERS
+ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF
+.endm
+
#else /* __ASSEMBLY__ */
#define ANNOTATE_RETPOLINE_SAFE \
@@ -529,13 +540,14 @@ DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-DECLARE_STATIC_KEY_FALSE(mds_user_clear);
DECLARE_STATIC_KEY_FALSE(mds_idle_clear);
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+extern u16 mds_verw_sel;
+
#include <asm/segment.h>
/**
@@ -562,17 +574,6 @@ static __always_inline void mds_clear_cpu_buffers(void)
}
/**
- * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability
- *
- * Clear CPU buffers if the corresponding static key is enabled
- */
-static __always_inline void mds_user_clear_cpu_buffers(void)
-{
- if (static_branch_likely(&mds_user_clear))
- mds_clear_cpu_buffers();
-}
-
-/**
* mds_idle_clear_cpu_buffers - Mitigation for MDS vulnerability
*
* Clear CPU buffers if the corresponding static key is enabled
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index 4b081e0d3306..363266cbcada 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -13,7 +13,7 @@
#define __GEN_RMWcc(fullop, _var, cc, clobbers, ...) \
({ \
bool c = false; \
- asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \
+ asm goto (fullop "; j" #cc " %l[cc_label]" \
: : [var] "m" (_var), ## __VA_ARGS__ \
: clobbers : cc_label); \
if (0) { \
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index d6cd9344f6c7..48f8dd47cf68 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -205,7 +205,7 @@ static inline void clwb(volatile void *__p)
#ifdef CONFIG_X86_USER_SHADOW_STACK
static inline int write_user_shstk_64(u64 __user *addr, u64 val)
{
- asm_volatile_goto("1: wrussq %[val], (%[addr])\n"
+ asm goto("1: wrussq %[val], (%[addr])\n"
_ASM_EXTABLE(1b, %l[fail])
:: [addr] "r" (addr), [val] "r" (val)
:: fail);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 5c367c1290c3..237dc8cdd12b 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -133,7 +133,7 @@ extern int __get_user_bad(void);
#ifdef CONFIG_X86_32
#define __put_user_goto_u64(x, addr, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: movl %%eax,0(%1)\n" \
"2: movl %%edx,4(%1)\n" \
_ASM_EXTABLE_UA(1b, %l2) \
@@ -295,7 +295,7 @@ do { \
} while (0)
#define __get_user_asm(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: mov"itype" %[umem],%[output]\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: [output] ltype(x) \
@@ -375,7 +375,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg"itype" %[new], %[ptr]\n"\
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -394,7 +394,7 @@ do { \
__typeof__(_ptr) _old = (__typeof__(_ptr))(_pold); \
__typeof__(*(_ptr)) __old = *_old; \
__typeof__(*(_ptr)) __new = (_new); \
- asm_volatile_goto("\n" \
+ asm_goto_output("\n" \
"1: " LOCK_PREFIX "cmpxchg8b %[ptr]\n" \
_ASM_EXTABLE_UA(1b, %l[label]) \
: CC_OUT(z) (success), \
@@ -477,7 +477,7 @@ struct __large_struct { unsigned long buf[100]; };
* aliasing issues.
*/
#define __put_user_goto(x, addr, itype, ltype, label) \
- asm_volatile_goto("\n" \
+ asm goto("\n" \
"1: mov"itype" %0,%1\n" \
_ASM_EXTABLE_UA(1b, %l2) \
: : ltype(x), "m" (__m(addr)) \
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index ab60a71a8dcb..472f0263dbc6 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -4,6 +4,7 @@
#include <linux/seqlock.h>
#include <uapi/asm/vsyscall.h>
+#include <asm/page_types.h>
#ifdef CONFIG_X86_VSYSCALL_EMULATION
extern void map_vsyscall(void);
@@ -24,4 +25,13 @@ static inline bool emulate_vsyscall(unsigned long error_code,
}
#endif
+/*
+ * The (legacy) vsyscall page is the long page in the kernel portion
+ * of the address space that has user-accessible permissions.
+ */
+static inline bool is_vsyscall_vaddr(unsigned long vaddr)
+{
+ return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
+}
+
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index a448d0964fc0..0ad6bda1fc39 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -7,6 +7,8 @@
*
*/
+#include <linux/const.h>
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/stddef.h>
@@ -40,7 +42,6 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_MSI
#define __KVM_HAVE_USER_NMI
-#define __KVM_HAVE_GUEST_DEBUG
#define __KVM_HAVE_MSIX
#define __KVM_HAVE_MCE
#define __KVM_HAVE_PIT_STATE2
@@ -49,7 +50,6 @@
#define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS
-#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256
@@ -526,9 +526,271 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
-#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS BIT(0)
+#define KVM_PMU_EVENT_FLAG_MASKED_EVENTS _BITUL(0)
#define KVM_PMU_EVENT_FLAGS_VALID_MASK (KVM_PMU_EVENT_FLAG_MASKED_EVENTS)
+/* for KVM_CAP_MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+
+/* for KVM_CAP_XEN_HVM */
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6)
+#define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7)
+
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ __u8 runstate_update_flag;
+ struct {
+ __u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
+ } shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
+ __u64 pad[8];
+ } u;
+};
+
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG 0x5
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
+ __u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
+
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
+ /* Guest Migration Extension */
+ KVM_SEV_SEND_CANCEL,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_send_start {
+ __u32 policy;
+ __u64 pdh_cert_uaddr;
+ __u32 pdh_cert_len;
+ __u64 plat_certs_uaddr;
+ __u32 plat_certs_len;
+ __u64 amd_certs_uaddr;
+ __u32 amd_certs_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 pdh_uaddr;
+ __u32 pdh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
/*
* Masked event layout.
* Bits Description
@@ -549,10 +811,10 @@ struct kvm_pmu_event_filter {
((__u64)(!!(exclude)) << 55))
#define KVM_PMU_MASKED_ENTRY_EVENT_SELECT \
- (GENMASK_ULL(7, 0) | GENMASK_ULL(35, 32))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (GENMASK_ULL(63, 56))
-#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (GENMASK_ULL(15, 8))
-#define KVM_PMU_MASKED_ENTRY_EXCLUDE (BIT_ULL(55))
+ (__GENMASK_ULL(7, 0) | __GENMASK_ULL(35, 32))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MASK (__GENMASK_ULL(63, 56))
+#define KVM_PMU_MASKED_ENTRY_UMASK_MATCH (__GENMASK_ULL(15, 8))
+#define KVM_PMU_MASKED_ENTRY_EXCLUDE (_BITULL(55))
#define KVM_PMU_MASKED_ENTRY_UMASK_MASK_SHIFT (56)
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
@@ -560,7 +822,7 @@ struct kvm_pmu_event_filter {
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
/* x86-specific KVM_EXIT_HYPERCALL flags. */
-#define KVM_EXIT_HYPERCALL_LONG_MODE BIT(0)
+#define KVM_EXIT_HYPERCALL_LONG_MODE _BITULL(0)
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_SW_PROTECTED_VM 1
diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h
index 6e64b27b2c1e..6bc3456a8ebf 100644
--- a/arch/x86/include/uapi/asm/kvm_para.h
+++ b/arch/x86/include/uapi/asm/kvm_para.h
@@ -92,7 +92,7 @@ struct kvm_clock_pairing {
#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
/* MSR_KVM_ASYNC_PF_INT */
-#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0)
+#define KVM_ASYNC_PF_VEC_MASK __GENMASK(7, 0)
/* MSR_KVM_MIGRATION_CONTROL */
#define KVM_MIGRATION_READY (1 << 0)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index bb0ab8466b91..48d049cd74e7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -111,9 +111,6 @@ DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb);
/* Control unconditional IBPB in switch_mm() */
DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb);
-/* Control MDS CPU buffer clear before returning to user space */
-DEFINE_STATIC_KEY_FALSE(mds_user_clear);
-EXPORT_SYMBOL_GPL(mds_user_clear);
/* Control MDS CPU buffer clear before idling (halt, mwait) */
DEFINE_STATIC_KEY_FALSE(mds_idle_clear);
EXPORT_SYMBOL_GPL(mds_idle_clear);
@@ -252,7 +249,7 @@ static void __init mds_select_mitigation(void)
if (!boot_cpu_has(X86_FEATURE_MD_CLEAR))
mds_mitigation = MDS_MITIGATION_VMWERV;
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (!boot_cpu_has(X86_BUG_MSBDS_ONLY) &&
(mds_nosmt || cpu_mitigations_auto_nosmt()))
@@ -356,7 +353,7 @@ static void __init taa_select_mitigation(void)
* For guests that can't determine whether the correct microcode is
* present on host, enable the mitigation for UCODE_NEEDED as well.
*/
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
@@ -424,7 +421,7 @@ static void __init mmio_select_mitigation(void)
*/
if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
boot_cpu_has(X86_FEATURE_RTM)))
- static_branch_enable(&mds_user_clear);
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
else
static_branch_enable(&mmio_stale_data_clear);
@@ -484,12 +481,12 @@ static void __init md_clear_update_mitigation(void)
if (cpu_mitigations_off())
return;
- if (!static_key_enabled(&mds_user_clear))
+ if (!boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
goto out;
/*
- * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
- * mitigation, if necessary.
+ * X86_FEATURE_CLEAR_CPU_BUF is now enabled. Update MDS, TAA and MMIO
+ * Stale Data mitigation, if necessary.
*/
if (mds_mitigation == MDS_MITIGATION_OFF &&
boot_cpu_has_bug(X86_BUG_MDS)) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 0b97bcde70c6..fbc4e60d027c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1589,6 +1589,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
get_cpu_vendor(c);
get_cpu_cap(c);
setup_force_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
cpu_parse_early_param();
if (this_cpu->c_early_init)
@@ -1601,10 +1602,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
this_cpu->c_bsp_init(c);
} else {
setup_clear_cpu_cap(X86_FEATURE_CPUID);
+ get_cpu_address_sizes(c);
}
- get_cpu_address_sizes(c);
-
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
cpu_set_bug_bits(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index a927a8fc9624..40dec9b56f87 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -184,6 +184,90 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
return false;
}
+#define MSR_IA32_TME_ACTIVATE 0x982
+
+/* Helpers to access TME_ACTIVATE MSR */
+#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
+#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
+
+#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
+#define TME_ACTIVATE_POLICY_AES_XTS_128 0
+
+#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
+
+#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
+#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
+
+/* Values for mktme_status (SW only construct) */
+#define MKTME_ENABLED 0
+#define MKTME_DISABLED 1
+#define MKTME_UNINITIALIZED 2
+static int mktme_status = MKTME_UNINITIALIZED;
+
+static void detect_tme_early(struct cpuinfo_x86 *c)
+{
+ u64 tme_activate, tme_policy, tme_crypto_algs;
+ int keyid_bits = 0, nr_keyids = 0;
+ static u64 tme_activate_cpu0 = 0;
+
+ rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
+
+ if (mktme_status != MKTME_UNINITIALIZED) {
+ if (tme_activate != tme_activate_cpu0) {
+ /* Broken BIOS? */
+ pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
+ pr_err_once("x86/tme: MKTME is not usable\n");
+ mktme_status = MKTME_DISABLED;
+
+ /* Proceed. We may need to exclude bits from x86_phys_bits. */
+ }
+ } else {
+ tme_activate_cpu0 = tme_activate;
+ }
+
+ if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
+ pr_info_once("x86/tme: not enabled by BIOS\n");
+ mktme_status = MKTME_DISABLED;
+ return;
+ }
+
+ if (mktme_status != MKTME_UNINITIALIZED)
+ goto detect_keyid_bits;
+
+ pr_info("x86/tme: enabled by BIOS\n");
+
+ tme_policy = TME_ACTIVATE_POLICY(tme_activate);
+ if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
+ pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
+
+ tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
+ if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
+ pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
+ tme_crypto_algs);
+ mktme_status = MKTME_DISABLED;
+ }
+detect_keyid_bits:
+ keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
+ nr_keyids = (1UL << keyid_bits) - 1;
+ if (nr_keyids) {
+ pr_info_once("x86/mktme: enabled by BIOS\n");
+ pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
+ } else {
+ pr_info_once("x86/mktme: disabled by BIOS\n");
+ }
+
+ if (mktme_status == MKTME_UNINITIALIZED) {
+ /* MKTME is usable */
+ mktme_status = MKTME_ENABLED;
+ }
+
+ /*
+ * KeyID bits effectively lower the number of physical address
+ * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
+ */
+ c->x86_phys_bits -= keyid_bits;
+}
+
static void early_init_intel(struct cpuinfo_x86 *c)
{
u64 misc_enable;
@@ -322,6 +406,13 @@ static void early_init_intel(struct cpuinfo_x86 *c)
*/
if (detect_extended_topology_early(c) < 0)
detect_ht_early(c);
+
+ /*
+ * Adjust the number of physical bits early because it affects the
+ * valid bits of the MTRR mask registers.
+ */
+ if (cpu_has(c, X86_FEATURE_TME))
+ detect_tme_early(c);
}
static void bsp_init_intel(struct cpuinfo_x86 *c)
@@ -482,90 +573,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-#define MSR_IA32_TME_ACTIVATE 0x982
-
-/* Helpers to access TME_ACTIVATE MSR */
-#define TME_ACTIVATE_LOCKED(x) (x & 0x1)
-#define TME_ACTIVATE_ENABLED(x) (x & 0x2)
-
-#define TME_ACTIVATE_POLICY(x) ((x >> 4) & 0xf) /* Bits 7:4 */
-#define TME_ACTIVATE_POLICY_AES_XTS_128 0
-
-#define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */
-
-#define TME_ACTIVATE_CRYPTO_ALGS(x) ((x >> 48) & 0xffff) /* Bits 63:48 */
-#define TME_ACTIVATE_CRYPTO_AES_XTS_128 1
-
-/* Values for mktme_status (SW only construct) */
-#define MKTME_ENABLED 0
-#define MKTME_DISABLED 1
-#define MKTME_UNINITIALIZED 2
-static int mktme_status = MKTME_UNINITIALIZED;
-
-static void detect_tme(struct cpuinfo_x86 *c)
-{
- u64 tme_activate, tme_policy, tme_crypto_algs;
- int keyid_bits = 0, nr_keyids = 0;
- static u64 tme_activate_cpu0 = 0;
-
- rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate);
-
- if (mktme_status != MKTME_UNINITIALIZED) {
- if (tme_activate != tme_activate_cpu0) {
- /* Broken BIOS? */
- pr_err_once("x86/tme: configuration is inconsistent between CPUs\n");
- pr_err_once("x86/tme: MKTME is not usable\n");
- mktme_status = MKTME_DISABLED;
-
- /* Proceed. We may need to exclude bits from x86_phys_bits. */
- }
- } else {
- tme_activate_cpu0 = tme_activate;
- }
-
- if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) {
- pr_info_once("x86/tme: not enabled by BIOS\n");
- mktme_status = MKTME_DISABLED;
- return;
- }
-
- if (mktme_status != MKTME_UNINITIALIZED)
- goto detect_keyid_bits;
-
- pr_info("x86/tme: enabled by BIOS\n");
-
- tme_policy = TME_ACTIVATE_POLICY(tme_activate);
- if (tme_policy != TME_ACTIVATE_POLICY_AES_XTS_128)
- pr_warn("x86/tme: Unknown policy is active: %#llx\n", tme_policy);
-
- tme_crypto_algs = TME_ACTIVATE_CRYPTO_ALGS(tme_activate);
- if (!(tme_crypto_algs & TME_ACTIVATE_CRYPTO_AES_XTS_128)) {
- pr_err("x86/mktme: No known encryption algorithm is supported: %#llx\n",
- tme_crypto_algs);
- mktme_status = MKTME_DISABLED;
- }
-detect_keyid_bits:
- keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate);
- nr_keyids = (1UL << keyid_bits) - 1;
- if (nr_keyids) {
- pr_info_once("x86/mktme: enabled by BIOS\n");
- pr_info_once("x86/mktme: %d KeyIDs available\n", nr_keyids);
- } else {
- pr_info_once("x86/mktme: disabled by BIOS\n");
- }
-
- if (mktme_status == MKTME_UNINITIALIZED) {
- /* MKTME is usable */
- mktme_status = MKTME_ENABLED;
- }
-
- /*
- * KeyID bits effectively lower the number of physical address
- * bits. Update cpuinfo_x86::x86_phys_bits accordingly.
- */
- c->x86_phys_bits -= keyid_bits;
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -702,9 +709,6 @@ static void init_intel(struct cpuinfo_x86 *c)
init_ia32_feat_ctl(c);
- if (cpu_has(c, X86_FEATURE_TME))
- detect_tme(c);
-
init_intel_misc_features(c);
split_lock_init();
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index fb8cf953380d..b66f540de054 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1017,10 +1017,12 @@ void __init e820__reserve_setup_data(void)
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
/*
- * SETUP_EFI and SETUP_IMA are supplied by kexec and do not need
- * to be reserved.
+ * SETUP_EFI, SETUP_IMA and SETUP_RNG_SEED are supplied by
+ * kexec and do not need to be reserved.
*/
- if (data->type != SETUP_EFI && data->type != SETUP_IMA)
+ if (data->type != SETUP_EFI &&
+ data->type != SETUP_IMA &&
+ data->type != SETUP_RNG_SEED)
e820__range_update_kexec(pa_data,
sizeof(*data) + data->len,
E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 558076dbde5b..247f2225aa9f 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -274,12 +274,13 @@ static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
* Attempt to restore the FPU registers directly from user memory.
* Pagefaults are handled and any errors returned are fatal.
*/
-static bool restore_fpregs_from_user(void __user *buf, u64 xrestore,
- bool fx_only, unsigned int size)
+static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
struct fpu *fpu = &current->thread.fpu;
int ret;
+ /* Restore enabled features only. */
+ xrestore &= fpu->fpstate->user_xfeatures;
retry:
fpregs_lock();
/* Ensure that XFD is up to date */
@@ -309,7 +310,7 @@ retry:
if (ret != X86_TRAP_PF)
return false;
- if (!fault_in_readable(buf, size))
+ if (!fault_in_readable(buf, fpu->fpstate->user_size))
goto retry;
return false;
}
@@ -339,7 +340,6 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
struct user_i387_ia32_struct env;
bool success, fx_only = false;
union fpregs_state *fpregs;
- unsigned int state_size;
u64 user_xfeatures = 0;
if (use_xsave()) {
@@ -349,17 +349,14 @@ static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
return false;
fx_only = !fx_sw_user.magic1;
- state_size = fx_sw_user.xstate_size;
user_xfeatures = fx_sw_user.xfeatures;
} else {
user_xfeatures = XFEATURE_MASK_FPSSE;
- state_size = fpu->fpstate->user_size;
}
if (likely(!ia32_fxstate)) {
/* Restore the FPU registers directly from user memory. */
- return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only,
- state_size);
+ return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
}
/*
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 660b601f1d6c..d2bc67cbaf92 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -153,7 +153,7 @@ static const __initconst struct idt_data apic_idts[] = {
#ifdef CONFIG_X86_LOCAL_APIC
INTG(LOCAL_TIMER_VECTOR, asm_sysvec_apic_timer_interrupt),
INTG(X86_PLATFORM_IPI_VECTOR, asm_sysvec_x86_platform_ipi),
-# ifdef CONFIG_HAVE_KVM
+# if IS_ENABLED(CONFIG_KVM)
INTG(POSTED_INTR_VECTOR, asm_sysvec_kvm_posted_intr_ipi),
INTG(POSTED_INTR_WAKEUP_VECTOR, asm_sysvec_kvm_posted_intr_wakeup_ipi),
INTG(POSTED_INTR_NESTED_VECTOR, asm_sysvec_kvm_posted_intr_nested_ipi),
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 11761c124545..35fde0107901 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -164,7 +164,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
#if defined(CONFIG_X86_IO_APIC)
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
seq_printf(p, "%*s: ", prec, "PIN");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
@@ -290,7 +290,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platform_ipi)
}
#endif
-#ifdef CONFIG_HAVE_KVM
+#if IS_ENABLED(CONFIG_KVM)
static void dummy_handler(void) {}
static void (*kvm_posted_intr_wakeup_handler)(void) = dummy_handler;
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index dfe9945b9bec..428ee74002e1 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -434,7 +434,8 @@ static void __init sev_map_percpu_data(void)
{
int cpu;
- if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
+ if (cc_vendor != CC_VENDOR_AMD ||
+ !cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
return;
for_each_possible_cpu(cpu) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 17e955ab69fe..3082cf24b69e 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -563,9 +563,6 @@ nmi_restart:
}
if (this_cpu_dec_return(nmi_state))
goto nmi_restart;
-
- if (user_mode(regs))
- mds_user_clear_cpu_buffers();
}
#if IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 87e3da7b0439..8c3032a96caf 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -7,7 +7,6 @@ source "virt/kvm/Kconfig"
menuconfig VIRTUALIZATION
bool "Virtualization"
- depends on HAVE_KVM || X86
default y
help
Say Y here to get to see options for using your Linux host to run other
@@ -20,7 +19,6 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
- depends on HAVE_KVM
depends on HIGH_RES_TIMERS
depends on X86_LOCAL_APIC
select KVM_COMMON
@@ -29,9 +27,9 @@ config KVM
select HAVE_KVM_PFNCACHE
select HAVE_KVM_DIRTY_RING_TSO
select HAVE_KVM_DIRTY_RING_ACQ_REL
- select IRQ_BYPASS_MANAGER
select HAVE_KVM_IRQ_BYPASS
select HAVE_KVM_IRQ_ROUTING
+ select HAVE_KVM_READONLY_MEM
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
@@ -80,9 +78,10 @@ config KVM_SW_PROTECTED_VM
depends on KVM && X86_64
select KVM_GENERIC_PRIVATE_MEM
help
- Enable support for KVM software-protected VMs. Currently "protected"
- means the VM can be backed with memory provided by
- KVM_CREATE_GUEST_MEMFD.
+ Enable support for KVM software-protected VMs. Currently, software-
+ protected VMs are purely a development and testing vehicle for
+ KVM_CREATE_GUEST_MEMFD. Attempting to run a "real" VM workload as a
+ software-protected VM will fail miserably.
If unsure, say "N".
diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c
index 95ea1a1f7403..999227fc7c66 100644
--- a/arch/x86/kvm/debugfs.c
+++ b/arch/x86/kvm/debugfs.c
@@ -189,9 +189,8 @@ static const struct file_operations mmu_rmaps_stat_fops = {
.release = kvm_mmu_rmaps_stat_release,
};
-int kvm_arch_create_vm_debugfs(struct kvm *kvm)
+void kvm_arch_create_vm_debugfs(struct kvm *kvm)
{
debugfs_create_file("mmu_rmaps_stat", 0644, kvm->debugfs_dentry, kvm,
&mmu_rmaps_stat_fops);
- return 0;
}
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 4943f6b2bbee..8a47f8541eab 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1322,6 +1322,56 @@ static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
return false;
}
+#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
+#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */
+
+/*
+ * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
+ * configuration.
+ * Such configuration can result from, for example, AMD Erratum 1386 workaround.
+ *
+ * Print a notice so users aren't left wondering what's suddenly gone wrong.
+ */
+static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_hv *hv = to_kvm_hv(kvm);
+
+ /* Check again under the hv_lock. */
+ if (hv->xsaves_xsavec_checked)
+ return;
+
+ if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
+ KVM_HV_WIN2016_GUEST_ID)
+ return;
+
+ hv->xsaves_xsavec_checked = true;
+
+ /* UP configurations aren't affected */
+ if (atomic_read(&kvm->online_vcpus) < 2)
+ return;
+
+ if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
+ !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
+ return;
+
+ pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
+ "If it fails to boot try disabling XSAVEC in the VM config.\n");
+}
+
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
+{
+ struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+
+ if (!vcpu->arch.hyperv_enabled ||
+ hv->xsaves_xsavec_checked)
+ return;
+
+ mutex_lock(&hv->hv_lock);
+ __kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+ mutex_unlock(&hv->hv_lock);
+}
+
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
bool host)
{
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 1dc0b6604526..923e64903da9 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -182,6 +182,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
void kvm_hv_request_tsc_page_update(struct kvm *kvm);
+void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu);
+
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
@@ -267,6 +269,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
static inline void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock) {}
static inline void kvm_hv_request_tsc_page_update(struct kvm *kvm) {}
+static inline void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu) {}
static inline void kvm_hv_init_vm(struct kvm *kvm) {}
static inline void kvm_hv_destroy_vm(struct kvm *kvm) {}
static inline int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..4e725854c63a 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -8,7 +8,7 @@
#define svm_asm(insn, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) "\n\t" \
+ asm goto("1: " __stringify(insn) "\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
::: clobber : fault); \
return; \
@@ -18,7 +18,7 @@ fault: \
#define svm_asm1(insn, op1, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1 : clobber : fault); \
return; \
@@ -28,7 +28,7 @@ fault: \
#define svm_asm2(insn, op1, op2, clobber...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
:: op1, op2 : clobber : fault); \
return; \
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a6216c874729..315c7c2ba89b 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -71,7 +71,7 @@ static int fixed_pmc_events[] = {
static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
{
struct kvm_pmc *pmc;
- u8 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
+ u64 old_fixed_ctr_ctrl = pmu->fixed_ctr_ctrl;
int i;
pmu->fixed_ctr_ctrl = data;
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index edc3f16cc189..6a9bfdfbb6e5 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -2,7 +2,10 @@
#ifndef __KVM_X86_VMX_RUN_FLAGS_H
#define __KVM_X86_VMX_RUN_FLAGS_H
-#define VMX_RUN_VMRESUME (1 << 0)
-#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+#define VMX_RUN_VMRESUME_SHIFT 0
+#define VMX_RUN_SAVE_SPEC_CTRL_SHIFT 1
+
+#define VMX_RUN_VMRESUME BIT(VMX_RUN_VMRESUME_SHIFT)
+#define VMX_RUN_SAVE_SPEC_CTRL BIT(VMX_RUN_SAVE_SPEC_CTRL_SHIFT)
#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 906ecd001511..2bfbf758d061 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -139,7 +139,7 @@ SYM_FUNC_START(__vmx_vcpu_run)
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- test $VMX_RUN_VMRESUME, %ebx
+ bt $VMX_RUN_VMRESUME_SHIFT, %ebx
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
@@ -161,8 +161,11 @@ SYM_FUNC_START(__vmx_vcpu_run)
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Check EFLAGS.ZF from 'test VMX_RUN_VMRESUME' above */
- jz .Lvmlaunch
+ /* Clobbers EFLAGS.ZF */
+ CLEAR_CPU_BUFFERS
+
+ /* Check EFLAGS.CF from the VMX_RUN_VMRESUME bit test above. */
+ jnc .Lvmlaunch
/*
* After a successful VMRESUME/VMLAUNCH, control flow "magically"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e262bc2ba4e5..88a4ff200d04 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -388,7 +388,16 @@ static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
{
- vmx->disable_fb_clear = (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
+ /*
+ * Disable VERW's behavior of clearing CPU buffers for the guest if the
+ * CPU isn't affected by MDS/TAA, and the host hasn't forcefully enabled
+ * the mitigation. Disabling the clearing behavior provides a
+ * performance boost for guests that aren't aware that manually clearing
+ * CPU buffers is unnecessary, at the cost of MSR accesses on VM-Entry
+ * and VM-Exit.
+ */
+ vmx->disable_fb_clear = !cpu_feature_enabled(X86_FEATURE_CLEAR_CPU_BUF) &&
+ (host_arch_capabilities & ARCH_CAP_FB_CLEAR_CTRL) &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_TAA);
@@ -738,7 +747,7 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
*/
static int kvm_cpu_vmxoff(void)
{
- asm_volatile_goto("1: vmxoff\n\t"
+ asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
@@ -2784,7 +2793,7 @@ static int kvm_cpu_vmxon(u64 vmxon_pointer)
cr4_set_bits(X86_CR4_VMXE);
- asm_volatile_goto("1: vmxon %[vmxon_pointer]\n\t"
+ asm goto("1: vmxon %[vmxon_pointer]\n\t"
_ASM_EXTABLE(1b, %l[fault])
: : [vmxon_pointer] "m"(vmxon_pointer)
: : fault);
@@ -7224,11 +7233,14 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
guest_state_enter_irqoff();
- /* L1D Flush includes CPU buffer clear to mitigate MDS */
+ /*
+ * L1D Flush includes CPU buffer clear to mitigate MDS, but VERW
+ * mitigation for MDS is done late in VMentry and is still
+ * executed in spite of L1D Flush. This is because an extra VERW
+ * should not matter much after the big hammer L1D Flush.
+ */
if (static_branch_unlikely(&vmx_l1d_should_flush))
vmx_l1d_flush(vcpu);
- else if (static_branch_unlikely(&mds_user_clear))
- mds_clear_cpu_buffers();
else if (static_branch_unlikely(&mmio_stale_data_clear) &&
kvm_arch_has_assigned_device(vcpu->kvm))
mds_clear_cpu_buffers();
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index f41ce3c24123..8060e5fc6dbd 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -94,7 +94,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
- asm_volatile_goto("1: vmread %[field], %[output]\n\t"
+ asm_goto_output("1: vmread %[field], %[output]\n\t"
"jna %l[do_fail]\n\t"
_ASM_EXTABLE(1b, %l[do_exception])
@@ -188,7 +188,7 @@ static __always_inline unsigned long vmcs_readl(unsigned long field)
#define vmx_asm1(insn, op1, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %0\n\t" \
+ asm goto("1: " __stringify(insn) " %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
@@ -205,7 +205,7 @@ fault: \
#define vmx_asm2(insn, op1, op2, error_args...) \
do { \
- asm_volatile_goto("1: " __stringify(insn) " %1, %0\n\t" \
+ asm goto("1: " __stringify(insn) " %1, %0\n\t" \
".byte 0x2e\n\t" /* branch not taken hint */ \
"jna %l[error]\n\t" \
_ASM_EXTABLE(1b, %l[fault]) \
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 363b1c080205..3638a104bcf7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1704,22 +1704,17 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
struct kvm_msr_entry msr;
int r;
+ /* Unconditionally clear the output for simplicity */
+ msr.data = 0;
msr.index = index;
r = kvm_get_msr_feature(&msr);
- if (r == KVM_MSR_RET_INVALID) {
- /* Unconditionally clear the output for simplicity */
- *data = 0;
- if (kvm_msr_ignored_check(index, 0, false))
- r = 0;
- }
-
- if (r)
- return r;
+ if (r == KVM_MSR_RET_INVALID && kvm_msr_ignored_check(index, 0, false))
+ r = 0;
*data = msr.data;
- return 0;
+ return r;
}
static bool __kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer)
@@ -1782,6 +1777,10 @@ static int set_efer(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if ((efer ^ old_efer) & KVM_MMU_EFER_ROLE_BITS)
kvm_mmu_reset_context(vcpu);
+ if (!static_cpu_has(X86_FEATURE_XSAVES) &&
+ (efer & EFER_SVME))
+ kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
+
return 0;
}
@@ -2507,7 +2506,7 @@ static u64 compute_guest_tsc(struct kvm_vcpu *vcpu, s64 kernel_ns)
}
#ifdef CONFIG_X86_64
-static inline int gtod_is_based_on_tsc(int mode)
+static inline bool gtod_is_based_on_tsc(int mode)
{
return mode == VDSO_CLOCKMODE_TSC || mode == VDSO_CLOCKMODE_HVCLOCK;
}
@@ -4581,7 +4580,7 @@ static bool kvm_is_vm_type_supported(unsigned long type)
{
return type == KVM_X86_DEFAULT_VM ||
(type == KVM_X86_SW_PROTECTED_VM &&
- IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_enabled);
+ IS_ENABLED(CONFIG_KVM_SW_PROTECTED_VM) && tdp_mmu_enabled);
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
@@ -5454,7 +5453,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING) {
vcpu->arch.nmi_pending = 0;
atomic_set(&vcpu->arch.nmi_queued, events->nmi.pending);
- kvm_make_request(KVM_REQ_NMI, vcpu);
+ if (events->nmi.pending)
+ kvm_make_request(KVM_REQ_NMI, vcpu);
}
static_call(kvm_x86_set_nmi_mask)(vcpu, events->nmi.masked);
@@ -7016,6 +7016,9 @@ set_identity_unlock:
r = -EEXIST;
if (kvm->arch.vpit)
goto create_pit_unlock;
+ r = -ENOENT;
+ if (!pic_in_kernel(kvm))
+ goto create_pit_unlock;
r = -ENOMEM;
kvm->arch.vpit = kvm_create_pit(kvm, u.pit_config.flags);
if (kvm->arch.vpit)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 20ef350a60fb..10d5ed8b5990 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -163,23 +163,23 @@ SYM_CODE_END(__get_user_8_handle_exception)
#endif
/* get_user */
- _ASM_EXTABLE(1b, __get_user_handle_exception)
- _ASM_EXTABLE(2b, __get_user_handle_exception)
- _ASM_EXTABLE(3b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(4b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(4b, __get_user_8_handle_exception)
- _ASM_EXTABLE(5b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(4b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(5b, __get_user_8_handle_exception)
#endif
/* __get_user */
- _ASM_EXTABLE(6b, __get_user_handle_exception)
- _ASM_EXTABLE(7b, __get_user_handle_exception)
- _ASM_EXTABLE(8b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __get_user_handle_exception)
#ifdef CONFIG_X86_64
- _ASM_EXTABLE(9b, __get_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_handle_exception)
#else
- _ASM_EXTABLE(9b, __get_user_8_handle_exception)
- _ASM_EXTABLE(10b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(9b, __get_user_8_handle_exception)
+ _ASM_EXTABLE_UA(10b, __get_user_8_handle_exception)
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 2877f5934177..975c9c18263d 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -133,15 +133,15 @@ SYM_CODE_START_LOCAL(__put_user_handle_exception)
RET
SYM_CODE_END(__put_user_handle_exception)
- _ASM_EXTABLE(1b, __put_user_handle_exception)
- _ASM_EXTABLE(2b, __put_user_handle_exception)
- _ASM_EXTABLE(3b, __put_user_handle_exception)
- _ASM_EXTABLE(4b, __put_user_handle_exception)
- _ASM_EXTABLE(5b, __put_user_handle_exception)
- _ASM_EXTABLE(6b, __put_user_handle_exception)
- _ASM_EXTABLE(7b, __put_user_handle_exception)
- _ASM_EXTABLE(9b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(1b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(2b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(3b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(4b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(5b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(6b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(7b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(9b, __put_user_handle_exception)
#ifdef CONFIG_X86_32
- _ASM_EXTABLE(8b, __put_user_handle_exception)
- _ASM_EXTABLE(10b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(8b, __put_user_handle_exception)
+ _ASM_EXTABLE_UA(10b, __put_user_handle_exception)
#endif
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 679b09cfe241..d6375b3c633b 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -798,15 +798,6 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
show_opcodes(regs, loglvl);
}
-/*
- * The (legacy) vsyscall page is the long page in the kernel portion
- * of the address space that has user-accessible permissions.
- */
-static bool is_vsyscall_vaddr(unsigned long vaddr)
-{
- return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
-}
-
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
unsigned long address, u32 pkey, int si_code)
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 968d7005f4a7..f50cc210a981 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -26,18 +26,31 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
for (; addr < end; addr = next) {
pud_t *pud = pud_page + pud_index(addr);
pmd_t *pmd;
+ bool use_gbpage;
next = (addr & PUD_MASK) + PUD_SIZE;
if (next > end)
next = end;
- if (info->direct_gbpages) {
- pud_t pudval;
+ /* if this is already a gbpage, this portion is already mapped */
+ if (pud_large(*pud))
+ continue;
+
+ /* Is using a gbpage allowed? */
+ use_gbpage = info->direct_gbpages;
- if (pud_present(*pud))
- continue;
+ /* Don't use gbpage if it maps more than the requested region. */
+ /* at the begining: */
+ use_gbpage &= ((addr & ~PUD_MASK) == 0);
+ /* ... or at the end: */
+ use_gbpage &= ((next & ~PUD_MASK) == 0);
+
+ /* Never overwrite existing mappings */
+ use_gbpage &= !pud_present(*pud);
+
+ if (use_gbpage) {
+ pud_t pudval;
- addr &= PUD_MASK;
pudval = __pud((addr - info->offset) | info->page_flag);
set_pud(pud, pudval);
continue;
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
index 6993f026adec..42115ac079cf 100644
--- a/arch/x86/mm/maccess.c
+++ b/arch/x86/mm/maccess.c
@@ -3,6 +3,8 @@
#include <linux/uaccess.h>
#include <linux/kernel.h>
+#include <asm/vsyscall.h>
+
#ifdef CONFIG_X86_64
bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
{
@@ -16,6 +18,14 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size)
return false;
/*
+ * Reading from the vsyscall page may cause an unhandled fault in
+ * certain cases. Though it is at an address above TASK_SIZE_MAX, it is
+ * usually considered as a user space address.
+ */
+ if (is_vsyscall_vaddr(vaddr))
+ return false;
+
+ /*
* Allow everything during early boot before 'x86_virt_bits'
* is initialized. Needed for instruction decoding in early
* exception handlers.
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index adc497b93f03..65e9a6e391c0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -934,7 +934,7 @@ static int __init cmp_memblk(const void *a, const void *b)
const struct numa_memblk *ma = *(const struct numa_memblk **)a;
const struct numa_memblk *mb = *(const struct numa_memblk **)b;
- return ma->start - mb->start;
+ return (ma->start > mb->start) - (ma->start < mb->start);
}
static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
@@ -944,14 +944,12 @@ static struct numa_memblk *numa_memblk_list[NR_NODE_MEMBLKS] __initdata;
* @start: address to begin fill
* @end: address to end fill
*
- * Find and extend numa_meminfo memblks to cover the @start-@end
- * physical address range, such that the first memblk includes
- * @start, the last memblk includes @end, and any gaps in between
- * are filled.
+ * Find and extend numa_meminfo memblks to cover the physical
+ * address range @start-@end
*
* RETURNS:
* 0 : Success
- * NUMA_NO_MEMBLK : No memblk exists in @start-@end range
+ * NUMA_NO_MEMBLK : No memblks exist in address range @start-@end
*/
int __init numa_fill_memblks(u64 start, u64 end)
@@ -963,17 +961,14 @@ int __init numa_fill_memblks(u64 start, u64 end)
/*
* Create a list of pointers to numa_meminfo memblks that
- * overlap start, end. Exclude (start == bi->end) since
- * end addresses in both a CFMWS range and a memblk range
- * are exclusive.
- *
- * This list of pointers is used to make in-place changes
- * that fill out the numa_meminfo memblks.
+ * overlap start, end. The list is used to make in-place
+ * changes that fill out the numa_meminfo memblks.
*/
for (int i = 0; i < mi->nr_blks; i++) {
struct numa_memblk *bi = &mi->blk[i];
- if (start < bi->end && end >= bi->start) {
+ if (memblock_addrs_overlap(start, end - start, bi->start,
+ bi->end - bi->start)) {
blk[count] = &mi->blk[i];
count++;
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 4b0d6fff88de..1fb9a1644d94 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -65,6 +65,8 @@ int xen_smp_intr_init(unsigned int cpu)
char *resched_name, *callfunc_name, *debug_name;
resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
+ if (!resched_name)
+ goto fail_mem;
per_cpu(xen_resched_irq, cpu).name = resched_name;
rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
cpu,
@@ -77,6 +79,8 @@ int xen_smp_intr_init(unsigned int cpu)
per_cpu(xen_resched_irq, cpu).irq = rc;
callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
per_cpu(xen_callfunc_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
cpu,
@@ -90,6 +94,9 @@ int xen_smp_intr_init(unsigned int cpu)
if (!xen_fifo_events) {
debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+ if (!debug_name)
+ goto fail_mem;
+
per_cpu(xen_debug_irq, cpu).name = debug_name;
rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu,
xen_debug_interrupt,
@@ -101,6 +108,9 @@ int xen_smp_intr_init(unsigned int cpu)
}
callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu);
+ if (!callfunc_name)
+ goto fail_mem;
+
per_cpu(xen_callfuncsingle_irq, cpu).name = callfunc_name;
rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR,
cpu,
@@ -114,6 +124,8 @@ int xen_smp_intr_init(unsigned int cpu)
return 0;
+ fail_mem:
+ rc = -ENOMEM;
fail:
xen_smp_intr_free(cpu);
return rc;