summaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S19
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c1
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c1
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c1
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c1
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c1
-rw-r--r--arch/x86/events/intel/lbr.c4
-rw-r--r--arch/x86/hyperv/hv_apic.c8
-rw-r--r--arch/x86/include/asm/atomic.h12
-rw-r--r--arch/x86/include/asm/atomic64_32.h8
-rw-r--r--arch/x86/include/asm/atomic64_64.h12
-rw-r--r--arch/x86/include/asm/fixmap.h10
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h16
-rw-r--r--arch/x86/include/asm/kdebug.h12
-rw-r--r--arch/x86/include/asm/kvm_host.h27
-rw-r--r--arch/x86/include/asm/mem_encrypt.h7
-rw-r--r--arch/x86/include/asm/pgtable-2level.h9
-rw-r--r--arch/x86/include/asm/pgtable.h2
-rw-r--r--arch/x86/include/asm/pgtable_64.h23
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h17
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c27
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c53
-rw-r--r--arch/x86/kernel/cpu/microcode/amd.c24
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c17
-rw-r--r--arch/x86/kernel/dumpstack.c11
-rw-r--r--arch/x86/kernel/eisa.c10
-rw-r--r--arch/x86/kernel/head64.c20
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/kvmclock.c52
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/topology.c4
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S19
-rw-r--r--arch/x86/kvm/lapic.c49
-rw-r--r--arch/x86/kvm/mmu.c35
-rw-r--r--arch/x86/kvm/svm.c26
-rw-r--r--arch/x86/kvm/vmx.c181
-rw-r--r--arch/x86/kvm/x86.c129
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/mm/init.c4
-rw-r--r--arch/x86/mm/mem_encrypt.c24
-rw-r--r--arch/x86/mm/pgtable.c17
-rw-r--r--arch/x86/platform/efi/efi_32.c3
-rw-r--r--arch/x86/xen/mmu_pv.c8
-rw-r--r--arch/x86/xen/pmu.c2
50 files changed, 661 insertions, 290 deletions
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index eaa843a52907..a480356e0ed8 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -25,20 +25,6 @@ ENTRY(get_sev_encryption_bit)
push %ebx
push %ecx
push %edx
- push %edi
-
- /*
- * RIP-relative addressing is needed to access the encryption bit
- * variable. Since we are running in 32-bit mode we need this call/pop
- * sequence to get the proper relative addressing.
- */
- call 1f
-1: popl %edi
- subl $1b, %edi
-
- movl enc_bit(%edi), %eax
- cmpl $0, %eax
- jge .Lsev_exit
/* Check if running under a hypervisor */
movl $1, %eax
@@ -69,15 +55,12 @@ ENTRY(get_sev_encryption_bit)
movl %ebx, %eax
andl $0x3f, %eax /* Return the encryption bit location */
- movl %eax, enc_bit(%edi)
jmp .Lsev_exit
.Lno_sev:
xor %eax, %eax
- movl %eax, enc_bit(%edi)
.Lsev_exit:
- pop %edi
pop %edx
pop %ecx
pop %ebx
@@ -113,8 +96,6 @@ ENTRY(set_sev_encryption_mask)
ENDPROC(set_sev_encryption_mask)
.data
-enc_bit:
- .int 0xffffffff
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index acd11b3bf639..2a356b948720 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis128_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 2071c3d1ae07..dbe8bb980da1 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis128l_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index b5f2a8fd5a71..8bebda2de92f 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -379,7 +379,6 @@ static int __init crypto_aegis256_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
!boot_cpu_has(X86_FEATURE_AES) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index 95cf857d2cbb..f40244eaf14d 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -40,7 +40,6 @@ MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
static int __init crypto_morus1280_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 615fb7bc9a32..9afaf8f8565a 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -40,7 +40,6 @@ MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
static int __init crypto_morus640_sse2_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
- !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index f3e006bed9a7..c88ed39582a1 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1272,4 +1272,8 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
x86_pmu.lbr_sel_map = snb_lbr_sel_map;
+
+ /* Knights Landing does have MISPREDICT bit */
+ if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_LIP)
+ x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5b0f613428c2..2c43e3055948 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -95,8 +95,8 @@ static void hv_apic_eoi_write(u32 reg, u32 val)
*/
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
{
- struct ipi_arg_ex **arg;
- struct ipi_arg_ex *ipi_arg;
+ struct hv_send_ipi_ex **arg;
+ struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
int nr_bank = 0;
int ret = 1;
@@ -105,7 +105,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector)
return false;
local_irq_save(flags);
- arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
ipi_arg = *arg;
if (unlikely(!ipi_arg))
@@ -135,7 +135,7 @@ ipi_mask_ex_done:
static bool __send_ipi_mask(const struct cpumask *mask, int vector)
{
int cur_cpu, vcpu;
- struct ipi_arg_non_ex ipi_arg;
+ struct hv_send_ipi ipi_arg;
int ret = 1;
trace_hyperv_send_ipi_mask(mask, vector);
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b143717b92b3..ce84388e540c 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_sub_and_test arch_atomic_sub_and_test
static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
}
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
/**
* arch_atomic_inc - increment atomic variable
@@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic_inc arch_atomic_inc
static __always_inline void arch_atomic_inc(atomic_t *v)
{
asm volatile(LOCK_PREFIX "incl %0"
: "+m" (v->counter));
}
+#define arch_atomic_inc arch_atomic_inc
/**
* arch_atomic_dec - decrement atomic variable
@@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic_dec arch_atomic_dec
static __always_inline void arch_atomic_dec(atomic_t *v)
{
asm volatile(LOCK_PREFIX "decl %0"
: "+m" (v->counter));
}
+#define arch_atomic_dec arch_atomic_dec
/**
* arch_atomic_dec_and_test - decrement and test
@@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic_dec_and_test arch_atomic_dec_and_test
static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
}
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
/**
* arch_atomic_inc_and_test - increment and test
@@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic_inc_and_test arch_atomic_inc_and_test
static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
}
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
/**
* arch_atomic_add_negative - add and test if negative
@@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic_add_negative arch_atomic_add_negative
static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
}
+#define arch_atomic_add_negative arch_atomic_add_negative
/**
* arch_atomic_add_return - add integer and return
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index ef959f02d070..6a5b0ec460da 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static inline void arch_atomic64_inc(atomic64_t *v)
{
__alternative_atomic64(inc, inc_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static inline void arch_atomic64_dec(atomic64_t *v)
{
__alternative_atomic64(dec, dec_return, /* no output */,
"S" (v) : "memory", "eax", "ecx", "edx");
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_add_unless - add unless the number is a given value
@@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
return (int)a;
}
-#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
{
int r;
@@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
"S" (v) : "ecx", "edx", "memory");
return r;
}
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
{
long long r;
@@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
"S" (v) : "ecx", "memory");
return r;
}
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
#undef alternative_atomic64
#undef __alternative_atomic64
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 4343d9b4f30e..5f851d92eecd 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
* true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
}
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
/**
* arch_atomic64_inc - increment atomic64 variable
@@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
*
* Atomically increments @v by 1.
*/
-#define arch_atomic64_inc arch_atomic64_inc
static __always_inline void arch_atomic64_inc(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "incq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_inc arch_atomic64_inc
/**
* arch_atomic64_dec - decrement atomic64 variable
@@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
*
* Atomically decrements @v by 1.
*/
-#define arch_atomic64_dec arch_atomic64_dec
static __always_inline void arch_atomic64_dec(atomic64_t *v)
{
asm volatile(LOCK_PREFIX "decq %0"
: "=m" (v->counter)
: "m" (v->counter));
}
+#define arch_atomic64_dec arch_atomic64_dec
/**
* arch_atomic64_dec_and_test - decrement and test
@@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
* returns true if the result is 0, or false for all other
* cases.
*/
-#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
}
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
/**
* arch_atomic64_inc_and_test - increment and test
@@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
* and returns true if the result is zero, or false for all
* other cases.
*/
-#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
{
GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
}
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
/**
* arch_atomic64_add_negative - add and test if negative
@@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
* if the result is negative, or false when
* result is greater than or equal to zero.
*/
-#define arch_atomic64_add_negative arch_atomic64_add_negative
static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
{
GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
}
+#define arch_atomic64_add_negative arch_atomic64_add_negative
/**
* arch_atomic64_add_return - add and return
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index e203169931c7..6390bd8c141b 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -14,6 +14,16 @@
#ifndef _ASM_X86_FIXMAP_H
#define _ASM_X86_FIXMAP_H
+/*
+ * Exposed to assembly code for setting up initial page tables. Cannot be
+ * calculated in assembly code (fixmap entries are an enum), but is sanity
+ * checked in the actual fixmap C code to make sure that the fixmap is
+ * covered fully.
+ */
+#define FIXMAP_PMD_NUM 2
+/* fixmap starts downwards from the 507th entry in level2_fixmap_pgt */
+#define FIXMAP_PMD_TOP 507
+
#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <asm/acpi.h>
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index e977b6b3a538..00e01d215f74 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -726,19 +726,21 @@ struct hv_enlightened_vmcs {
#define HV_STIMER_AUTOENABLE (1ULL << 3)
#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F)
-struct ipi_arg_non_ex {
- u32 vector;
- u32 reserved;
- u64 cpu_mask;
-};
-
struct hv_vpset {
u64 format;
u64 valid_bank_mask;
u64 bank_contents[];
};
-struct ipi_arg_ex {
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
u32 vector;
u32 reserved;
struct hv_vpset vp_set;
diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 395c9631e000..75f1e35e7c15 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -22,10 +22,20 @@ enum die_val {
DIE_NMIUNKNOWN,
};
+enum show_regs_mode {
+ SHOW_REGS_SHORT,
+ /*
+ * For when userspace crashed, but we don't think it's our fault, and
+ * therefore don't print kernel registers.
+ */
+ SHOW_REGS_USER,
+ SHOW_REGS_ALL
+};
+
extern void die(const char *, struct pt_regs *,long);
extern int __must_check __die(const char *, struct pt_regs *, long);
extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, int all);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
extern void show_iret_regs(struct pt_regs *regs);
extern unsigned long oops_begin(void);
extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00ddb0c9e612..09b2e3e2cf1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -869,6 +869,8 @@ struct kvm_arch {
bool x2apic_format;
bool x2apic_broadcast_quirk_disabled;
+
+ bool guest_can_read_msr_platform_info;
};
struct kvm_vm_stat {
@@ -1022,6 +1024,7 @@ struct kvm_x86_ops {
void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr);
+ bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa);
@@ -1055,6 +1058,7 @@ struct kvm_x86_ops {
bool (*umip_emulated)(void);
int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
+ void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
@@ -1237,19 +1241,12 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
-#define EMULTYPE_RETRY (1 << 3)
-#define EMULTYPE_NO_REEXECUTE (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
-#define EMULTYPE_VMWARE (1 << 6)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
- int emulation_type, void *insn, int insn_len);
-
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
- int emulation_type)
-{
- return x86_emulate_instruction(vcpu, 0,
- emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
-}
+#define EMULTYPE_ALLOW_RETRY (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
+#define EMULTYPE_VMWARE (1 << 5)
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@@ -1450,7 +1447,6 @@ asmlinkage void kvm_spurious_fault(void);
____kvm_handle_fault_on_reboot(insn, "")
#define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@@ -1463,7 +1459,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
- unsigned long ipi_bitmap_high, int min,
+ unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
u64 kvm_get_arch_capabilities(void);
@@ -1490,6 +1486,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
int kvm_is_in_guest(void);
diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h
index c0643831706e..616f8e637bc3 100644
--- a/arch/x86/include/asm/mem_encrypt.h
+++ b/arch/x86/include/asm/mem_encrypt.h
@@ -48,10 +48,13 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
/* Architecture __weak replacement functions */
void __init mem_encrypt_init(void);
+void __init mem_encrypt_free_decrypted_mem(void);
bool sme_active(void);
bool sev_active(void);
+#define __bss_decrypted __attribute__((__section__(".bss..decrypted")))
+
#else /* !CONFIG_AMD_MEM_ENCRYPT */
#define sme_me_mask 0ULL
@@ -77,6 +80,8 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0;
static inline int __init
early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; }
+#define __bss_decrypted
+
#endif /* CONFIG_AMD_MEM_ENCRYPT */
/*
@@ -88,6 +93,8 @@ early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0;
#define __sme_pa(x) (__pa(x) | sme_me_mask)
#define __sme_pa_nodebug(x) (__pa_nodebug(x) | sme_me_mask)
+extern char __start_bss_decrypted[], __end_bss_decrypted[], __start_bss_decrypted_unused[];
+
#endif /* __ASSEMBLY__ */
#endif /* __X86_MEM_ENCRYPT_H__ */
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h
index 24c6cf5f16b7..60d0f9015317 100644
--- a/arch/x86/include/asm/pgtable-2level.h
+++ b/arch/x86/include/asm/pgtable-2level.h
@@ -19,9 +19,6 @@ static inline void native_set_pte(pte_t *ptep , pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pmd.pud.p4d.pgd = pti_set_user_pgtbl(&pmdp->pud.p4d.pgd, pmd.pud.p4d.pgd);
-#endif
*pmdp = pmd;
}
@@ -61,9 +58,6 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp)
#ifdef CONFIG_SMP
static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pti_set_user_pgtbl(&xp->pud.p4d.pgd, __pgd(0));
-#endif
return __pmd(xchg((pmdval_t *)xp, 0));
}
#else
@@ -73,9 +67,6 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#ifdef CONFIG_SMP
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
- pti_set_user_pgtbl(&xp->p4d.pgd, __pgd(0));
-#endif
return __pud(xchg((pudval_t *)xp, 0));
}
#else
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e4ffa565a69f..690c0307afed 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1195,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
return xchg(pmdp, pmd);
} else {
pmd_t old = *pmdp;
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
return old;
}
}
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f773d5e6c8cc..9c85b54bf03c 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -14,6 +14,7 @@
#include <asm/processor.h>
#include <linux/bitops.h>
#include <linux/threads.h>
+#include <asm/fixmap.h>
extern p4d_t level4_kernel_pgt[512];
extern p4d_t level4_ident_pgt[512];
@@ -22,7 +23,7 @@ extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pmd_t level2_fixmap_pgt[512];
extern pmd_t level2_ident_pgt[512];
-extern pte_t level1_fixmap_pgt[512];
+extern pte_t level1_fixmap_pgt[512 * FIXMAP_PMD_NUM];
extern pgd_t init_top_pgt[];
#define swapper_pg_dir init_top_pgt
@@ -55,15 +56,15 @@ struct mm_struct;
void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
- pte_t *ptep)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
{
- *ptep = native_make_pte(0);
+ WRITE_ONCE(*ptep, pte);
}
-static inline void native_set_pte(pte_t *ptep, pte_t pte)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
{
- *ptep = pte;
+ native_set_pte(ptep, native_make_pte(0));
}
static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -73,7 +74,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
{
- *pmdp = pmd;
+ WRITE_ONCE(*pmdp, pmd);
}
static inline void native_pmd_clear(pmd_t *pmd)
@@ -109,7 +110,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
- *pudp = pud;
+ WRITE_ONCE(*pudp, pud);
}
static inline void native_pud_clear(pud_t *pud)
@@ -137,13 +138,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
pgd_t pgd;
if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
- *p4dp = p4d;
+ WRITE_ONCE(*p4dp, p4d);
return;
}
pgd = native_make_pgd(native_p4d_val(p4d));
pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
- *p4dp = native_make_p4d(native_pgd_val(pgd));
+ WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd)));
}
static inline void native_p4d_clear(p4d_t *p4d)
@@ -153,7 +154,7 @@ static inline void native_p4d_clear(p4d_t *p4d)
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- *pgdp = pti_set_user_pgtbl(pgdp, pgd);
+ WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd));
}
static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 86299efa804a..fd23d5778ea1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -377,6 +377,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 9f148e3d45b4..7654febd5102 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd)
if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
/* Something in the core code broke! Survive gracefully */
pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
- return EINVAL;
+ return -EINVAL;
}
ret = assign_managed_vector(irqd, vector_searchmask);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index ec00d1ff5098..f7151cd03cb0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -1640,6 +1640,7 @@ static int do_open(struct inode *inode, struct file *filp)
return 0;
}
+#ifdef CONFIG_PROC_FS
static int proc_apm_show(struct seq_file *m, void *v)
{
unsigned short bx;
@@ -1719,6 +1720,7 @@ static int proc_apm_show(struct seq_file *m, void *v)
units);
return 0;
}
+#endif
static int apm(void *unused)
{
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 4e588f36228f..285eb3ec4200 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -382,6 +382,11 @@ static inline bool is_mbm_event(int e)
e <= QOS_L3_MBM_LOCAL_EVENT_ID);
}
+struct rdt_parse_data {
+ struct rdtgroup *rdtgrp;
+ char *buf;
+};
+
/**
* struct rdt_resource - attributes of an RDT resource
* @rid: The index of the resource
@@ -423,16 +428,19 @@ struct rdt_resource {
struct rdt_cache cache;
struct rdt_membw membw;
const char *format_str;
- int (*parse_ctrlval) (void *data, struct rdt_resource *r,
- struct rdt_domain *d);
+ int (*parse_ctrlval)(struct rdt_parse_data *data,
+ struct rdt_resource *r,
+ struct rdt_domain *d);
struct list_head evt_list;
int num_rmid;
unsigned int mon_scale;
unsigned long fflags;
};
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d);
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d);
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d);
extern struct mutex rdtgroup_mutex;
@@ -536,6 +544,7 @@ int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int update_domains(struct rdt_resource *r, int closid);
+int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index af358ca05160..0f53049719cd 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -64,19 +64,19 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return true;
}
-int parse_bw(void *_buf, struct rdt_resource *r, struct rdt_domain *d)
+int parse_bw(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- unsigned long data;
- char *buf = _buf;
+ unsigned long bw_val;
if (d->have_new_ctrl) {
rdt_last_cmd_printf("duplicate domain %d\n", d->id);
return -EINVAL;
}
- if (!bw_validate(buf, &data, r))
+ if (!bw_validate(data->buf, &bw_val, r))
return -EINVAL;
- d->new_ctrl = data;
+ d->new_ctrl = bw_val;
d->have_new_ctrl = true;
return 0;
@@ -123,18 +123,13 @@ static bool cbm_validate(char *buf, u32 *data, struct rdt_resource *r)
return true;
}
-struct rdt_cbm_parse_data {
- struct rdtgroup *rdtgrp;
- char *buf;
-};
-
/*
* Read one cache bit mask (hex). Check that it is valid for the current
* resource type.
*/
-int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
+int parse_cbm(struct rdt_parse_data *data, struct rdt_resource *r,
+ struct rdt_domain *d)
{
- struct rdt_cbm_parse_data *data = _data;
struct rdtgroup *rdtgrp = data->rdtgrp;
u32 cbm_val;
@@ -195,11 +190,17 @@ int parse_cbm(void *_data, struct rdt_resource *r, struct rdt_domain *d)
static int parse_line(char *line, struct rdt_resource *r,
struct rdtgroup *rdtgrp)
{
- struct rdt_cbm_parse_data data;
+ struct rdt_parse_data data;
char *dom = NULL, *id;
struct rdt_domain *d;
unsigned long dom_id;
+ if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP &&
+ r->rid == RDT_RESOURCE_MBA) {
+ rdt_last_cmd_puts("Cannot pseudo-lock MBA resource\n");
+ return -EINVAL;
+ }
+
next:
if (!line || line[0] == '\0')
return 0;
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index b799c00bef09..1b8e86a5d5e1 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -97,6 +97,12 @@ void rdt_last_cmd_printf(const char *fmt, ...)
* limited as the number of resources grows.
*/
static int closid_free_map;
+static int closid_free_map_len;
+
+int closids_supported(void)
+{
+ return closid_free_map_len;
+}
static void closid_init(void)
{
@@ -111,6 +117,7 @@ static void closid_init(void)
/* CLOSID 0 is always reserved for the default group */
closid_free_map &= ~1;
+ closid_free_map_len = rdt_min_closid;
}
static int closid_alloc(void)
@@ -802,7 +809,7 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
sw_shareable = 0;
exclusive = 0;
seq_printf(seq, "%d=", dom->id);
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (!closid_allocated(i))
continue;
mode = rdtgroup_mode_by_closid(i);
@@ -989,7 +996,7 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
/* Check for overlap with other resource groups */
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
ctrl_b = (unsigned long *)ctrl;
mode = rdtgroup_mode_by_closid(i);
if (closid_allocated(i) && i != closid &&
@@ -1024,16 +1031,27 @@ static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
{
int closid = rdtgrp->closid;
struct rdt_resource *r;
+ bool has_cache = false;
struct rdt_domain *d;
for_each_alloc_enabled_rdt_resource(r) {
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
+ has_cache = true;
list_for_each_entry(d, &r->domains, list) {
if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
- rdtgrp->closid, false))
+ rdtgrp->closid, false)) {
+ rdt_last_cmd_puts("schemata overlaps\n");
return false;
+ }
}
}
+ if (!has_cache) {
+ rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
+ return false;
+ }
+
return true;
}
@@ -1085,7 +1103,6 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
rdtgrp->mode = RDT_MODE_SHAREABLE;
} else if (!strcmp(buf, "exclusive")) {
if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
- rdt_last_cmd_printf("schemata overlaps\n");
ret = -EINVAL;
goto out;
}
@@ -1155,8 +1172,8 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
struct rdt_resource *r;
struct rdt_domain *d;
unsigned int size;
- bool sep = false;
- u32 cbm;
+ bool sep;
+ u32 ctrl;
rdtgrp = rdtgroup_kn_lock_live(of->kn);
if (!rdtgrp) {
@@ -1174,6 +1191,7 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
}
for_each_alloc_enabled_rdt_resource(r) {
+ sep = false;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(d, &r->domains, list) {
if (sep)
@@ -1181,8 +1199,13 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
size = 0;
} else {
- cbm = d->ctrl_val[rdtgrp->closid];
- size = rdtgroup_cbm_to_size(r, d, cbm);
+ ctrl = (!is_mba_sc(r) ?
+ d->ctrl_val[rdtgrp->closid] :
+ d->mbps_val[rdtgrp->closid]);
+ if (r->rid == RDT_RESOURCE_MBA)
+ size = ctrl;
+ else
+ size = rdtgroup_cbm_to_size(r, d, ctrl);
}
seq_printf(s, "%d=%u", d->id, size);
sep = true;
@@ -2336,12 +2359,18 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
u32 *ctrl;
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
list_for_each_entry(d, &r->domains, list) {
d->have_new_ctrl = false;
d->new_ctrl = r->cache.shareable_bits;
used_b = r->cache.shareable_bits;
ctrl = d->ctrl_val;
- for (i = 0; i < r->num_closid; i++, ctrl++) {
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
if (closid_allocated(i) && i != closid) {
mode = rdtgroup_mode_by_closid(i);
if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
@@ -2373,6 +2402,12 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
}
for_each_alloc_enabled_rdt_resource(r) {
+ /*
+ * Only initialize default allocations for CBM cache
+ * resources
+ */
+ if (r->rid == RDT_RESOURCE_MBA)
+ continue;
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("failed to initialize allocations\n");
diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 0624957aa068..07b5fc00b188 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
struct microcode_amd *mc_amd;
struct ucode_cpu_info *uci;
struct ucode_patch *p;
+ enum ucode_state ret;
u32 rev, dummy;
BUG_ON(raw_smp_processor_id() != cpu);
@@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu)
/* need to apply patch? */
if (rev >= mc_amd->hdr.patch_id) {
- c->microcode = rev;
- uci->cpu_sig.rev = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
if (__apply_microcode_amd(mc_amd)) {
@@ -531,13 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu)
cpu, mc_amd->hdr.patch_id);
return UCODE_ERROR;
}
- pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
- mc_amd->hdr.patch_id);
- uci->cpu_sig.rev = mc_amd->hdr.patch_id;
- c->microcode = mc_amd->hdr.patch_id;
+ rev = mc_amd->hdr.patch_id;
+ ret = UCODE_UPDATED;
+
+ pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
- return UCODE_UPDATED;
+out:
+ uci->cpu_sig.rev = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
+
+ return ret;
}
static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 97ccf4c3b45b..16936a24795c 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct cpuinfo_x86 *c = &cpu_data(cpu);
struct microcode_intel *mc;
+ enum ucode_state ret;
static int prev_rev;
u32 rev;
@@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu)
*/
rev = intel_get_microcode_revision();
if (rev >= mc->hdr.rev) {
- uci->cpu_sig.rev = rev;
- c->microcode = rev;
- return UCODE_OK;
+ ret = UCODE_OK;
+ goto out;
}
/*
@@ -848,10 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu)
prev_rev = rev;
}
+ ret = UCODE_UPDATED;
+
+out:
uci->cpu_sig.rev = rev;
- c->microcode = rev;
+ c->microcode = rev;
+
+ /* Update boot_cpu_data's revision too, if we're on the BSP: */
+ if (c->cpu_index == boot_cpu_data.cpu_index)
+ boot_cpu_data.microcode = rev;
- return UCODE_UPDATED;
+ return ret;
}
static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f56895106ccf..2b5886401e5f 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -146,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
* they can be printed in the right context.
*/
if (!partial && on_stack(info, regs, sizeof(*regs))) {
- __show_regs(regs, 0);
+ __show_regs(regs, SHOW_REGS_SHORT);
} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
IRET_FRAME_SIZE)) {
@@ -344,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
oops_exit();
/* Executive summary in case the oops scrolled away */
- __show_regs(&exec_summary_regs, true);
+ __show_regs(&exec_summary_regs, SHOW_REGS_ALL);
if (!signr)
return;
@@ -407,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err)
void show_regs(struct pt_regs *regs)
{
- bool all = true;
-
show_regs_print_info(KERN_DEFAULT);
- if (IS_ENABLED(CONFIG_X86_32))
- all = !user_mode(regs);
-
- __show_regs(regs, all);
+ __show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
/*
* When in-kernel, we also print out the stack at the time of the fault..
diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c
index f260e452e4f8..e8c8c5d78dbd 100644
--- a/arch/x86/kernel/eisa.c
+++ b/arch/x86/kernel/eisa.c
@@ -7,11 +7,17 @@
#include <linux/eisa.h>
#include <linux/io.h>
+#include <xen/xen.h>
+
static __init int eisa_bus_probe(void)
{
- void __iomem *p = ioremap(0x0FFFD9, 4);
+ void __iomem *p;
+
+ if (xen_pv_domain() && !xen_initial_domain())
+ return 0;
- if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+ p = ioremap(0x0FFFD9, 4);
+ if (p && readl(p) == 'E' + ('I' << 8) + ('S' << 16) + ('A' << 24))
EISA_bus = 1;
iounmap(p);
return 0;
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 8047379e575a..ddee1f0870c4 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -35,6 +35,7 @@
#include <asm/bootparam_utils.h>
#include <asm/microcode.h>
#include <asm/kasan.h>
+#include <asm/fixmap.h>
/*
* Manage page tables very early on.
@@ -112,6 +113,7 @@ static bool __head check_la57_support(unsigned long physaddr)
unsigned long __head __startup_64(unsigned long physaddr,
struct boot_params *bp)
{
+ unsigned long vaddr, vaddr_end;
unsigned long load_delta, *p;
unsigned long pgtable_flags;
pgdval_t *pgd;
@@ -165,7 +167,8 @@ unsigned long __head __startup_64(unsigned long physaddr,
pud[511] += load_delta;
pmd = fixup_pointer(level2_fixmap_pgt, physaddr);
- pmd[506] += load_delta;
+ for (i = FIXMAP_PMD_TOP; i > FIXMAP_PMD_TOP - FIXMAP_PMD_NUM; i--)
+ pmd[i] += load_delta;
/*
* Set up the identity mapping for the switchover. These
@@ -235,6 +238,21 @@ unsigned long __head __startup_64(unsigned long physaddr,
sme_encrypt_kernel(bp);
/*
+ * Clear the memory encryption mask from the .bss..decrypted section.
+ * The bss section will be memset to zero later in the initialization so
+ * there is no need to zero it after changing the memory encryption
+ * attribute.
+ */
+ if (mem_encrypt_active()) {
+ vaddr = (unsigned long)__start_bss_decrypted;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ for (; vaddr < vaddr_end; vaddr += PMD_SIZE) {
+ i = pmd_index(vaddr);
+ pmd[i] -= sme_get_me_mask();
+ }
+ }
+
+ /*
* Return the SME encryption mask (if SME is active) to be used as a
* modifier for the initial pgdir entry programmed into CR3.
*/
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 15ebc2fc166e..a3618cf04cf6 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -24,6 +24,7 @@
#include "../entry/calling.h"
#include <asm/export.h>
#include <asm/nospec-branch.h>
+#include <asm/fixmap.h>
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -445,13 +446,20 @@ NEXT_PAGE(level2_kernel_pgt)
KERNEL_IMAGE_SIZE/PMD_SIZE)
NEXT_PAGE(level2_fixmap_pgt)
- .fill 506,8,0
- .quad level1_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
- /* 8MB reserved for vsyscalls + a 2MB hole = 4 + 1 entries */
- .fill 5,8,0
+ .fill (512 - 4 - FIXMAP_PMD_NUM),8,0
+ pgtno = 0
+ .rept (FIXMAP_PMD_NUM)
+ .quad level1_fixmap_pgt + (pgtno << PAGE_SHIFT) - __START_KERNEL_map \
+ + _PAGE_TABLE_NOENC;
+ pgtno = pgtno + 1
+ .endr
+ /* 6 MB reserved space + a 2MB hole */
+ .fill 4,8,0
NEXT_PAGE(level1_fixmap_pgt)
+ .rept (FIXMAP_PMD_NUM)
.fill 512,8,0
+ .endr
#undef PMDS
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1e6764648af3..013fe3d21dbb 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -28,6 +28,7 @@
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/set_memory.h>
#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
@@ -61,9 +62,10 @@ early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);
(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
static struct pvclock_vsyscall_time_info
- hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __aligned(PAGE_SIZE);
-static struct pvclock_wall_clock wall_clock;
+ hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
+static struct pvclock_wall_clock wall_clock __bss_decrypted;
static DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
+static struct pvclock_vsyscall_time_info *hvclock_mem;
static inline struct pvclock_vcpu_time_info *this_cpu_pvti(void)
{
@@ -236,6 +238,45 @@ static void kvm_shutdown(void)
native_machine_shutdown();
}
+static void __init kvmclock_init_mem(void)
+{
+ unsigned long ncpus;
+ unsigned int order;
+ struct page *p;
+ int r;
+
+ if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
+ return;
+
+ ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
+ order = get_order(ncpus * sizeof(*hvclock_mem));
+
+ p = alloc_pages(GFP_KERNEL, order);
+ if (!p) {
+ pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
+ return;
+ }
+
+ hvclock_mem = page_address(p);
+
+ /*
+ * hvclock is shared between the guest and the hypervisor, must
+ * be mapped decrypted.
+ */
+ if (sev_active()) {
+ r = set_memory_decrypted((unsigned long) hvclock_mem,
+ 1UL << order);
+ if (r) {
+ __free_pages(p, order);
+ hvclock_mem = NULL;
+ pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
+ return;
+ }
+ }
+
+ memset(hvclock_mem, 0, PAGE_SIZE << order);
+}
+
static int __init kvm_setup_vsyscall_timeinfo(void)
{
#ifdef CONFIG_X86_64
@@ -250,6 +291,9 @@ static int __init kvm_setup_vsyscall_timeinfo(void)
kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
#endif
+
+ kvmclock_init_mem();
+
return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);
@@ -269,8 +313,10 @@ static int kvmclock_setup_percpu(unsigned int cpu)
/* Use the static page for the first CPUs, allocate otherwise */
if (cpu < HVC_BOOT_ARRAY_SIZE)
p = &hv_clock_boot[cpu];
+ else if (hvclock_mem)
+ p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
else
- p = kzalloc(sizeof(*p), GFP_KERNEL);
+ return -ENOMEM;
per_cpu(hv_clock_per_cpu, cpu) = p;
return p ? 0 : -ENOMEM;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index afdb303285f8..8dc69d82567e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -91,7 +91,7 @@ unsigned paravirt_patch_call(void *insnbuf,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect CALL in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
@@ -111,7 +111,7 @@ unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
if (len < 5) {
#ifdef CONFIG_RETPOLINE
- WARN_ONCE("Failing to patch indirect JMP in %ps\n", (void *)addr);
+ WARN_ONCE(1, "Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
return len; /* call too long for patch site */
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2924fd447e61..5046a3c9dec2 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,7 @@
#include <asm/intel_rdt_sched.h>
#include <asm/proto.h>
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
- if (!all)
+ if (mode != SHOW_REGS_ALL)
return;
cr0 = read_cr0();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a451bc374b9b..ea5ea850348d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -62,7 +62,7 @@
__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
unsigned long d0, d1, d2, d3, d6, d7;
@@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
- if (!all)
+ if (mode == SHOW_REGS_SHORT)
return;
+ if (mode == SHOW_REGS_USER) {
+ rdmsrl(MSR_FS_BASE, fs);
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+ printk(KERN_DEFAULT "FS: %016lx GS: %016lx\n",
+ fs, shadowgs);
+ return;
+ }
+
asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 12cbe2b88c0f..738bf42b0218 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -111,8 +111,10 @@ int arch_register_cpu(int num)
/*
* Currently CPU0 is only hotpluggable on Intel platforms. Other
* vendors can add hotplug support later.
+ * Xen PV guests don't support CPU0 hotplug at all.
*/
- if (c->x86_vendor != X86_VENDOR_INTEL)
+ if (c->x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_has(X86_FEATURE_XENPV))
cpu0_hotpluggable = 0;
/*
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1463468ba9a0..6490f618e096 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1415,7 +1415,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
static unsigned long __init get_loops_per_jiffy(void)
{
- unsigned long lpj = tsc_khz * KHZ;
+ u64 lpj = (u64)tsc_khz * KHZ;
do_div(lpj, HZ);
return lpj;
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 8bde0a419f86..5dd3317d761f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -65,6 +65,23 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN . = ALIGN(PMD_SIZE);
#define ALIGN_ENTRY_TEXT_END . = ALIGN(PMD_SIZE);
+/*
+ * This section contains data which will be mapped as decrypted. Memory
+ * encryption operates on a page basis. Make this section PMD-aligned
+ * to avoid splitting the pages while mapping the section early.
+ *
+ * Note: We use a separate section so that only this section gets
+ * decrypted to avoid exposing more than we wish.
+ */
+#define BSS_DECRYPTED \
+ . = ALIGN(PMD_SIZE); \
+ __start_bss_decrypted = .; \
+ *(.bss..decrypted); \
+ . = ALIGN(PAGE_SIZE); \
+ __start_bss_decrypted_unused = .; \
+ . = ALIGN(PMD_SIZE); \
+ __end_bss_decrypted = .; \
+
#else
#define X86_ALIGN_RODATA_BEGIN
@@ -74,6 +91,7 @@ jiffies_64 = jiffies;
#define ALIGN_ENTRY_TEXT_BEGIN
#define ALIGN_ENTRY_TEXT_END
+#define BSS_DECRYPTED
#endif
@@ -355,6 +373,7 @@ SECTIONS
__bss_start = .;
*(.bss..page_aligned)
*(.bss)
+ BSS_DECRYPTED
. = ALIGN(PAGE_SIZE);
__bss_stop = .;
}
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cefba28c864..fbb0e6df121b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
}
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
- unsigned long ipi_bitmap_high, int min,
+ unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
+ if (min > map->max_apic_id)
+ goto out;
/* Bits above cluster_size are masked in the caller. */
- for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ for_each_set_bit(i, &ipi_bitmap_low,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
min += cluster_size;
- for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
- vcpu = map->phys_map[min + i]->vcpu;
- count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+ if (min > map->max_apic_id)
+ goto out;
+
+ for_each_set_bit(i, &ipi_bitmap_high,
+ min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+ if (map->phys_map[min + i]) {
+ vcpu = map->phys_map[min + i]->vcpu;
+ count += kvm_apic_set_irq(vcpu, &irq, NULL);
+ }
}
+out:
rcu_read_unlock();
return count;
}
@@ -1331,9 +1344,8 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reg_read);
static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
{
- return kvm_apic_hw_enabled(apic) &&
- addr >= apic->base_address &&
- addr < apic->base_address + LAPIC_MMIO_LENGTH;
+ return addr >= apic->base_address &&
+ addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
@@ -1345,6 +1357,15 @@ static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ memset(data, 0xff, len);
+ return 0;
+ }
+
kvm_lapic_reg_read(apic, offset, len, data);
return 0;
@@ -1904,6 +1925,14 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
if (!apic_mmio_in_range(apic, address))
return -EOPNOTSUPP;
+ if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
+ if (!kvm_check_has_quirk(vcpu->kvm,
+ KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
+ return -EOPNOTSUPP;
+
+ return 0;
+ }
+
/*
* APIC register must be aligned on 128-bits boundary.
* 32/64/128 bits registers must be accessed thru 32 bits.
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b5..d7e9bce6ff61 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -899,7 +899,7 @@ static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
/*
* Make sure the write to vcpu->mode is not reordered in front of
- * reads to sptes. If it does, kvm_commit_zap_page() can see us
+ * reads to sptes. If it does, kvm_mmu_commit_zap_page() can see us
* OUTSIDE_GUEST_MODE and proceed to free the shadow page table.
*/
smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE);
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
- return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
- int r, emulation_type = EMULTYPE_RETRY;
+ int r, emulation_type = 0;
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map;
@@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
- if (r == RET_PF_EMULATE) {
- emulation_type = 0;
+ if (r == RET_PF_EMULATE)
goto emulate;
- }
}
if (r == RET_PF_INVALID) {
@@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}
- if (mmio_info_in_cache(vcpu, cr2, direct))
- emulation_type = 0;
+ /*
+ * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+ * optimistically try to just unprotect the page and let the processor
+ * re-execute the instruction that caused the page fault. Do not allow
+ * retrying MMIO emulation, as it's not only pointless but could also
+ * cause us to enter an infinite loop because the processor will keep
+ * faulting on the non-existent MMIO address. Retrying an instruction
+ * from a nested guest is also pointless and dangerous as we are only
+ * explicitly shadowing L1's page tables, i.e. unprotecting something
+ * for L1 isn't going to magically fix whatever issue cause L2 to fail.
+ */
+ if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
+ emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.
@@ -5413,7 +5417,12 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
{
MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu.root_hpa));
- kvm_init_mmu(vcpu, true);
+ /*
+ * kvm_mmu_setup() is called only on vCPU initialization.
+ * Therefore, no need to reset mmu roots as they are not yet
+ * initialized.
+ */
+ kvm_init_mmu(vcpu, false);
}
static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6276140044d0..d96092b35936 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
if (!svm->next_rip) {
- if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+ if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
@@ -1226,8 +1226,7 @@ static __init int sev_hardware_setup(void)
min_sev_asid = cpuid_edx(0x8000001F);
/* Initialize SEV ASID bitmap */
- sev_asid_bitmap = kcalloc(BITS_TO_LONGS(max_sev_asid),
- sizeof(unsigned long), GFP_KERNEL);
+ sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
if (!sev_asid_bitmap)
return 1;
@@ -1405,7 +1404,7 @@ static __exit void svm_hardware_unsetup(void)
int cpu;
if (svm_sev_enabled())
- kfree(sev_asid_bitmap);
+ bitmap_free(sev_asid_bitmap);
for_each_possible_cpu(cpu)
svm_cpu_uninit(cpu);
@@ -2715,7 +2714,7 @@ static int gp_interception(struct vcpu_svm *svm)
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -2819,7 +2818,7 @@ static int io_interception(struct vcpu_svm *svm)
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3860,7 @@ static int iret_interception(struct vcpu_svm *svm)
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,13 +3868,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
static int emulate_on_interception(struct vcpu_svm *svm)
{
- return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
static int rsm_interception(struct vcpu_svm *svm)
{
- return x86_emulate_instruction(&svm->vcpu, 0, 0,
- rsm_ins_bytes, 2) == EMULATE_DONE;
+ return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+ rsm_ins_bytes, 2) == EMULATE_DONE;
}
static int rdpmc_interception(struct vcpu_svm *svm)
@@ -4700,7 +4699,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
- ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+ ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
}
return ret;
@@ -6747,7 +6746,7 @@ e_free:
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
unsigned long vaddr, vaddr_end, next_vaddr;
- unsigned long dst_vaddr, dst_vaddr_end;
+ unsigned long dst_vaddr;
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
@@ -6763,7 +6762,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
size = debug.len;
vaddr_end = vaddr + size;
dst_vaddr = debug.dst_uaddr;
- dst_vaddr_end = dst_vaddr + size;
for (; vaddr < vaddr_end; vaddr = next_vaddr) {
int len, s_off, d_off;
@@ -7150,6 +7148,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
+ .request_immediate_exit = __kvm_request_immediate_exit,
+
.sched_in = svm_sched_in,
.pmu_ops = &amd_pmu_ops,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d26f3c4985b..06412ba46aa3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -397,6 +397,7 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
+ bool hv_timer_armed;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
@@ -1019,6 +1020,8 @@ struct vcpu_vmx {
int ple_window;
bool ple_window_dirty;
+ bool req_immediate_exit;
+
/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
@@ -2864,6 +2867,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;
+ vmx->req_immediate_exit = false;
+
if (vmx->loaded_cpu_state)
return;
@@ -5393,9 +5398,10 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* To use VMXON (and later other VMX instructions), a guest
* must first be able to turn on cr4.VMXE (see handle_vmon()).
* So basically the check on whether to allow nested VMX
- * is here.
+ * is here. We operate under the default treatment of SMM,
+ * so VMX cannot be enabled under SMM.
*/
- if (!nested_vmx_allowed(vcpu))
+ if (!nested_vmx_allowed(vcpu) || is_smm(vcpu))
return 1;
}
@@ -6183,6 +6189,27 @@ static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
nested_mark_vmcs12_pages_dirty(vcpu);
}
+static bool vmx_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ void *vapic_page;
+ u32 vppr;
+ int rvi;
+
+ if (WARN_ON_ONCE(!is_guest_mode(vcpu)) ||
+ !nested_cpu_has_vid(get_vmcs12(vcpu)) ||
+ WARN_ON_ONCE(!vmx->nested.virtual_apic_page))
+ return false;
+
+ rvi = vmcs_read16(GUEST_INTR_STATUS) & 0xff;
+
+ vapic_page = kmap(vmx->nested.virtual_apic_page);
+ vppr = *((u32 *)(vapic_page + APIC_PROCPRI));
+ kunmap(vmx->nested.virtual_apic_page);
+
+ return ((rvi & 0xf0) > (vppr & 0xf0));
+}
+
static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu,
bool nested)
{
@@ -6983,7 +7010,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
- if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+ if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7081,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
WARN_ON_ONCE(!enable_vmware_backdoor);
- er = emulate_instruction(vcpu,
+ er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@@ -7157,7 +7184,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
++vcpu->stat.io_exits;
if (string)
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@@ -7231,7 +7258,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_desc(struct kvm_vcpu *vcpu)
{
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7507,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7574,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
}
- return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,8 +7731,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return kvm_skip_emulated_instruction(vcpu);
else
- return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
- NULL, 0) == EMULATE_DONE;
+ return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+ EMULATE_DONE;
}
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@@ -7748,7 +7775,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
- err = emulate_instruction(vcpu, 0);
+ err = kvm_emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@@ -7966,6 +7993,9 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}
+ if (!cpu_has_vmx_preemption_timer())
+ kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
+
if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
u64 vmx_msr;
@@ -9208,7 +9238,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
- kvm_lapic_expired_hv_timer(vcpu);
+ if (!to_vmx(vcpu)->req_immediate_exit)
+ kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
@@ -10595,24 +10626,43 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
-static void vmx_arm_hv_timer(struct kvm_vcpu *vcpu)
+static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
+{
+ vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
+ if (!vmx->loaded_vmcs->hv_timer_armed)
+ vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ vmx->loaded_vmcs->hv_timer_armed = true;
+}
+
+static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u64 tscl;
u32 delta_tsc;
- if (vmx->hv_deadline_tsc == -1)
+ if (vmx->req_immediate_exit) {
+ vmx_arm_hv_timer(vmx, 0);
return;
+ }
- tscl = rdtsc();
- if (vmx->hv_deadline_tsc > tscl)
- /* sure to be 32 bit only because checked on set_hv_timer */
- delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
- cpu_preemption_timer_multi);
- else
- delta_tsc = 0;
+ if (vmx->hv_deadline_tsc != -1) {
+ tscl = rdtsc();
+ if (vmx->hv_deadline_tsc > tscl)
+ /* set_hv_timer ensures the delta fits in 32-bits */
+ delta_tsc = (u32)((vmx->hv_deadline_tsc - tscl) >>
+ cpu_preemption_timer_multi);
+ else
+ delta_tsc = 0;
+
+ vmx_arm_hv_timer(vmx, delta_tsc);
+ return;
+ }
- vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
+ if (vmx->loaded_vmcs->hv_timer_armed)
+ vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
+ PIN_BASED_VMX_PREEMPTION_TIMER);
+ vmx->loaded_vmcs->hv_timer_armed = false;
}
static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
@@ -10672,7 +10722,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
- vmx_arm_hv_timer(vcpu);
+ vmx_update_hv_timer(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@@ -11427,16 +11477,18 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value;
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (vcpu->arch.virtual_tsc_khz == 0)
- return;
-
- /* Make sure short timeouts reliably trigger an immediate vmexit.
- * hrtimer_start does not guarantee this. */
- if (preemption_timeout <= 1) {
+ /*
+ * A timer value of zero is architecturally guaranteed to cause
+ * a VMExit prior to executing any instructions in the guest.
+ */
+ if (preemption_timeout == 0) {
vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
return;
}
+ if (vcpu->arch.virtual_tsc_khz == 0)
+ return;
+
preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
preemption_timeout *= 1000000;
do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
@@ -11646,11 +11698,15 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
* bits 15:8 should be zero in posted_intr_nv,
* the descriptor address has been already checked
* in nested_get_vmcs12_pages.
+ *
+ * bits 5:0 of posted_intr_desc_addr should be zero.
*/
if (nested_cpu_has_posted_intr(vmcs12) &&
(!nested_cpu_has_vid(vmcs12) ||
!nested_exit_intr_ack_set(vcpu) ||
- vmcs12->posted_intr_nv & 0xff00))
+ (vmcs12->posted_intr_nv & 0xff00) ||
+ (vmcs12->posted_intr_desc_addr & 0x3f) ||
+ (!page_address_valid(vcpu, vmcs12->posted_intr_desc_addr))))
return -EINVAL;
/* tpr shadow is needed by all apicv features. */
@@ -12076,11 +12132,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
exec_control = vmcs12->pin_based_vm_exec_control;
- /* Preemption timer setting is only taken from vmcs01. */
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ /* Preemption timer setting is computed directly in vmx_vcpu_run. */
exec_control |= vmcs_config.pin_based_exec_ctrl;
- if (vmx->hv_deadline_tsc == -1)
- exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
+ vmx->loaded_vmcs->hv_timer_armed = false;
/* Posted interrupts setting is only taken from vmcs12. */
if (nested_cpu_has_posted_intr(vmcs12)) {
@@ -12318,6 +12373,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ if (nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -12537,8 +12595,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
+ u32 vmcs01_cpu_exec_ctrl;
int r = 0;
+ vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12575,6 +12636,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
}
/*
+ * If L1 had a pending IRQ/NMI until it executed
+ * VMLAUNCH/VMRESUME which wasn't delivered because it was
+ * disallowed (e.g. interrupts disabled), L0 needs to
+ * evaluate if this pending event should cause an exit from L2
+ * to L1 or delivered directly to L2 (e.g. In case L1 don't
+ * intercept EXTERNAL_INTERRUPT).
+ *
+ * Usually this would be handled by L0 requesting a
+ * IRQ/NMI window by setting VMCS accordingly. However,
+ * this setting was done on VMCS01 and now VMCS02 is active
+ * instead. Thus, we force L0 to perform pending event
+ * evaluation by requesting a KVM_REQ_EVENT.
+ */
+ if (vmcs01_cpu_exec_ctrl &
+ (CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ }
+
+ /*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
* returned as far as L1 is concerned. It will only return (and set
@@ -12841,6 +12921,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}
+static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+ to_vmx(vcpu)->req_immediate_exit = true;
+}
+
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
ktime_t remaining =
@@ -13231,12 +13316,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
- if (vmx->hv_deadline_tsc == -1)
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
- else
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
+
if (kvm_has_tsc_control)
decache_tsc_multiplier(vmx);
@@ -13440,18 +13520,12 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
return -ERANGE;
vmx->hv_deadline_tsc = tscl + delta_tsc;
- vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
-
return delta_tsc == 0;
}
static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- vmx->hv_deadline_tsc = -1;
- vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
- PIN_BASED_VMX_PREEMPTION_TIMER);
+ to_vmx(vcpu)->hv_deadline_tsc = -1;
}
#endif
@@ -13932,6 +14006,14 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
+ /*
+ * SMM temporarily disables VMX, so we cannot be in guest mode,
+ * nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
+ * must be zero.
+ */
+ if (is_smm(vcpu) ? kvm_state->flags : kvm_state->vmx.smm.flags)
+ return -EINVAL;
+
if ((kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
!(kvm_state->vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
return -EINVAL;
@@ -13988,9 +14070,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
return -EINVAL;
- if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
- vmx->nested.nested_run_pending = 1;
-
vmx->nested.dirty_vmcs12 = true;
ret = enter_vmx_non_root_mode(vcpu, NULL);
if (ret)
@@ -14078,6 +14157,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.apicv_post_state_restore = vmx_apicv_post_state_restore,
.hwapic_irr_update = vmx_hwapic_irr_update,
.hwapic_isr_update = vmx_hwapic_isr_update,
+ .guest_apic_has_interrupt = vmx_guest_apic_has_interrupt,
.sync_pir_to_irr = vmx_sync_pir_to_irr,
.deliver_posted_interrupt = vmx_deliver_posted_interrupt,
@@ -14111,6 +14191,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.umip_emulated = vmx_umip_emulated,
.check_nested_events = vmx_check_nested_events,
+ .request_immediate_exit = vmx_request_immediate_exit,
.sched_in = vmx_sched_in,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 506bd2b4b8bb..edbf00ec56b3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -628,7 +628,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn;
int r;
- if (is_long_mode(vcpu) || !is_pae(vcpu))
+ if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
@@ -2537,7 +2537,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_PLATFORM_INFO:
if (!msr_info->host_initiated ||
- data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
(!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
cpuid_fault_enabled(vcpu)))
return 1;
@@ -2780,6 +2779,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vcpu->arch.osvw.status;
break;
case MSR_PLATFORM_INFO:
+ if (!msr_info->host_initiated &&
+ !vcpu->kvm->arch.guest_can_read_msr_platform_info)
+ return 1;
msr_info->data = vcpu->arch.msr_platform_info;
break;
case MSR_MISC_FEATURES_ENABLES:
@@ -2927,6 +2929,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_GET_MSR_FEATURES:
+ case KVM_CAP_MSR_PLATFORM_INFO:
r = 1;
break;
case KVM_CAP_SYNC_REGS:
@@ -4007,19 +4010,23 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
break;
BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size));
+ r = -EFAULT;
if (get_user(user_data_size, &user_kvm_nested_state->size))
- return -EFAULT;
+ break;
r = kvm_x86_ops->get_nested_state(vcpu, user_kvm_nested_state,
user_data_size);
if (r < 0)
- return r;
+ break;
if (r > user_data_size) {
if (put_user(r, &user_kvm_nested_state->size))
- return -EFAULT;
- return -E2BIG;
+ r = -EFAULT;
+ else
+ r = -E2BIG;
+ break;
}
+
r = 0;
break;
}
@@ -4031,19 +4038,21 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
if (!kvm_x86_ops->set_nested_state)
break;
+ r = -EFAULT;
if (copy_from_user(&kvm_state, user_kvm_nested_state, sizeof(kvm_state)))
- return -EFAULT;
+ break;
+ r = -EINVAL;
if (kvm_state.size < sizeof(kvm_state))
- return -EINVAL;
+ break;
if (kvm_state.flags &
~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE))
- return -EINVAL;
+ break;
/* nested_run_pending implies guest_mode. */
if (kvm_state.flags == KVM_STATE_NESTED_RUN_PENDING)
- return -EINVAL;
+ break;
r = kvm_x86_ops->set_nested_state(vcpu, user_kvm_nested_state, &kvm_state);
break;
@@ -4350,6 +4359,10 @@ split_irqchip_unlock:
kvm->arch.pause_in_guest = true;
r = 0;
break;
+ case KVM_CAP_MSR_PLATFORM_INFO:
+ kvm->arch.guest_can_read_msr_platform_info = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -4987,7 +5000,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
emul_type = 0;
}
- er = emulate_instruction(vcpu, emul_type);
+ er = kvm_emulate_instruction(vcpu, emul_type);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
@@ -5870,7 +5883,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
gpa_t gpa = cr2;
kvm_pfn_t pfn;
- if (emulation_type & EMULTYPE_NO_REEXECUTE)
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5974,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
- if (!(emulation_type & EMULTYPE_RETRY))
+ if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
+ return false;
+
+ if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (x86_page_table_writing_insn(ctxt))
@@ -6276,7 +6295,19 @@ restart:
return r;
}
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+ return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+ void *insn, int insn_len)
+{
+ return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
@@ -7343,6 +7374,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);
+void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
+{
+ smp_send_reschedule(vcpu->cpu);
+}
+EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);
+
/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
@@ -7547,7 +7584,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
- smp_send_reschedule(vcpu->cpu);
+ kvm_x86_ops->request_immediate_exit(vcpu);
}
trace_kvm_entry(vcpu->vcpu_id);
@@ -7734,7 +7771,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
int r;
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
- r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+ r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE)
return 0;
@@ -7811,6 +7848,29 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
return 0;
}
+/* Swap (qemu) user FPU context for the guest FPU context. */
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
+ /* PKRU is separately restored in kvm_x86_ops->run. */
+ __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
+ ~XFEATURE_MASK_PKRU);
+ preempt_enable();
+ trace_kvm_fpu(1);
+}
+
+/* When vcpu_run ends, restore user space FPU context. */
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
+ copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
+ preempt_enable();
+ ++vcpu->stat.fpu_reload;
+ trace_kvm_fpu(0);
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int r;
@@ -8159,7 +8219,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (!is_long_mode(vcpu) && is_pae(vcpu)) {
+ if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
@@ -8388,29 +8448,6 @@ static void fx_init(struct kvm_vcpu *vcpu)
vcpu->arch.cr0 |= X86_CR0_ET;
}
-/* Swap (qemu) user FPU context for the guest FPU context. */
-void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.user_fpu);
- /* PKRU is separately restored in kvm_x86_ops->run. */
- __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state,
- ~XFEATURE_MASK_PKRU);
- preempt_enable();
- trace_kvm_fpu(1);
-}
-
-/* When vcpu_run ends, restore user space FPU context. */
-void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
- copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
- copy_kernel_to_fpregs(&vcpu->arch.user_fpu.state);
- preempt_enable();
- ++vcpu->stat.fpu_reload;
- trace_kvm_fpu(0);
-}
-
void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu)
{
void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask;
@@ -8834,6 +8871,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.kvmclock_offset = -ktime_get_boot_ns();
pvclock_update_vm_gtod_copy(kvm);
+ kvm->arch.guest_can_read_msr_platform_info = true;
+
INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn);
INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn);
@@ -9182,6 +9221,13 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_page_track_flush_slot(kvm, slot);
}
+static inline bool kvm_guest_apic_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ return (is_guest_mode(vcpu) &&
+ kvm_x86_ops->guest_apic_has_interrupt &&
+ kvm_x86_ops->guest_apic_has_interrupt(vcpu));
+}
+
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
{
if (!list_empty_careful(&vcpu->async_pf.done))
@@ -9206,7 +9252,8 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu))
+ (kvm_cpu_has_interrupt(vcpu) ||
+ kvm_guest_apic_has_interrupt(vcpu)))
return true;
if (kvm_hv_has_stimer_pending(vcpu))
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 257f27620bc2..67b9568613f3 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+ int emulation_type, void *insn, int insn_len);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 7a8fc26c1115..faca978ebf9d 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -815,10 +815,14 @@ void free_kernel_image_pages(void *begin, void *end)
set_memory_np_noalias(begin_ul, len_pages);
}
+void __weak mem_encrypt_free_decrypted_mem(void) { }
+
void __ref free_initmem(void)
{
e820__reallocate_tables();
+ mem_encrypt_free_decrypted_mem();
+
free_kernel_image_pages(&__init_begin, &__init_end);
}
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index b2de398d1fd3..006f373f54ab 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -348,6 +348,30 @@ bool sev_active(void)
EXPORT_SYMBOL(sev_active);
/* Architecture __weak replacement functions */
+void __init mem_encrypt_free_decrypted_mem(void)
+{
+ unsigned long vaddr, vaddr_end, npages;
+ int r;
+
+ vaddr = (unsigned long)__start_bss_decrypted_unused;
+ vaddr_end = (unsigned long)__end_bss_decrypted;
+ npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+
+ /*
+ * The unused memory range was mapped decrypted, change the encryption
+ * attribute from decrypted to encrypted before freeing it.
+ */
+ if (mem_encrypt_active()) {
+ r = set_memory_encrypted(vaddr, npages);
+ if (r) {
+ pr_warn("failed to free unused decrypted pages\n");
+ return;
+ }
+ }
+
+ free_init_pages("unused decrypted", vaddr, vaddr_end);
+}
+
void __init mem_encrypt_init(void)
{
if (!sme_me_mask)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e848a4811785..089e78c4effd 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -269,7 +269,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
if (pgd_val(pgd) != 0) {
pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
- *pgdp = native_make_pgd(0);
+ pgd_clear(pgdp);
paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
pmd_free(mm, pmd);
@@ -494,7 +494,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
int changed = !pte_same(*ptep, entry);
if (changed && dirty)
- *ptep = entry;
+ set_pte(ptep, entry);
return changed;
}
@@ -509,7 +509,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
if (changed && dirty) {
- *pmdp = entry;
+ set_pmd(pmdp, entry);
/*
* We had a write-protection fault here and changed the pmd
* to to more permissive. No need to flush the TLB for that,
@@ -529,7 +529,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
VM_BUG_ON(address & ~HPAGE_PUD_MASK);
if (changed && dirty) {
- *pudp = entry;
+ set_pud(pudp, entry);
/*
* We had a write-protection fault here and changed the pud
* to to more permissive. No need to flush the TLB for that,
@@ -637,6 +637,15 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
{
unsigned long address = __fix_to_virt(idx);
+#ifdef CONFIG_X86_64
+ /*
+ * Ensure that the static initial page tables are covering the
+ * fixmap completely.
+ */
+ BUILD_BUG_ON(__end_of_permanent_fixed_addresses >
+ (FIXMAP_PMD_NUM * PTRS_PER_PTE));
+#endif
+
if (idx >= __end_of_fixed_addresses) {
BUG();
return;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 05ca14222463..9959657127f4 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -85,10 +85,9 @@ pgd_t * __init efi_call_phys_prolog(void)
void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
+ load_fixmap_gdt(0);
load_cr3(save_pgd);
__flush_tlb_all();
-
- load_fixmap_gdt(0);
}
void __init efi_runtime_update_mappings(void)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 2fe5c9b1816b..dd461c0167ef 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -1907,7 +1907,7 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
/* L3_k[511] -> level2_fixmap_pgt */
convert_pfn_mfn(level3_kernel_pgt);
- /* L3_k[511][506] -> level1_fixmap_pgt */
+ /* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
convert_pfn_mfn(level2_fixmap_pgt);
/* We get [511][511] and have Xen's version of level2_kernel_pgt */
@@ -1952,7 +1952,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
- set_page_prot(level1_fixmap_pgt, PAGE_KERNEL_RO);
+
+ for (i = 0; i < FIXMAP_PMD_NUM; i++) {
+ set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
+ PAGE_KERNEL_RO);
+ }
/* Pin down new L4 */
pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
diff --git a/arch/x86/xen/pmu.c b/arch/x86/xen/pmu.c
index 7d00d4ad44d4..95997e6c0696 100644
--- a/arch/x86/xen/pmu.c
+++ b/arch/x86/xen/pmu.c
@@ -478,7 +478,7 @@ static void xen_convert_regs(const struct xen_pmu_regs *xen_regs,
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
int err, ret = IRQ_NONE;
- struct pt_regs regs;
+ struct pt_regs regs = {0};
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
uint8_t xenpmu_flags = get_xenpmu_flags();