summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/xen/enlighten.c7
-rw-r--r--arch/arm64/crypto/aes-glue.c2
-rw-r--r--arch/arm64/include/asm/kvm_host.h1
-rw-r--r--arch/arm64/include/asm/sysreg.h11
-rw-r--r--arch/arm64/kernel/cpufeature.c2
-rw-r--r--arch/arm64/kernel/smp.c2
-rw-r--r--arch/arm64/kvm/fpsimd.c36
-rw-r--r--arch/arm64/mm/dma-mapping.c9
-rw-r--r--arch/arm64/mm/proc.S5
-rw-r--r--arch/powerpc/Makefile1
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h1
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-4k.h21
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable-64k.h9
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h5
-rw-r--r--arch/powerpc/include/asm/nmi.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h1
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h1
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c3
-rw-r--r--arch/powerpc/kernel/setup-common.c12
-rw-r--r--arch/powerpc/kernel/setup_64.c8
-rw-r--r--arch/powerpc/kernel/smp.c6
-rw-r--r--arch/powerpc/kernel/stacktrace.c4
-rw-r--r--arch/powerpc/mm/hugetlbpage.c3
-rw-r--r--arch/powerpc/mm/pgtable-book3s64.c12
-rw-r--r--arch/powerpc/mm/tlb-radix.c98
-rw-r--r--arch/s390/include/asm/css_chars.h62
-rw-r--r--arch/x86/include/asm/vmx.h3
-rw-r--r--arch/x86/kvm/vmx.c67
-rw-r--r--arch/x86/kvm/x86.h9
-rw-r--r--arch/x86/xen/enlighten.c7
-rw-r--r--arch/x86/xen/enlighten_pv.c1
-rw-r--r--arch/x86/xen/enlighten_pvh.c1
32 files changed, 327 insertions, 85 deletions
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 8073625371f5..07060e5b5864 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -59,6 +59,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
static __read_mostly unsigned int xen_events_irq;
+uint32_t xen_start_flags;
+EXPORT_SYMBOL(xen_start_flags);
+
int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
unsigned long addr,
xen_pfn_t *gfn, int nr,
@@ -293,9 +296,7 @@ void __init xen_early_init(void)
xen_setup_features();
if (xen_feature(XENFEAT_dom0))
- xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
- else
- xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
+ xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
if (!console_set_on_cmdline && !xen_initial_domain())
add_preferred_console("hvc", 0, NULL);
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 253188fb8cb0..e3e50950a863 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -223,8 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req)
kernel_neon_begin();
aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
- err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
kernel_neon_end();
+ err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
u8 __aligned(8) tail[AES_BLOCK_SIZE];
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index fda9a8ca48be..fe8777b12f86 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
#define KVM_ARM64_FP_ENABLED (1 << 1) /* guest FP regs loaded */
#define KVM_ARM64_FP_HOST (1 << 2) /* host FP regs loaded */
#define KVM_ARM64_HOST_SVE_IN_USE (1 << 3) /* backup for host TIF_SVE */
+#define KVM_ARM64_HOST_SVE_ENABLED (1 << 4) /* SVE enabled for EL0 */
#define vcpu_gp_regs(v) (&(v)->arch.ctxt.gp_regs)
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 6171178075dc..a8f84812c6e8 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -728,6 +728,17 @@ asm(
asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
} while (0)
+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do { \
+ u64 __scs_val = read_sysreg(sysreg); \
+ u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set); \
+ if (__scs_new != __scs_val) \
+ write_sysreg(__scs_new, sysreg); \
+} while (0)
+
static inline void config_sctlr_el1(u32 clear, u32 set)
{
u32 val;
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index d2856b129097..f24892a40d2c 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -937,7 +937,7 @@ static int __init parse_kpti(char *str)
__kpti_forced = enabled ? 1 : -1;
return 0;
}
-__setup("kpti=", parse_kpti);
+early_param("kpti", parse_kpti);
#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#ifdef CONFIG_ARM64_HW_AFDBM
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index f3e2e3aec0b0..2faa9863d2e5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -179,7 +179,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
* This is the secondary CPU boot entry. We're using this CPUs
* idle thread stack, but a set of temporary page tables.
*/
-asmlinkage void secondary_start_kernel(void)
+asmlinkage notrace void secondary_start_kernel(void)
{
u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
struct mm_struct *mm = &init_mm;
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index dc6ecfa5a2d2..aac7808ce216 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -5,13 +5,14 @@
* Copyright 2018 Arm Limited
* Author: Dave Martin <Dave.Martin@arm.com>
*/
-#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/thread_info.h>
#include <linux/kvm_host.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
/*
* Called on entry to KVM_RUN unless this vcpu previously ran at least
@@ -61,10 +62,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
{
BUG_ON(!current->mm);
- vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+ vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+ KVM_ARM64_HOST_SVE_IN_USE |
+ KVM_ARM64_HOST_SVE_ENABLED);
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
if (test_thread_flag(TIF_SVE))
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+ if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+ vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
}
/*
@@ -92,19 +99,30 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
*/
void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
{
- local_bh_disable();
+ unsigned long flags;
- update_thread_flag(TIF_SVE,
- vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+ local_irq_save(flags);
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
/* Clean guest FP state to memory and invalidate cpu view */
fpsimd_save();
fpsimd_flush_cpu_state();
- } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
- /* Ensure user trap controls are correctly restored */
- fpsimd_bind_task_to_cpu();
+ } else if (system_supports_sve()) {
+ /*
+ * The FPSIMD/SVE state in the CPU has not been touched, and we
+ * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+ * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+ * for EL0. To avoid spurious traps, restore the trap state
+ * seen by kvm_arch_vcpu_load_fp():
+ */
+ if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+ sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+ else
+ sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
}
- local_bh_enable();
+ update_thread_flag(TIF_SVE,
+ vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+ local_irq_restore(flags);
}
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 49e217ac7e1e..61e93f0b5482 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -583,13 +583,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
size >> PAGE_SHIFT);
return NULL;
}
- if (!coherent)
- __dma_flush_area(page_to_virt(page), iosize);
-
addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
prot,
__builtin_return_address(0));
- if (!addr) {
+ if (addr) {
+ memset(addr, 0, size);
+ if (!coherent)
+ __dma_flush_area(page_to_virt(page), iosize);
+ } else {
iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
dma_release_from_contiguous(dev, page,
size >> PAGE_SHIFT);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 5f9a73a4452c..03646e6a2ef4 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -217,8 +217,9 @@ ENDPROC(idmap_cpu_replace_ttbr1)
.macro __idmap_kpti_put_pgtable_ent_ng, type
orr \type, \type, #PTE_NG // Same bit for blocks and pages
- str \type, [cur_\()\type\()p] // Update the entry and ensure it
- dc civac, cur_\()\type\()p // is visible to all CPUs.
+ str \type, [cur_\()\type\()p] // Update the entry and ensure
+ dmb sy // that it is visible to all
+ dc civac, cur_\()\type\()p // CPUs.
.endm
/*
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index bd06a3ccda31..2ea575cb3401 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -244,6 +244,7 @@ cpu-as-$(CONFIG_4xx) += -Wa,-m405
cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
cpu-as-$(CONFIG_E200) += -Wa,-me200
cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
+cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc)
KBUILD_AFLAGS += $(cpu-as-y)
KBUILD_CFLAGS += $(cpu-as-y)
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 6a6673907e45..e4633803fe43 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -108,6 +108,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
#define check_pgt_cache() do { } while (0)
+#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
index af5f2baac80f..a069dfcac9a9 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-4k.h
@@ -49,6 +49,27 @@ static inline int hugepd_ok(hugepd_t hpd)
}
#define is_hugepd(hpd) (hugepd_ok(hpd))
+/*
+ * 16M and 16G huge page directory tables are allocated from slab cache
+ *
+ */
+#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
+#define H_16G_CACHE_INDEX \
+ (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
+
+static inline int get_hugepd_cache_index(int index)
+{
+ switch (index) {
+ case H_16M_CACHE_INDEX:
+ return HTLB_16M_INDEX;
+ case H_16G_CACHE_INDEX:
+ return HTLB_16G_INDEX;
+ default:
+ BUG();
+ }
+ /* should not reach */
+}
+
#else /* !CONFIG_HUGETLB_PAGE */
static inline int pmd_huge(pmd_t pmd) { return 0; }
static inline int pud_huge(pud_t pud) { return 0; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
index fb4b3ba52339..d7ee249d6890 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable-64k.h
@@ -45,8 +45,17 @@ static inline int hugepd_ok(hugepd_t hpd)
{
return 0;
}
+
#define is_hugepd(pdep) 0
+/*
+ * This should never get called
+ */
+static inline int get_hugepd_cache_index(int index)
+{
+ BUG();
+}
+
#else /* !CONFIG_HUGETLB_PAGE */
static inline int pmd_huge(pmd_t pmd) { return 0; }
static inline int pud_huge(pud_t pud) { return 0; }
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 63cee159022b..42aafba7a308 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -287,6 +287,11 @@ enum pgtable_index {
PMD_INDEX,
PUD_INDEX,
PGD_INDEX,
+ /*
+ * Below are used with 4k page size and hugetlb
+ */
+ HTLB_16M_INDEX,
+ HTLB_16G_INDEX,
};
extern unsigned long __vmalloc_start;
diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h
index 0f571e0ebca1..bd9ba8defd72 100644
--- a/arch/powerpc/include/asm/nmi.h
+++ b/arch/powerpc/include/asm/nmi.h
@@ -8,7 +8,7 @@ extern void arch_touch_nmi_watchdog(void);
static inline void arch_touch_nmi_watchdog(void) {}
#endif
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
+#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
bool exclude_self);
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index 1707781d2f20..9de40eb614da 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -109,6 +109,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
#define check_pgt_cache() do { } while (0)
+#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb,
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 0e693f322cb2..e2d62d033708 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -141,6 +141,7 @@ static inline void pgtable_free(void *table, int shift)
}
}
+#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
{
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 4be1c0de9406..96dd3d871986 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -711,7 +711,8 @@ static __init void cpufeatures_cpu_quirks(void)
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
- } else /* DD2.1 and up have DD2_1 */
+ } else if ((version & 0xffff0000) == 0x004e0000)
+ /* DD2.1 and up have DD2_1 */
cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
if ((version & 0xffff0000) == 0x004e0000) {
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 62b1a40d8957..40b44bb53a4e 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -701,11 +701,18 @@ static int ppc_panic_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
/*
+ * panic does a local_irq_disable, but we really
+ * want interrupts to be hard disabled.
+ */
+ hard_irq_disable();
+
+ /*
* If firmware-assisted dump has been registered then trigger
* firmware-assisted dump and let firmware handle everything else.
*/
crash_fadump(NULL, ptr);
- ppc_md.panic(ptr); /* May not return */
+ if (ppc_md.panic)
+ ppc_md.panic(ptr); /* May not return */
return NOTIFY_DONE;
}
@@ -716,7 +723,8 @@ static struct notifier_block ppc_panic_block = {
void __init setup_panic(void)
{
- if (!ppc_md.panic)
+ /* PPC64 always does a hard irq disable in its panic handler */
+ if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
return;
atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
}
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7a7ce8ad455e..225bc5f91049 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -387,6 +387,14 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
+void panic_smp_self_stop(void)
+{
+ hard_irq_disable();
+ spin_begin();
+ while (1)
+ spin_cpu_relax();
+}
+
#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5eadfffabe35..4794d6b4f4d2 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -600,9 +600,6 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
nmi_ipi_busy_count--;
nmi_ipi_unlock();
- /* Remove this CPU */
- set_cpu_online(smp_processor_id(), false);
-
spin_begin();
while (1)
spin_cpu_relax();
@@ -617,9 +614,6 @@ void smp_send_stop(void)
static void stop_this_cpu(void *dummy)
{
- /* Remove this CPU */
- set_cpu_online(smp_processor_id(), false);
-
hard_irq_disable();
spin_begin();
while (1)
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 07e97f289c52..e2c50b55138f 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -196,7 +196,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-#ifdef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
static void handle_backtrace_ipi(struct pt_regs *regs)
{
nmi_cpu_backtrace(regs);
@@ -242,4 +242,4 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
}
-#endif /* CONFIG_PPC64 */
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7c5f479c5c00..8a9a49c13865 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
if (shift >= pdshift)
hugepd_free(tlb, hugepte);
else
- pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+ pgtable_free_tlb(tlb, hugepte,
+ get_hugepd_cache_index(pdshift - shift));
}
static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c
index c1f4ca45c93a..4afbfbb64bfd 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/pgtable-book3s64.c
@@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index)
case PUD_INDEX:
kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+ /* 16M hugepd directory at pud level */
+ case HTLB_16M_INDEX:
+ BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+ break;
+ /* 16G hugepd directory at the pgd level */
+ case HTLB_16G_INDEX:
+ BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+ kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+ break;
+#endif
/* We don't free pgd table via RCU callback */
default:
BUG();
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 67a6e86d3e7e..1135b43a597c 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
- unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ bool flush_all_sizes)
{
- struct mm_struct *mm = vma->vm_mm;
unsigned long pid;
unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
unsigned long page_size = 1UL << page_shift;
unsigned long nr_pages = (end - start) >> page_shift;
bool local, full;
-#ifdef CONFIG_HUGETLB_PAGE
- if (is_vm_hugetlb_page(vma))
- return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
pid = mm->context.id;
if (unlikely(pid == MMU_NO_CONTEXT))
return;
@@ -738,37 +733,64 @@ is_local:
_tlbie_pid(pid, RIC_FLUSH_TLB);
}
} else {
- bool hflush = false;
+ bool hflush = flush_all_sizes;
+ bool gflush = flush_all_sizes;
unsigned long hstart, hend;
+ unsigned long gstart, gend;
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
- hend = end >> HPAGE_PMD_SHIFT;
- if (hstart < hend) {
- hstart <<= HPAGE_PMD_SHIFT;
- hend <<= HPAGE_PMD_SHIFT;
+ if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
hflush = true;
+
+ if (hflush) {
+ hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+ hend = end & PMD_MASK;
+ if (hstart == hend)
+ hflush = false;
+ }
+
+ if (gflush) {
+ gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+ gend = end & PUD_MASK;
+ if (gstart == gend)
+ gflush = false;
}
-#endif
asm volatile("ptesync": : :"memory");
if (local) {
__tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbiel_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbiel_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
asm volatile("ptesync": : :"memory");
} else {
__tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
if (hflush)
__tlbie_va_range(hstart, hend, pid,
- HPAGE_PMD_SIZE, MMU_PAGE_2M);
+ PMD_SIZE, MMU_PAGE_2M);
+ if (gflush)
+ __tlbie_va_range(gstart, gend, pid,
+ PUD_SIZE, MMU_PAGE_1G);
fixup_tlbie();
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
}
preempt_enable();
}
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+ if (is_vm_hugetlb_page(vma))
+ return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+ __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
EXPORT_SYMBOL(radix__flush_tlb_range);
static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
int psize = 0;
struct mm_struct *mm = tlb->mm;
int page_size = tlb->page_size;
+ unsigned long start = tlb->start;
+ unsigned long end = tlb->end;
/*
* if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
*/
if (tlb->fullmm) {
__flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+ } else if (mm_tlb_flush_nested(mm)) {
+ /*
+ * If there is a concurrent invalidation that is clearing ptes,
+ * then it's possible this invalidation will miss one of those
+ * cleared ptes and miss flushing the TLB. If this invalidate
+ * returns before the other one flushes TLBs, that can result
+ * in it returning while there are still valid TLBs inside the
+ * range to be invalidated.
+ *
+ * See mm/memory.c:tlb_finish_mmu() for more details.
+ *
+ * The solution to this is ensure the entire range is always
+ * flushed here. The problem for powerpc is that the flushes
+ * are page size specific, so this "forced flush" would not
+ * do the right thing if there are a mix of page sizes in
+ * the range to be invalidated. So use __flush_tlb_range
+ * which invalidates all possible page sizes in the range.
+ *
+ * PWC flush probably is not be required because the core code
+ * shouldn't free page tables in this path, but accounting
+ * for the possibility makes us a bit more robust.
+ *
+ * need_flush_all is an uncommon case because page table
+ * teardown should be done with exclusive locks held (but
+ * after locks are dropped another invalidate could come
+ * in), it could be optimized further if necessary.
+ */
+ if (!tlb->need_flush_all)
+ __radix__flush_tlb_range(mm, start, end, true);
+ else
+ radix__flush_all_mm(mm);
+#endif
} else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
if (!tlb->need_flush_all)
radix__flush_tlb_mm(mm);
else
radix__flush_all_mm(mm);
} else {
- unsigned long start = tlb->start;
- unsigned long end = tlb->end;
-
if (!tlb->need_flush_all)
radix__flush_tlb_range_psize(mm, start, end, psize);
else
@@ -1043,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
if (sib == cpu)
continue;
+ if (!cpu_possible(sib))
+ continue;
if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
flush = true;
}
diff --git a/arch/s390/include/asm/css_chars.h b/arch/s390/include/asm/css_chars.h
index 0563fd3e8458..480bb02ccacd 100644
--- a/arch/s390/include/asm/css_chars.h
+++ b/arch/s390/include/asm/css_chars.h
@@ -6,36 +6,38 @@
struct css_general_char {
u64 : 12;
- u32 dynio : 1; /* bit 12 */
- u32 : 4;
- u32 eadm : 1; /* bit 17 */
- u32 : 23;
- u32 aif : 1; /* bit 41 */
- u32 : 3;
- u32 mcss : 1; /* bit 45 */
- u32 fcs : 1; /* bit 46 */
- u32 : 1;
- u32 ext_mb : 1; /* bit 48 */
- u32 : 7;
- u32 aif_tdd : 1; /* bit 56 */
- u32 : 1;
- u32 qebsm : 1; /* bit 58 */
- u32 : 2;
- u32 aiv : 1; /* bit 61 */
- u32 : 5;
- u32 aif_osa : 1; /* bit 67 */
- u32 : 12;
- u32 eadm_rf : 1; /* bit 80 */
- u32 : 1;
- u32 cib : 1; /* bit 82 */
- u32 : 5;
- u32 fcx : 1; /* bit 88 */
- u32 : 19;
- u32 alt_ssi : 1; /* bit 108 */
- u32 : 1;
- u32 narf : 1; /* bit 110 */
- u32 : 12;
- u32 util_str : 1;/* bit 123 */
+ u64 dynio : 1; /* bit 12 */
+ u64 : 4;
+ u64 eadm : 1; /* bit 17 */
+ u64 : 23;
+ u64 aif : 1; /* bit 41 */
+ u64 : 3;
+ u64 mcss : 1; /* bit 45 */
+ u64 fcs : 1; /* bit 46 */
+ u64 : 1;
+ u64 ext_mb : 1; /* bit 48 */
+ u64 : 7;
+ u64 aif_tdd : 1; /* bit 56 */
+ u64 : 1;
+ u64 qebsm : 1; /* bit 58 */
+ u64 : 2;
+ u64 aiv : 1; /* bit 61 */
+ u64 : 2;
+
+ u64 : 3;
+ u64 aif_osa : 1; /* bit 67 */
+ u64 : 12;
+ u64 eadm_rf : 1; /* bit 80 */
+ u64 : 1;
+ u64 cib : 1; /* bit 82 */
+ u64 : 5;
+ u64 fcx : 1; /* bit 88 */
+ u64 : 19;
+ u64 alt_ssi : 1; /* bit 108 */
+ u64 : 1;
+ u64 narf : 1; /* bit 110 */
+ u64 : 12;
+ u64 util_str : 1;/* bit 123 */
} __packed;
extern struct css_general_char css_general_characteristics;
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 425e6b8b9547..6aa8499e1f62 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -114,6 +114,7 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
+#define VMX_MISC_ZERO_LEN_INS 0x40000000
/* VMFUNC functions */
#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
@@ -351,11 +352,13 @@ enum vmcs_field {
#define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK
#define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED (1 << 8) /* reserved */
#define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */
#define INTR_TYPE_HARD_EXCEPTION (3 << 8) /* processor exception */
#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
#define INTR_TYPE_PRIV_SW_EXCEPTION (5 << 8) /* ICE breakpoint - undocumented */
#define INTR_TYPE_SOFT_EXCEPTION (6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT (7 << 8) /* other event */
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define GUEST_INTR_STATE_STI 0x00000001
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 559a12b6184d..1689f433f3a0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
}
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+ return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+ CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
{
return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
!nested_cr3_valid(vcpu, vmcs12->host_cr3))
return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
+ /*
+ * From the Intel SDM, volume 3:
+ * Fields relevant to VM-entry event injection must be set properly.
+ * These fields are the VM-entry interruption-information field, the
+ * VM-entry exception error code, and the VM-entry instruction length.
+ */
+ if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+ u32 intr_info = vmcs12->vm_entry_intr_info_field;
+ u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+ u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+ bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+ bool should_have_error_code;
+ bool urg = nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_UNRESTRICTED_GUEST);
+ bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+ /* VM-entry interruption-info field: interruption type */
+ if (intr_type == INTR_TYPE_RESERVED ||
+ (intr_type == INTR_TYPE_OTHER_EVENT &&
+ !nested_cpu_supports_monitor_trap_flag(vcpu)))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: vector */
+ if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+ (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+ (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: deliver error code */
+ should_have_error_code =
+ intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+ x86_exception_has_error_code(vector);
+ if (has_error_code != should_have_error_code)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry exception error code */
+ if (has_error_code &&
+ vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry interruption-info field: reserved bits */
+ if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+ /* VM-entry instruction length */
+ switch (intr_type) {
+ case INTR_TYPE_SOFT_EXCEPTION:
+ case INTR_TYPE_SOFT_INTR:
+ case INTR_TYPE_PRIV_SW_EXCEPTION:
+ if ((vmcs12->vm_entry_instruction_len > 15) ||
+ (vmcs12->vm_entry_instruction_len == 0 &&
+ !nested_cpu_has_zero_length_injection(vcpu)))
+ return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+ }
+ }
+
return 0;
}
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 331993c49dae..257f27620bc2 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
#endif
}
+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+ static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+ BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+ BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+ return (1U << vector) & exception_has_error_code;
+}
+
static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
{
return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c9081c6671f0..3b5318505c69 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -65,6 +65,13 @@ __read_mostly int xen_have_vector_callback;
EXPORT_SYMBOL_GPL(xen_have_vector_callback);
/*
+ * NB: needs to live in .data because it's used by xen_prepare_pvh which runs
+ * before clearing the bss.
+ */
+uint32_t xen_start_flags __attribute__((section(".data"))) = 0;
+EXPORT_SYMBOL(xen_start_flags);
+
+/*
* Point at some empty memory to start with. We map the real shared_info
* page as soon as fixmap is up and running.
*/
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 357969a3697c..8d4e2e1ae60b 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
return;
xen_domain_type = XEN_PV_DOMAIN;
+ xen_start_flags = xen_start_info->flags;
xen_setup_features();
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index aa1c6a6831a9..c85d1a88f476 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void)
}
xen_pvh = 1;
+ xen_start_flags = pvh_start_info.flags;
msr = cpuid_ebx(xen_cpuid_base() + 2);
pfn = __pa(hypercall_page);