summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm64/Kconfig51
-rw-r--r--arch/arm64/Makefile2
-rw-r--r--arch/arm64/include/asm/Kbuild2
-rw-r--r--arch/arm64/include/asm/acpi.h19
-rw-r--r--arch/arm64/include/asm/assembler.h82
-rw-r--r--arch/arm64/include/asm/atomic_ll_sc.h63
-rw-r--r--arch/arm64/include/asm/atomic_lse.h48
-rw-r--r--arch/arm64/include/asm/barrier.h4
-rw-r--r--arch/arm64/include/asm/cmpxchg.h116
-rw-r--r--arch/arm64/include/asm/cpucaps.h5
-rw-r--r--arch/arm64/include/asm/cpufeature.h3
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/elf.h4
-rw-r--r--arch/arm64/include/asm/ftrace.h1
-rw-r--r--arch/arm64/include/asm/insn.h8
-rw-r--r--arch/arm64/include/asm/io.h32
-rw-r--r--arch/arm64/include/asm/memory.h7
-rw-r--r--arch/arm64/include/asm/mmu_context.h5
-rw-r--r--arch/arm64/include/asm/module.h44
-rw-r--r--arch/arm64/include/asm/neon-intrinsics.h39
-rw-r--r--arch/arm64/include/asm/percpu.h390
-rw-r--r--arch/arm64/include/asm/pgtable-hwdef.h10
-rw-r--r--arch/arm64/include/asm/pgtable.h22
-rw-r--r--arch/arm64/include/asm/preempt.h89
-rw-r--r--arch/arm64/include/asm/processor.h35
-rw-r--r--arch/arm64/include/asm/smp.h15
-rw-r--r--arch/arm64/include/asm/sysreg.h6
-rw-r--r--arch/arm64/include/asm/thread_info.h13
-rw-r--r--arch/arm64/include/asm/tlbflush.h15
-rw-r--r--arch/arm64/include/asm/uaccess.h3
-rw-r--r--arch/arm64/include/asm/xor.h73
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h1
-rw-r--r--arch/arm64/kernel/Makefile2
-rw-r--r--arch/arm64/kernel/arm64ksyms.c88
-rw-r--r--arch/arm64/kernel/cpu_errata.c109
-rw-r--r--arch/arm64/kernel/cpufeature.c181
-rw-r--r--arch/arm64/kernel/cpuinfo.c1
-rw-r--r--arch/arm64/kernel/entry-ftrace.S55
-rw-r--r--arch/arm64/kernel/entry.S6
-rw-r--r--arch/arm64/kernel/ftrace.c4
-rw-r--r--arch/arm64/kernel/head.S38
-rw-r--r--arch/arm64/kernel/hibernate-asm.S1
-rw-r--r--arch/arm64/kernel/image.h46
-rw-r--r--arch/arm64/kernel/insn.c29
-rw-r--r--arch/arm64/kernel/module-plts.c135
-rw-r--r--arch/arm64/kernel/module.c13
-rw-r--r--arch/arm64/kernel/smccc-call.S4
-rw-r--r--arch/arm64/kernel/smp.c7
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S9
-rw-r--r--arch/arm64/kvm/hyp/entry.S1
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S4
-rw-r--r--arch/arm64/lib/Makefile6
-rw-r--r--arch/arm64/lib/clear_page.S1
-rw-r--r--arch/arm64/lib/clear_user.S2
-rw-r--r--arch/arm64/lib/copy_from_user.S4
-rw-r--r--arch/arm64/lib/copy_in_user.S4
-rw-r--r--arch/arm64/lib/copy_page.S1
-rw-r--r--arch/arm64/lib/copy_to_user.S4
-rw-r--r--arch/arm64/lib/crc32.S54
-rw-r--r--arch/arm64/lib/memchr.S1
-rw-r--r--arch/arm64/lib/memcmp.S1
-rw-r--r--arch/arm64/lib/memcpy.S2
-rw-r--r--arch/arm64/lib/memmove.S2
-rw-r--r--arch/arm64/lib/memset.S2
-rw-r--r--arch/arm64/lib/strchr.S1
-rw-r--r--arch/arm64/lib/strcmp.S1
-rw-r--r--arch/arm64/lib/strlen.S1
-rw-r--r--arch/arm64/lib/strncmp.S1
-rw-r--r--arch/arm64/lib/strnlen.S1
-rw-r--r--arch/arm64/lib/strrchr.S1
-rw-r--r--arch/arm64/lib/tishift.S5
-rw-r--r--arch/arm64/lib/xor-neon.c184
-rw-r--r--arch/arm64/mm/cache.S3
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/arm64/mm/hugetlbpage.c33
-rw-r--r--arch/arm64/mm/init.c4
-rw-r--r--arch/arm64/mm/mmu.c17
-rw-r--r--arch/arm64/mm/pageattr.c21
-rw-r--r--arch/arm64/mm/proc.S14
-rw-r--r--arch/s390/include/asm/preempt.h2
-rw-r--r--arch/x86/include/asm/preempt.h3
-rw-r--r--drivers/firmware/efi/arm-runtime.c2
-rw-r--r--drivers/firmware/efi/libstub/arm-stub.c2
-rw-r--r--include/linux/linkage.h6
-rw-r--r--include/linux/preempt.h3
-rw-r--r--mm/mmap.c25
86 files changed, 1575 insertions, 783 deletions
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a68bc6cc2167..090f008835f5 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -313,9 +313,13 @@ menu "Kernel Features"
menu "ARM errata workarounds via the alternatives framework"
+config ARM64_WORKAROUND_CLEAN_CACHE
+ def_bool n
+
config ARM64_ERRATUM_826319
bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
default y
+ select ARM64_WORKAROUND_CLEAN_CACHE
help
This option adds an alternative code sequence to work around ARM
erratum 826319 on Cortex-A53 parts up to r0p2 with an AMBA 4 ACE or
@@ -337,6 +341,7 @@ config ARM64_ERRATUM_826319
config ARM64_ERRATUM_827319
bool "Cortex-A53: 827319: Data cache clean instructions might cause overlapping transactions to the interconnect"
default y
+ select ARM64_WORKAROUND_CLEAN_CACHE
help
This option adds an alternative code sequence to work around ARM
erratum 827319 on Cortex-A53 parts up to r0p2 with an AMBA 5 CHI
@@ -358,6 +363,7 @@ config ARM64_ERRATUM_827319
config ARM64_ERRATUM_824069
bool "Cortex-A53: 824069: Cache line might not be marked as clean after a CleanShared snoop"
default y
+ select ARM64_WORKAROUND_CLEAN_CACHE
help
This option adds an alternative code sequence to work around ARM
erratum 824069 on Cortex-A53 parts up to r0p2 when it is connected
@@ -380,6 +386,7 @@ config ARM64_ERRATUM_824069
config ARM64_ERRATUM_819472
bool "Cortex-A53: 819472: Store exclusive instructions might cause data corruption"
default y
+ select ARM64_WORKAROUND_CLEAN_CACHE
help
This option adds an alternative code sequence to work around ARM
erratum 819472 on Cortex-A53 parts up to r0p1 with an L2 cache
@@ -687,15 +694,43 @@ config ARM64_VA_BITS_47
config ARM64_VA_BITS_48
bool "48-bit"
+config ARM64_USER_VA_BITS_52
+ bool "52-bit (user)"
+ depends on ARM64_64K_PAGES && (ARM64_PAN || !ARM64_SW_TTBR0_PAN)
+ help
+ Enable 52-bit virtual addressing for userspace when explicitly
+ requested via a hint to mmap(). The kernel will continue to
+ use 48-bit virtual addresses for its own mappings.
+
+ NOTE: Enabling 52-bit virtual addressing in conjunction with
+ ARMv8.3 Pointer Authentication will result in the PAC being
+ reduced from 7 bits to 3 bits, which may have a significant
+ impact on its susceptibility to brute-force attacks.
+
+ If unsure, select 48-bit virtual addressing instead.
+
endchoice
+config ARM64_FORCE_52BIT
+ bool "Force 52-bit virtual addresses for userspace"
+ depends on ARM64_USER_VA_BITS_52 && EXPERT
+ help
+ For systems with 52-bit userspace VAs enabled, the kernel will attempt
+ to maintain compatibility with older software by providing 48-bit VAs
+ unless a hint is supplied to mmap.
+
+ This configuration option disables the 48-bit compatibility logic, and
+ forces all userspace addresses to be 52-bit on HW that supports it. One
+ should only enable this configuration option for stress testing userspace
+ memory management code. If unsure say N here.
+
config ARM64_VA_BITS
int
default 36 if ARM64_VA_BITS_36
default 39 if ARM64_VA_BITS_39
default 42 if ARM64_VA_BITS_42
default 47 if ARM64_VA_BITS_47
- default 48 if ARM64_VA_BITS_48
+ default 48 if ARM64_VA_BITS_48 || ARM64_USER_VA_BITS_52
choice
prompt "Physical address space size"
@@ -970,6 +1005,20 @@ config ARM64_SSBD
If unsure, say Y.
+config RODATA_FULL_DEFAULT_ENABLED
+ bool "Apply r/o permissions of VM areas also to their linear aliases"
+ default y
+ help
+ Apply read-only attributes of VM areas to the linear alias of
+ the backing pages as well. This prevents code or read-only data
+ from being modified (inadvertently or intentionally) via another
+ mapping of the same memory page. This additional enhancement can
+ be turned off at runtime by passing rodata=[off|on] (and turned on
+ with rodata=full if this option is set to 'n')
+
+ This requires the linear region to be mapped down to pages,
+ which may adversely affect performance in some cases.
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
depends on COMPAT
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 6cb9fc7e9382..8978f60779c4 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -18,7 +18,7 @@ ifeq ($(CONFIG_RELOCATABLE), y)
# Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour
# for relative relocs, since this leads to better Image compression
# with the relocation offsets always being zero.
-LDFLAGS_vmlinux += -pie -shared -Bsymbolic \
+LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro \
$(call ld-option, --no-apply-dynamic-relocs)
endif
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 6cd5d77b6b44..1e17ea5c372b 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -14,7 +14,6 @@ generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
generic-y += msi.h
-generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += rwsem.h
@@ -27,4 +26,3 @@ generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += user.h
generic-y += vga.h
-generic-y += xor.h
diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h
index 709208dfdc8b..2def77ec14be 100644
--- a/arch/arm64/include/asm/acpi.h
+++ b/arch/arm64/include/asm/acpi.h
@@ -22,12 +22,23 @@
#include <asm/tlbflush.h>
/* Macros for consistency checks of the GICC subtable of MADT */
-#define ACPI_MADT_GICC_LENGTH \
- (acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
+
+/*
+ * MADT GICC minimum length refers to the MADT GICC structure table length as
+ * defined in the earliest ACPI version supported on arm64, ie ACPI 5.1.
+ *
+ * The efficiency_class member was added to the
+ * struct acpi_madt_generic_interrupt to represent the MADT GICC structure
+ * "Processor Power Efficiency Class" field, added in ACPI 6.0 whose offset
+ * is therefore used to delimit the MADT GICC structure minimum length
+ * appropriately.
+ */
+#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \
+ struct acpi_madt_generic_interrupt, efficiency_class)
#define BAD_MADT_GICC_ENTRY(entry, end) \
- (!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \
- (unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
+ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \
+ (unsigned long)(entry) + (entry)->header.length > (end))
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 6142402c2eb4..ce985f13dce5 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -23,6 +23,8 @@
#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H
+#include <asm-generic/export.h>
+
#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/debug-monitors.h>
@@ -123,6 +125,19 @@
.endm
/*
+ * Speculation barrier
+ */
+ .macro sb
+alternative_if_not ARM64_HAS_SB
+ dsb nsh
+ isb
+alternative_else
+ SB_BARRIER_INSN
+ nop
+alternative_endif
+ .endm
+
+/*
* Sanitise a 64-bit bounded index wrt speculation, returning zero if out
* of bounds.
*/
@@ -342,11 +357,10 @@ alternative_endif
.endm
/*
- * tcr_set_idmap_t0sz - update TCR.T0SZ so that we can load the ID map
+ * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
*/
- .macro tcr_set_idmap_t0sz, valreg, tmpreg
- ldr_l \tmpreg, idmap_t0sz
- bfi \valreg, \tmpreg, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
+ .macro tcr_set_t0sz, valreg, t0sz
+ bfi \valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
.endm
/*
@@ -377,27 +391,33 @@ alternative_endif
* size: size of the region
* Corrupts: kaddr, size, tmp1, tmp2
*/
+ .macro __dcache_op_workaround_clean_cache, op, kaddr
+alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
+ dc \op, \kaddr
+alternative_else
+ dc civac, \kaddr
+alternative_endif
+ .endm
+
.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
dcache_line_size \tmp1, \tmp2
add \size, \kaddr, \size
sub \tmp2, \tmp1, #1
bic \kaddr, \kaddr, \tmp2
9998:
- .if (\op == cvau || \op == cvac)
-alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
- dc \op, \kaddr
-alternative_else
- dc civac, \kaddr
-alternative_endif
- .elseif (\op == cvap)
-alternative_if ARM64_HAS_DCPOP
- sys 3, c7, c12, 1, \kaddr // dc cvap
-alternative_else
- dc cvac, \kaddr
-alternative_endif
+ .ifc \op, cvau
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvac
+ __dcache_op_workaround_clean_cache \op, \kaddr
+ .else
+ .ifc \op, cvap
+ sys 3, c7, c12, 1, \kaddr // dc cvap
.else
dc \op, \kaddr
.endif
+ .endif
+ .endif
add \kaddr, \kaddr, \tmp1
cmp \kaddr, \size
b.lo 9998b
@@ -477,6 +497,13 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
#else
#define NOKPROBE(x)
#endif
+
+#ifdef CONFIG_KASAN
+#define EXPORT_SYMBOL_NOKASAN(name)
+#else
+#define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name)
+#endif
+
/*
* Emit a 64-bit absolute little endian symbol reference in a way that
* ensures that it will be resolved at build time, even when building a
@@ -516,6 +543,29 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
.endm
/*
+ * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
+ * orr is used as it can cover the immediate value (and is idempotent).
+ * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
+ * ttbr: Value of ttbr to set, modified.
+ */
+ .macro offset_ttbr1, ttbr
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ orr \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+ .endm
+
+/*
+ * Perform the reverse of offset_ttbr1.
+ * bic is used as it can cover the immediate value and, in future, won't need
+ * to be nop'ed out when dealing with 52-bit kernel VAs.
+ */
+ .macro restore_ttbr1, ttbr
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ bic \ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
+#endif
+ .endm
+
+/*
* Arrange a physical address in a TTBR register, taking care of 52-bit
* addresses.
*
diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h
index f5a2d09afb38..af7b99005453 100644
--- a/arch/arm64/include/asm/atomic_ll_sc.h
+++ b/arch/arm64/include/asm/atomic_ll_sc.h
@@ -248,48 +248,57 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
}
__LL_SC_EXPORT(atomic64_dec_if_positive);
-#define __CMPXCHG_CASE(w, sz, name, mb, acq, rel, cl) \
-__LL_SC_INLINE unsigned long \
-__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \
- unsigned long old, \
- unsigned long new)) \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, acq, rel, cl) \
+__LL_SC_INLINE u##sz \
+__LL_SC_PREFIX(__cmpxchg_case_##name##sz(volatile void *ptr, \
+ unsigned long old, \
+ u##sz new)) \
{ \
- unsigned long tmp, oldval; \
+ unsigned long tmp; \
+ u##sz oldval; \
+ \
+ /* \
+ * Sub-word sizes require explicit casting so that the compare \
+ * part of the cmpxchg doesn't end up interpreting non-zero \
+ * upper bits of the register containing "old". \
+ */ \
+ if (sz < 32) \
+ old = (u##sz)old; \
\
asm volatile( \
" prfm pstl1strm, %[v]\n" \
- "1: ld" #acq "xr" #sz "\t%" #w "[oldval], %[v]\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "[oldval], %[v]\n" \
" eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \
" cbnz %" #w "[tmp], 2f\n" \
- " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \
+ " st" #rel "xr" #sfx "\t%w[tmp], %" #w "[new], %[v]\n" \
" cbnz %w[tmp], 1b\n" \
" " #mb "\n" \
"2:" \
: [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \
- [v] "+Q" (*(unsigned long *)ptr) \
- : [old] "Lr" (old), [new] "r" (new) \
+ [v] "+Q" (*(u##sz *)ptr) \
+ : [old] "Kr" (old), [new] "r" (new) \
: cl); \
\
return oldval; \
} \
-__LL_SC_EXPORT(__cmpxchg_case_##name);
+__LL_SC_EXPORT(__cmpxchg_case_##name##sz);
-__CMPXCHG_CASE(w, b, 1, , , , )
-__CMPXCHG_CASE(w, h, 2, , , , )
-__CMPXCHG_CASE(w, , 4, , , , )
-__CMPXCHG_CASE( , , 8, , , , )
-__CMPXCHG_CASE(w, b, acq_1, , a, , "memory")
-__CMPXCHG_CASE(w, h, acq_2, , a, , "memory")
-__CMPXCHG_CASE(w, , acq_4, , a, , "memory")
-__CMPXCHG_CASE( , , acq_8, , a, , "memory")
-__CMPXCHG_CASE(w, b, rel_1, , , l, "memory")
-__CMPXCHG_CASE(w, h, rel_2, , , l, "memory")
-__CMPXCHG_CASE(w, , rel_4, , , l, "memory")
-__CMPXCHG_CASE( , , rel_8, , , l, "memory")
-__CMPXCHG_CASE(w, b, mb_1, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, h, mb_2, dmb ish, , l, "memory")
-__CMPXCHG_CASE(w, , mb_4, dmb ish, , l, "memory")
-__CMPXCHG_CASE( , , mb_8, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, b, , 8, , , , )
+__CMPXCHG_CASE(w, h, , 16, , , , )
+__CMPXCHG_CASE(w, , , 32, , , , )
+__CMPXCHG_CASE( , , , 64, , , , )
+__CMPXCHG_CASE(w, b, acq_, 8, , a, , "memory")
+__CMPXCHG_CASE(w, h, acq_, 16, , a, , "memory")
+__CMPXCHG_CASE(w, , acq_, 32, , a, , "memory")
+__CMPXCHG_CASE( , , acq_, 64, , a, , "memory")
+__CMPXCHG_CASE(w, b, rel_, 8, , , l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16, , , l, "memory")
+__CMPXCHG_CASE(w, , rel_, 32, , , l, "memory")
+__CMPXCHG_CASE( , , rel_, 64, , , l, "memory")
+__CMPXCHG_CASE(w, b, mb_, 8, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, h, mb_, 16, dmb ish, , l, "memory")
+__CMPXCHG_CASE(w, , mb_, 32, dmb ish, , l, "memory")
+__CMPXCHG_CASE( , , mb_, 64, dmb ish, , l, "memory")
#undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index f9b0b09153e0..a424355240c5 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -446,22 +446,22 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op)
-#define __CMPXCHG_CASE(w, sz, name, mb, cl...) \
-static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
- unsigned long old, \
- unsigned long new) \
+#define __CMPXCHG_CASE(w, sfx, name, sz, mb, cl...) \
+static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
+ u##sz old, \
+ u##sz new) \
{ \
register unsigned long x0 asm ("x0") = (unsigned long)ptr; \
- register unsigned long x1 asm ("x1") = old; \
- register unsigned long x2 asm ("x2") = new; \
+ register u##sz x1 asm ("x1") = old; \
+ register u##sz x2 asm ("x2") = new; \
\
asm volatile(ARM64_LSE_ATOMIC_INSN( \
/* LL/SC */ \
- __LL_SC_CMPXCHG(name) \
+ __LL_SC_CMPXCHG(name##sz) \
__nops(2), \
/* LSE atomics */ \
" mov " #w "30, %" #w "[old]\n" \
- " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \
+ " cas" #mb #sfx "\t" #w "30, %" #w "[new], %[v]\n" \
" mov %" #w "[ret], " #w "30") \
: [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \
: [old] "r" (x1), [new] "r" (x2) \
@@ -470,22 +470,22 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \
return x0; \
}
-__CMPXCHG_CASE(w, b, 1, )
-__CMPXCHG_CASE(w, h, 2, )
-__CMPXCHG_CASE(w, , 4, )
-__CMPXCHG_CASE(x, , 8, )
-__CMPXCHG_CASE(w, b, acq_1, a, "memory")
-__CMPXCHG_CASE(w, h, acq_2, a, "memory")
-__CMPXCHG_CASE(w, , acq_4, a, "memory")
-__CMPXCHG_CASE(x, , acq_8, a, "memory")
-__CMPXCHG_CASE(w, b, rel_1, l, "memory")
-__CMPXCHG_CASE(w, h, rel_2, l, "memory")
-__CMPXCHG_CASE(w, , rel_4, l, "memory")
-__CMPXCHG_CASE(x, , rel_8, l, "memory")
-__CMPXCHG_CASE(w, b, mb_1, al, "memory")
-__CMPXCHG_CASE(w, h, mb_2, al, "memory")
-__CMPXCHG_CASE(w, , mb_4, al, "memory")
-__CMPXCHG_CASE(x, , mb_8, al, "memory")
+__CMPXCHG_CASE(w, b, , 8, )
+__CMPXCHG_CASE(w, h, , 16, )
+__CMPXCHG_CASE(w, , , 32, )
+__CMPXCHG_CASE(x, , , 64, )
+__CMPXCHG_CASE(w, b, acq_, 8, a, "memory")
+__CMPXCHG_CASE(w, h, acq_, 16, a, "memory")
+__CMPXCHG_CASE(w, , acq_, 32, a, "memory")
+__CMPXCHG_CASE(x, , acq_, 64, a, "memory")
+__CMPXCHG_CASE(w, b, rel_, 8, l, "memory")
+__CMPXCHG_CASE(w, h, rel_, 16, l, "memory")
+__CMPXCHG_CASE(w, , rel_, 32, l, "memory")
+__CMPXCHG_CASE(x, , rel_, 64, l, "memory")
+__CMPXCHG_CASE(w, b, mb_, 8, al, "memory")
+__CMPXCHG_CASE(w, h, mb_, 16, al, "memory")
+__CMPXCHG_CASE(w, , mb_, 32, al, "memory")
+__CMPXCHG_CASE(x, , mb_, 64, al, "memory")
#undef __LL_SC_CMPXCHG
#undef __CMPXCHG_CASE
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 822a9192c551..f66bb04fdf2d 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -34,6 +34,10 @@
#define psb_csync() asm volatile("hint #17" : : : "memory")
#define csdb() asm volatile("hint #20" : : : "memory")
+#define spec_bar() asm volatile(ALTERNATIVE("dsb nsh\nisb\n", \
+ SB_BARRIER_INSN"nop\n", \
+ ARM64_HAS_SB))
+
#define mb() dsb(sy)
#define rmb() dsb(ld)
#define wmb() dsb(st)
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 3b0938281541..3f9376f1c409 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -30,46 +30,46 @@
* barrier case is generated as release+dmb for the former and
* acquire+release for the latter.
*/
-#define __XCHG_CASE(w, sz, name, mb, nop_lse, acq, acq_lse, rel, cl) \
-static inline unsigned long __xchg_case_##name(unsigned long x, \
- volatile void *ptr) \
-{ \
- unsigned long ret, tmp; \
- \
- asm volatile(ARM64_LSE_ATOMIC_INSN( \
- /* LL/SC */ \
- " prfm pstl1strm, %2\n" \
- "1: ld" #acq "xr" #sz "\t%" #w "0, %2\n" \
- " st" #rel "xr" #sz "\t%w1, %" #w "3, %2\n" \
- " cbnz %w1, 1b\n" \
- " " #mb, \
- /* LSE atomics */ \
- " swp" #acq_lse #rel #sz "\t%" #w "3, %" #w "0, %2\n" \
- __nops(3) \
- " " #nop_lse) \
- : "=&r" (ret), "=&r" (tmp), "+Q" (*(unsigned long *)ptr) \
- : "r" (x) \
- : cl); \
- \
- return ret; \
+#define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
+static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+{ \
+ u##sz ret; \
+ unsigned long tmp; \
+ \
+ asm volatile(ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ " prfm pstl1strm, %2\n" \
+ "1: ld" #acq "xr" #sfx "\t%" #w "0, %2\n" \
+ " st" #rel "xr" #sfx "\t%w1, %" #w "3, %2\n" \
+ " cbnz %w1, 1b\n" \
+ " " #mb, \
+ /* LSE atomics */ \
+ " swp" #acq_lse #rel #sfx "\t%" #w "3, %" #w "0, %2\n" \
+ __nops(3) \
+ " " #nop_lse) \
+ : "=&r" (ret), "=&r" (tmp), "+Q" (*(u##sz *)ptr) \
+ : "r" (x) \
+ : cl); \
+ \
+ return ret; \
}
-__XCHG_CASE(w, b, 1, , , , , , )
-__XCHG_CASE(w, h, 2, , , , , , )
-__XCHG_CASE(w, , 4, , , , , , )
-__XCHG_CASE( , , 8, , , , , , )
-__XCHG_CASE(w, b, acq_1, , , a, a, , "memory")
-__XCHG_CASE(w, h, acq_2, , , a, a, , "memory")
-__XCHG_CASE(w, , acq_4, , , a, a, , "memory")
-__XCHG_CASE( , , acq_8, , , a, a, , "memory")
-__XCHG_CASE(w, b, rel_1, , , , , l, "memory")
-__XCHG_CASE(w, h, rel_2, , , , , l, "memory")
-__XCHG_CASE(w, , rel_4, , , , , l, "memory")
-__XCHG_CASE( , , rel_8, , , , , l, "memory")
-__XCHG_CASE(w, b, mb_1, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE(w, h, mb_2, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE(w, , mb_4, dmb ish, nop, , a, l, "memory")
-__XCHG_CASE( , , mb_8, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, b, , 8, , , , , , )
+__XCHG_CASE(w, h, , 16, , , , , , )
+__XCHG_CASE(w, , , 32, , , , , , )
+__XCHG_CASE( , , , 64, , , , , , )
+__XCHG_CASE(w, b, acq_, 8, , , a, a, , "memory")
+__XCHG_CASE(w, h, acq_, 16, , , a, a, , "memory")
+__XCHG_CASE(w, , acq_, 32, , , a, a, , "memory")
+__XCHG_CASE( , , acq_, 64, , , a, a, , "memory")
+__XCHG_CASE(w, b, rel_, 8, , , , , l, "memory")
+__XCHG_CASE(w, h, rel_, 16, , , , , l, "memory")
+__XCHG_CASE(w, , rel_, 32, , , , , l, "memory")
+__XCHG_CASE( , , rel_, 64, , , , , l, "memory")
+__XCHG_CASE(w, b, mb_, 8, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, h, mb_, 16, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE(w, , mb_, 32, dmb ish, nop, , a, l, "memory")
+__XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
#undef __XCHG_CASE
@@ -80,13 +80,13 @@ static inline unsigned long __xchg##sfx(unsigned long x, \
{ \
switch (size) { \
case 1: \
- return __xchg_case##sfx##_1(x, ptr); \
+ return __xchg_case##sfx##_8(x, ptr); \
case 2: \
- return __xchg_case##sfx##_2(x, ptr); \
+ return __xchg_case##sfx##_16(x, ptr); \
case 4: \
- return __xchg_case##sfx##_4(x, ptr); \
+ return __xchg_case##sfx##_32(x, ptr); \
case 8: \
- return __xchg_case##sfx##_8(x, ptr); \
+ return __xchg_case##sfx##_64(x, ptr); \
default: \
BUILD_BUG(); \
} \
@@ -123,13 +123,13 @@ static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
{ \
switch (size) { \
case 1: \
- return __cmpxchg_case##sfx##_1(ptr, (u8)old, new); \
+ return __cmpxchg_case##sfx##_8(ptr, old, new); \
case 2: \
- return __cmpxchg_case##sfx##_2(ptr, (u16)old, new); \
+ return __cmpxchg_case##sfx##_16(ptr, old, new); \
case 4: \
- return __cmpxchg_case##sfx##_4(ptr, old, new); \
+ return __cmpxchg_case##sfx##_32(ptr, old, new); \
case 8: \
- return __cmpxchg_case##sfx##_8(ptr, old, new); \
+ return __cmpxchg_case##sfx##_64(ptr, old, new); \
default: \
BUILD_BUG(); \
} \
@@ -197,16 +197,16 @@ __CMPXCHG_GEN(_mb)
__ret; \
})
-#define __CMPWAIT_CASE(w, sz, name) \
-static inline void __cmpwait_case_##name(volatile void *ptr, \
- unsigned long val) \
+#define __CMPWAIT_CASE(w, sfx, sz) \
+static inline void __cmpwait_case_##sz(volatile void *ptr, \
+ unsigned long val) \
{ \
unsigned long tmp; \
\
asm volatile( \
" sevl\n" \
" wfe\n" \
- " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \
+ " ldxr" #sfx "\t%" #w "[tmp], %[v]\n" \
" eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
" cbnz %" #w "[tmp], 1f\n" \
" wfe\n" \
@@ -215,10 +215,10 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \
: [val] "r" (val)); \
}
-__CMPWAIT_CASE(w, b, 1);
-__CMPWAIT_CASE(w, h, 2);
-__CMPWAIT_CASE(w, , 4);
-__CMPWAIT_CASE( , , 8);
+__CMPWAIT_CASE(w, b, 8);
+__CMPWAIT_CASE(w, h, 16);
+__CMPWAIT_CASE(w, , 32);
+__CMPWAIT_CASE( , , 64);
#undef __CMPWAIT_CASE
@@ -229,13 +229,13 @@ static inline void __cmpwait##sfx(volatile void *ptr, \
{ \
switch (size) { \
case 1: \
- return __cmpwait_case##sfx##_1(ptr, (u8)val); \
+ return __cmpwait_case##sfx##_8(ptr, (u8)val); \
case 2: \
- return __cmpwait_case##sfx##_2(ptr, (u16)val); \
+ return __cmpwait_case##sfx##_16(ptr, (u16)val); \
case 4: \
- return __cmpwait_case##sfx##_4(ptr, val); \
+ return __cmpwait_case##sfx##_32(ptr, val); \
case 8: \
- return __cmpwait_case##sfx##_8(ptr, val); \
+ return __cmpwait_case##sfx##_64(ptr, val); \
default: \
BUILD_BUG(); \
} \
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index 62d8cd15fdf2..a89f587d4842 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -54,8 +54,9 @@
#define ARM64_HAS_CRC32 33
#define ARM64_SSBS 34
#define ARM64_WORKAROUND_1188873 35
-#define ARM64_WORKAROUND_1165522 36
+#define ARM64_HAS_SB 36
+#define ARM64_WORKAROUND_1165522 37
-#define ARM64_NCAPS 37
+#define ARM64_NCAPS 38
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 7e2ec64aa414..0a15e2c55f1b 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -357,6 +357,9 @@ extern DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS];
extern struct static_key_false arm64_const_caps_ready;
+#define for_each_available_cap(cap) \
+ for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
+
bool this_cpu_has_cap(unsigned int cap);
static inline bool cpu_have_feature(unsigned int num)
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 12f93e4d2452..951ed1a4e5c9 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -151,6 +151,8 @@ struct midr_range {
.rv_max = MIDR_CPU_VAR_REV(v_max, r_max), \
}
+#define MIDR_REV_RANGE(m, v, r_min, r_max) MIDR_RANGE(m, v, r_min, v, r_max)
+#define MIDR_REV(m, v, r) MIDR_RANGE(m, v, r, v, r)
#define MIDR_ALL_VERSIONS(m) MIDR_RANGE(m, 0, 0, 0xf, 0xf)
static inline bool is_midr_in_range(u32 midr, struct midr_range const *range)
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 433b9554c6a1..6adc1a90e7e6 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -117,7 +117,11 @@
* 64-bit, this is above 4GB to leave the entire 32-bit address
* space open for things that want to use the area for 32-bit pointers.
*/
+#ifdef CONFIG_ARM64_FORCE_52BIT
#define ELF_ET_DYN_BASE (2 * TASK_SIZE_64 / 3)
+#else
+#define ELF_ET_DYN_BASE (2 * DEFAULT_MAP_WINDOW_64 / 3)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index caa955f10e19..6795c147cbcc 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -13,6 +13,7 @@
#include <asm/insn.h>
+#define HAVE_FUNCTION_GRAPH_FP_TEST
#define MCOUNT_ADDR ((unsigned long)_mcount)
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index c6802dea6cab..9c01f04db64d 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -261,6 +261,11 @@ enum aarch64_insn_prfm_policy {
AARCH64_INSN_PRFM_POLICY_STRM,
};
+enum aarch64_insn_adr_type {
+ AARCH64_INSN_ADR_TYPE_ADRP,
+ AARCH64_INSN_ADR_TYPE_ADR,
+};
+
#define __AARCH64_INSN_FUNCS(abbr, mask, val) \
static __always_inline bool aarch64_insn_is_##abbr(u32 code) \
{ return (code & (mask)) == (val); } \
@@ -393,6 +398,9 @@ u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
enum aarch64_insn_adsb_type type);
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_adr_type type);
u32 aarch64_insn_gen_bitfield(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int immr, int imms,
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 9f8b915af3a7..ee723835c1f4 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -104,7 +104,23 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
}
/* IO barriers */
-#define __iormb() rmb()
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ rmb(); \
+ \
+ /* \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call to \
+ * udelay() will be ordered due to the ISB in get_cycles(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
#define __iowmb() wmb()
#define mmiowb() do { } while (0)
@@ -129,10 +145,10 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* following Normal memory access. Writes are ordered relative to any prior
* Normal memory access.
*/
-#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(); __v; })
-#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(); __v; })
-#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(); __v; })
-#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(); __v; })
+#define readb(c) ({ u8 __v = readb_relaxed(c); __iormb(__v); __v; })
+#define readw(c) ({ u16 __v = readw_relaxed(c); __iormb(__v); __v; })
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); })
#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); })
@@ -183,9 +199,9 @@ extern void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size);
/*
* io{read,write}{16,32,64}be() macros
*/
-#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(); __v; })
-#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(); __v; })
-#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(); __v; })
+#define ioread16be(p) ({ __u16 __v = be16_to_cpu((__force __be16)__raw_readw(p)); __iormb(__v); __v; })
+#define ioread32be(p) ({ __u32 __v = be32_to_cpu((__force __be32)__raw_readl(p)); __iormb(__v); __v; })
+#define ioread64be(p) ({ __u64 __v = be64_to_cpu((__force __be64)__raw_readq(p)); __iormb(__v); __v; })
#define iowrite16be(v,p) ({ __iowmb(); __raw_writew((__force __u16)cpu_to_be16(v), p); })
#define iowrite32be(v,p) ({ __iowmb(); __raw_writel((__force __u32)cpu_to_be32(v), p); })
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index b96442960aea..56562ff01076 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -76,12 +76,17 @@
/*
* KASAN requires 1/8th of the kernel virtual address space for the shadow
* region. KASAN can bloat the stack significantly, so double the (minimum)
- * stack size when KASAN is in use.
+ * stack size when KASAN is in use, and then double it again if KASAN_EXTRA is
+ * on.
*/
#ifdef CONFIG_KASAN
#define KASAN_SHADOW_SCALE_SHIFT 3
#define KASAN_SHADOW_SIZE (UL(1) << (VA_BITS - KASAN_SHADOW_SCALE_SHIFT))
+#ifdef CONFIG_KASAN_EXTRA
+#define KASAN_THREAD_SHIFT 2
+#else
#define KASAN_THREAD_SHIFT 1
+#endif /* CONFIG_KASAN_EXTRA */
#else
#define KASAN_SHADOW_SIZE (0)
#define KASAN_THREAD_SHIFT 0
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 1e58bf58c22b..2da3e478fd8f 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -35,6 +35,8 @@
#include <asm/sysreg.h>
#include <asm/tlbflush.h>
+extern bool rodata_full;
+
static inline void contextidr_thread_switch(struct task_struct *next)
{
if (!IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR))
@@ -72,6 +74,9 @@ extern u64 idmap_ptrs_per_pgd;
static inline bool __cpu_uses_extended_idmap(void)
{
+ if (IS_ENABLED(CONFIG_ARM64_USER_VA_BITS_52))
+ return false;
+
return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
}
diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h
index 97d0ef12e2ff..905e1bb0e7bd 100644
--- a/arch/arm64/include/asm/module.h
+++ b/arch/arm64/include/asm/module.h
@@ -22,7 +22,7 @@
#ifdef CONFIG_ARM64_MODULE_PLTS
struct mod_plt_sec {
- struct elf64_shdr *plt;
+ int plt_shndx;
int plt_num_entries;
int plt_max_entries;
};
@@ -36,10 +36,12 @@ struct mod_arch_specific {
};
#endif
-u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym);
-u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val);
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, u64 val);
#ifdef CONFIG_RANDOMIZE_BASE
extern u64 module_alloc_base;
@@ -56,39 +58,19 @@ struct plt_entry {
* is exactly what we are dealing with here, we are free to use x16
* as a scratch register in the PLT veneers.
*/
- __le32 mov0; /* movn x16, #0x.... */
- __le32 mov1; /* movk x16, #0x...., lsl #16 */
- __le32 mov2; /* movk x16, #0x...., lsl #32 */
+ __le32 adrp; /* adrp x16, .... */
+ __le32 add; /* add x16, x16, #0x.... */
__le32 br; /* br x16 */
};
-static inline struct plt_entry get_plt_entry(u64 val)
+static inline bool is_forbidden_offset_for_adrp(void *place)
{
- /*
- * MOVK/MOVN/MOVZ opcode:
- * +--------+------------+--------+-----------+-------------+---------+
- * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
- * +--------+------------+--------+-----------+-------------+---------+
- *
- * Rd := 0x10 (x16)
- * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
- * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
- * sf := 1 (64-bit variant)
- */
- return (struct plt_entry){
- cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5),
- cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
- cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
- cpu_to_le32(0xd61f0200)
- };
+ return IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_843419) &&
+ ((u64)place & 0xfff) >= 0xff8;
}
-static inline bool plt_entries_equal(const struct plt_entry *a,
- const struct plt_entry *b)
-{
- return a->mov0 == b->mov0 &&
- a->mov1 == b->mov1 &&
- a->mov2 == b->mov2;
-}
+struct plt_entry get_plt_entry(u64 dst, void *pc);
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b);
#endif /* __ASM_MODULE_H */
diff --git a/arch/arm64/include/asm/neon-intrinsics.h b/arch/arm64/include/asm/neon-intrinsics.h
new file mode 100644
index 000000000000..2ba6c6b9541f
--- /dev/null
+++ b/arch/arm64/include/asm/neon-intrinsics.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_NEON_INTRINSICS_H
+#define __ASM_NEON_INTRINSICS_H
+
+#include <asm-generic/int-ll64.h>
+
+/*
+ * In the kernel, u64/s64 are [un]signed long long, not [un]signed long.
+ * So by redefining these macros to the former, we can force gcc-stdint.h
+ * to define uint64_t / in64_t in a compatible manner.
+ */
+
+#ifdef __INT64_TYPE__
+#undef __INT64_TYPE__
+#define __INT64_TYPE__ long long
+#endif
+
+#ifdef __UINT64_TYPE__
+#undef __UINT64_TYPE__
+#define __UINT64_TYPE__ unsigned long long
+#endif
+
+/*
+ * genksyms chokes on the ARM NEON instrinsics system header, but we
+ * don't export anything it defines anyway, so just disregard when
+ * genksyms execute.
+ */
+#ifndef __GENKSYMS__
+#include <arm_neon.h>
+#endif
+
+#endif /* __ASM_NEON_INTRINSICS_H */
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 21a81b59a0cc..f7b1bbbb6f12 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -48,263 +48,193 @@ static inline unsigned long __my_cpu_offset(void)
}
#define __my_cpu_offset __my_cpu_offset()
-#define PERCPU_OP(op, asm_op) \
-static inline unsigned long __percpu_##op(void *ptr, \
- unsigned long val, int size) \
+#define PERCPU_RW_OPS(sz) \
+static inline unsigned long __percpu_read_##sz(void *ptr) \
{ \
- unsigned long loop, ret; \
+ return READ_ONCE(*(u##sz *)ptr); \
+} \
\
- switch (size) { \
- case 1: \
- asm ("//__per_cpu_" #op "_1\n" \
- "1: ldxrb %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxrb %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u8 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 2: \
- asm ("//__per_cpu_" #op "_2\n" \
- "1: ldxrh %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxrh %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u16 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 4: \
- asm ("//__per_cpu_" #op "_4\n" \
- "1: ldxr %w[ret], %[ptr]\n" \
- #asm_op " %w[ret], %w[ret], %w[val]\n" \
- " stxr %w[loop], %w[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u32 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- case 8: \
- asm ("//__per_cpu_" #op "_8\n" \
- "1: ldxr %[ret], %[ptr]\n" \
- #asm_op " %[ret], %[ret], %[val]\n" \
- " stxr %w[loop], %[ret], %[ptr]\n" \
- " cbnz %w[loop], 1b" \
- : [loop] "=&r" (loop), [ret] "=&r" (ret), \
- [ptr] "+Q"(*(u64 *)ptr) \
- : [val] "Ir" (val)); \
- break; \
- default: \
- ret = 0; \
- BUILD_BUG(); \
- } \
- \
- return ret; \
-}
-
-PERCPU_OP(add, add)
-PERCPU_OP(and, and)
-PERCPU_OP(or, orr)
-#undef PERCPU_OP
-
-static inline unsigned long __percpu_read(void *ptr, int size)
-{
- unsigned long ret;
-
- switch (size) {
- case 1:
- ret = READ_ONCE(*(u8 *)ptr);
- break;
- case 2:
- ret = READ_ONCE(*(u16 *)ptr);
- break;
- case 4:
- ret = READ_ONCE(*(u32 *)ptr);
- break;
- case 8:
- ret = READ_ONCE(*(u64 *)ptr);
- break;
- default:
- ret = 0;
- BUILD_BUG();
- }
-
- return ret;
+static inline void __percpu_write_##sz(void *ptr, unsigned long val) \
+{ \
+ WRITE_ONCE(*(u##sz *)ptr, (u##sz)val); \
}
-static inline void __percpu_write(void *ptr, unsigned long val, int size)
-{
- switch (size) {
- case 1:
- WRITE_ONCE(*(u8 *)ptr, (u8)val);
- break;
- case 2:
- WRITE_ONCE(*(u16 *)ptr, (u16)val);
- break;
- case 4:
- WRITE_ONCE(*(u32 *)ptr, (u32)val);
- break;
- case 8:
- WRITE_ONCE(*(u64 *)ptr, (u64)val);
- break;
- default:
- BUILD_BUG();
- }
+#define __PERCPU_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline void \
+__percpu_##name##_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz tmp; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[tmp], %[ptr]\n" \
+ #op_llsc "\t%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[tmp], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[val], %[ptr]\n" \
+ __nops(3)) \
+ : [loop] "=&r" (loop), [tmp] "=&r" (tmp), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
}
-static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
- int size)
-{
- unsigned long ret, loop;
-
- switch (size) {
- case 1:
- asm ("//__percpu_xchg_1\n"
- "1: ldxrb %w[ret], %[ptr]\n"
- " stxrb %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u8 *)ptr)
- : [val] "r" (val));
- break;
- case 2:
- asm ("//__percpu_xchg_2\n"
- "1: ldxrh %w[ret], %[ptr]\n"
- " stxrh %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u16 *)ptr)
- : [val] "r" (val));
- break;
- case 4:
- asm ("//__percpu_xchg_4\n"
- "1: ldxr %w[ret], %[ptr]\n"
- " stxr %w[loop], %w[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u32 *)ptr)
- : [val] "r" (val));
- break;
- case 8:
- asm ("//__percpu_xchg_8\n"
- "1: ldxr %[ret], %[ptr]\n"
- " stxr %w[loop], %[val], %[ptr]\n"
- " cbnz %w[loop], 1b"
- : [loop] "=&r"(loop), [ret] "=&r"(ret),
- [ptr] "+Q"(*(u64 *)ptr)
- : [val] "r" (val));
- break;
- default:
- ret = 0;
- BUILD_BUG();
- }
-
- return ret;
+#define __PERCPU_RET_OP_CASE(w, sfx, name, sz, op_llsc, op_lse) \
+static inline u##sz \
+__percpu_##name##_return_case_##sz(void *ptr, unsigned long val) \
+{ \
+ unsigned int loop; \
+ u##sz ret; \
+ \
+ asm volatile (ARM64_LSE_ATOMIC_INSN( \
+ /* LL/SC */ \
+ "1: ldxr" #sfx "\t%" #w "[ret], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ " stxr" #sfx "\t%w[loop], %" #w "[ret], %[ptr]\n" \
+ " cbnz %w[loop], 1b", \
+ /* LSE atomics */ \
+ #op_lse "\t%" #w "[ret], %" #w "[val], %[ptr]\n" \
+ #op_llsc "\t%" #w "[ret], %" #w "[ret], %" #w "[val]\n" \
+ __nops(2)) \
+ : [loop] "=&r" (loop), [ret] "=&r" (ret), \
+ [ptr] "+Q"(*(u##sz *)ptr) \
+ : [val] "r" ((u##sz)(val))); \
+ \
+ return ret; \
}
-/* this_cpu_cmpxchg */
-#define _protect_cmpxchg_local(pcp, o, n) \
-({ \
- typeof(*raw_cpu_ptr(&(pcp))) __ret; \
- preempt_disable(); \
- __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
- preempt_enable(); \
- __ret; \
-})
-
-#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define PERCPU_OP(name, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+#define PERCPU_RET_OP(name, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, b, name, 8, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, h, name, 16, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE(w, , name, 32, op_llsc, op_lse) \
+ __PERCPU_RET_OP_CASE( , , name, 64, op_llsc, op_lse)
+
+PERCPU_RW_OPS(8)
+PERCPU_RW_OPS(16)
+PERCPU_RW_OPS(32)
+PERCPU_RW_OPS(64)
+PERCPU_OP(add, add, stadd)
+PERCPU_OP(andnot, bic, stclr)
+PERCPU_OP(or, orr, stset)
+PERCPU_RET_OP(add, add, ldadd)
+
+#undef PERCPU_RW_OPS
+#undef __PERCPU_OP_CASE
+#undef __PERCPU_RET_OP_CASE
+#undef PERCPU_OP
+#undef PERCPU_RET_OP
+/*
+ * It would be nice to avoid the conditional call into the scheduler when
+ * re-enabling preemption for preemptible kernels, but doing that in a way
+ * which builds inside a module would mean messing directly with the preempt
+ * count. If you do this, peterz and tglx will hunt you down.
+ */
#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
({ \
int __ret; \
- preempt_disable(); \
+ preempt_disable_notrace(); \
__ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
raw_cpu_ptr(&(ptr2)), \
o1, o2, n1, n2); \
- preempt_enable(); \
+ preempt_enable_notrace(); \
__ret; \
})
-#define _percpu_read(pcp) \
+#define _pcp_protect(op, pcp, ...) \
({ \
- typeof(pcp) __retval; \
preempt_disable_notrace(); \
- __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \
- sizeof(pcp)); \
+ op(raw_cpu_ptr(&(pcp)), __VA_ARGS__); \
preempt_enable_notrace(); \
- __retval; \
})
-#define _percpu_write(pcp, val) \
-do { \
+#define _pcp_protect_return(op, pcp, args...) \
+({ \
+ typeof(pcp) __retval; \
preempt_disable_notrace(); \
- __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \
- sizeof(pcp)); \
+ __retval = (typeof(pcp))op(raw_cpu_ptr(&(pcp)), ##args); \
preempt_enable_notrace(); \
-} while(0) \
-
-#define _pcp_protect(operation, pcp, val) \
-({ \
- typeof(pcp) __retval; \
- preempt_disable(); \
- __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \
- (val), sizeof(pcp)); \
- preempt_enable(); \
- __retval; \
+ __retval; \
})
-#define _percpu_add(pcp, val) \
- _pcp_protect(__percpu_add, pcp, val)
-
-#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
-
-#define _percpu_and(pcp, val) \
- _pcp_protect(__percpu_and, pcp, val)
-
-#define _percpu_or(pcp, val) \
- _pcp_protect(__percpu_or, pcp, val)
-
-#define _percpu_xchg(pcp, val) (typeof(pcp)) \
- _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
-
-#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_4(pcp, val) _percpu_add(pcp, val)
-#define this_cpu_add_8(pcp, val) _percpu_add(pcp, val)
-
-#define this_cpu_add_return_1(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_2(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_4(pcp, val) _percpu_add_return(pcp, val)
-#define this_cpu_add_return_8(pcp, val) _percpu_add_return(pcp, val)
-
-#define this_cpu_and_1(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_2(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_4(pcp, val) _percpu_and(pcp, val)
-#define this_cpu_and_8(pcp, val) _percpu_and(pcp, val)
-
-#define this_cpu_or_1(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_2(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_4(pcp, val) _percpu_or(pcp, val)
-#define this_cpu_or_8(pcp, val) _percpu_or(pcp, val)
-
-#define this_cpu_read_1(pcp) _percpu_read(pcp)
-#define this_cpu_read_2(pcp) _percpu_read(pcp)
-#define this_cpu_read_4(pcp) _percpu_read(pcp)
-#define this_cpu_read_8(pcp) _percpu_read(pcp)
-
-#define this_cpu_write_1(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_2(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_4(pcp, val) _percpu_write(pcp, val)
-#define this_cpu_write_8(pcp, val) _percpu_write(pcp, val)
-
-#define this_cpu_xchg_1(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_2(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_4(pcp, val) _percpu_xchg(pcp, val)
-#define this_cpu_xchg_8(pcp, val) _percpu_xchg(pcp, val)
+#define this_cpu_read_1(pcp) \
+ _pcp_protect_return(__percpu_read_8, pcp)
+#define this_cpu_read_2(pcp) \
+ _pcp_protect_return(__percpu_read_16, pcp)
+#define this_cpu_read_4(pcp) \
+ _pcp_protect_return(__percpu_read_32, pcp)
+#define this_cpu_read_8(pcp) \
+ _pcp_protect_return(__percpu_read_64, pcp)
+
+#define this_cpu_write_1(pcp, val) \
+ _pcp_protect(__percpu_write_8, pcp, (unsigned long)val)
+#define this_cpu_write_2(pcp, val) \
+ _pcp_protect(__percpu_write_16, pcp, (unsigned long)val)
+#define this_cpu_write_4(pcp, val) \
+ _pcp_protect(__percpu_write_32, pcp, (unsigned long)val)
+#define this_cpu_write_8(pcp, val) \
+ _pcp_protect(__percpu_write_64, pcp, (unsigned long)val)
+
+#define this_cpu_add_1(pcp, val) \
+ _pcp_protect(__percpu_add_case_8, pcp, val)
+#define this_cpu_add_2(pcp, val) \
+ _pcp_protect(__percpu_add_case_16, pcp, val)
+#define this_cpu_add_4(pcp, val) \
+ _pcp_protect(__percpu_add_case_32, pcp, val)
+#define this_cpu_add_8(pcp, val) \
+ _pcp_protect(__percpu_add_case_64, pcp, val)
+
+#define this_cpu_add_return_1(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_8, pcp, val)
+#define this_cpu_add_return_2(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_16, pcp, val)
+#define this_cpu_add_return_4(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_32, pcp, val)
+#define this_cpu_add_return_8(pcp, val) \
+ _pcp_protect_return(__percpu_add_return_case_64, pcp, val)
+
+#define this_cpu_and_1(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_8, pcp, ~val)
+#define this_cpu_and_2(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_16, pcp, ~val)
+#define this_cpu_and_4(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_32, pcp, ~val)
+#define this_cpu_and_8(pcp, val) \
+ _pcp_protect(__percpu_andnot_case_64, pcp, ~val)
+
+#define this_cpu_or_1(pcp, val) \
+ _pcp_protect(__percpu_or_case_8, pcp, val)
+#define this_cpu_or_2(pcp, val) \
+ _pcp_protect(__percpu_or_case_16, pcp, val)
+#define this_cpu_or_4(pcp, val) \
+ _pcp_protect(__percpu_or_case_32, pcp, val)
+#define this_cpu_or_8(pcp, val) \
+ _pcp_protect(__percpu_or_case_64, pcp, val)
+
+#define this_cpu_xchg_1(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_2(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_4(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+#define this_cpu_xchg_8(pcp, val) \
+ _pcp_protect_return(xchg_relaxed, pcp, val)
+
+#define this_cpu_cmpxchg_1(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_2(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_4(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
+#define this_cpu_cmpxchg_8(pcp, o, n) \
+ _pcp_protect_return(cmpxchg_relaxed, pcp, o, n)
#include <asm-generic/percpu.h>
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index a7d5d6e459eb..2a700f7b12d2 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -80,7 +80,11 @@
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+#define PTRS_PER_PGD (1 << (52 - PGDIR_SHIFT))
+#else
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
+#endif
/*
* Section address mask and size definitions.
@@ -310,4 +314,10 @@
#define TTBR_BADDR_MASK_52 (((UL(1) << 46) - 1) << 2)
#endif
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+/* Must be at least 64-byte aligned to prevent corruption of the TTBR */
+#define TTBR1_BADDR_4852_OFFSET (((UL(1) << (52 - PGDIR_SHIFT)) - \
+ (UL(1) << (48 - PGDIR_SHIFT))) * 8)
+#endif
+
#endif
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 50b1ef8584c0..5bbb59c81920 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -22,6 +22,7 @@
#include <asm/memory.h>
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable-prot.h>
+#include <asm/tlbflush.h>
/*
* VMALLOC range.
@@ -685,6 +686,27 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
return __ptep_test_and_clear_young(ptep);
}
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
+ unsigned long address, pte_t *ptep)
+{
+ int young = ptep_test_and_clear_young(vma, address, ptep);
+
+ if (young) {
+ /*
+ * We can elide the trailing DSB here since the worst that can
+ * happen is that a CPU continues to use the young entry in its
+ * TLB and we mistakenly reclaim the associated page. The
+ * window for such an event is bounded by the next
+ * context-switch, which provides a DSB to complete the TLB
+ * invalidation.
+ */
+ flush_tlb_page_nosync(vma, address);
+ }
+
+ return young;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h
new file mode 100644
index 000000000000..d49951647014
--- /dev/null
+++ b/arch/arm64/include/asm/preempt.h
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_PREEMPT_H
+#define __ASM_PREEMPT_H
+
+#include <linux/thread_info.h>
+
+#define PREEMPT_NEED_RESCHED BIT(32)
+#define PREEMPT_ENABLED (PREEMPT_NEED_RESCHED)
+
+static inline int preempt_count(void)
+{
+ return READ_ONCE(current_thread_info()->preempt.count);
+}
+
+static inline void preempt_count_set(u64 pc)
+{
+ /* Preserve existing value of PREEMPT_NEED_RESCHED */
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+#define init_task_preempt_count(p) do { \
+ task_thread_info(p)->preempt_count = FORK_PREEMPT_COUNT; \
+} while (0)
+
+#define init_idle_preempt_count(p, cpu) do { \
+ task_thread_info(p)->preempt_count = PREEMPT_ENABLED; \
+} while (0)
+
+static inline void set_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 0;
+}
+
+static inline void clear_preempt_need_resched(void)
+{
+ current_thread_info()->preempt.need_resched = 1;
+}
+
+static inline bool test_preempt_need_resched(void)
+{
+ return !current_thread_info()->preempt.need_resched;
+}
+
+static inline void __preempt_count_add(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc += val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline void __preempt_count_sub(int val)
+{
+ u32 pc = READ_ONCE(current_thread_info()->preempt.count);
+ pc -= val;
+ WRITE_ONCE(current_thread_info()->preempt.count, pc);
+}
+
+static inline bool __preempt_count_dec_and_test(void)
+{
+ struct thread_info *ti = current_thread_info();
+ u64 pc = READ_ONCE(ti->preempt_count);
+
+ /* Update only the count field, leaving need_resched unchanged */
+ WRITE_ONCE(ti->preempt.count, --pc);
+
+ /*
+ * If we wrote back all zeroes, then we're preemptible and in
+ * need of a reschedule. Otherwise, we need to reload the
+ * preempt_count in case the need_resched flag was cleared by an
+ * interrupt occurring between the non-atomic READ_ONCE/WRITE_ONCE
+ * pair.
+ */
+ return !pc || !READ_ONCE(ti->preempt_count);
+}
+
+static inline bool should_resched(int preempt_offset)
+{
+ u64 pc = READ_ONCE(current_thread_info()->preempt_count);
+ return pc == preempt_offset;
+}
+
+#ifdef CONFIG_PREEMPT
+void preempt_schedule(void);
+#define __preempt_schedule() preempt_schedule()
+void preempt_schedule_notrace(void);
+#define __preempt_schedule_notrace() preempt_schedule_notrace()
+#endif /* CONFIG_PREEMPT */
+
+#endif /* __ASM_PREEMPT_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 6b0d4dff5012..538ecbc15067 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -19,10 +19,12 @@
#ifndef __ASM_PROCESSOR_H
#define __ASM_PROCESSOR_H
-#define TASK_SIZE_64 (UL(1) << VA_BITS)
-
-#define KERNEL_DS UL(-1)
-#define USER_DS (TASK_SIZE_64 - 1)
+#define KERNEL_DS UL(-1)
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+#define USER_DS ((UL(1) << 52) - 1)
+#else
+#define USER_DS ((UL(1) << VA_BITS) - 1)
+#endif /* CONFIG_ARM64_USER_VA_BITS_52 */
/*
* On arm64 systems, unaligned accesses by the CPU are cheap, and so there is
@@ -53,19 +55,33 @@
* TASK_SIZE - the maximum size of a user space task.
* TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area.
*/
+
+#define DEFAULT_MAP_WINDOW_64 (UL(1) << VA_BITS)
+
+extern u64 vabits_user;
+#define TASK_SIZE_64 (UL(1) << vabits_user)
+
#ifdef CONFIG_COMPAT
#define TASK_SIZE_32 UL(0x100000000)
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
+#define DEFAULT_MAP_WINDOW (test_thread_flag(TIF_32BIT) ? \
+ TASK_SIZE_32 : DEFAULT_MAP_WINDOW_64)
#else
#define TASK_SIZE TASK_SIZE_64
+#define DEFAULT_MAP_WINDOW DEFAULT_MAP_WINDOW_64
#endif /* CONFIG_COMPAT */
+#ifdef CONFIG_ARM64_FORCE_52BIT
+#define STACK_TOP_MAX TASK_SIZE_64
#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 4))
+#else
+#define STACK_TOP_MAX DEFAULT_MAP_WINDOW_64
+#define TASK_UNMAPPED_BASE (PAGE_ALIGN(DEFAULT_MAP_WINDOW / 4))
+#endif /* CONFIG_ARM64_FORCE_52BIT */
-#define STACK_TOP_MAX TASK_SIZE_64
#ifdef CONFIG_COMPAT
#define AARCH32_VECTORS_BASE 0xffff0000
#define STACK_TOP (test_thread_flag(TIF_32BIT) ? \
@@ -74,6 +90,15 @@
#define STACK_TOP STACK_TOP_MAX
#endif /* CONFIG_COMPAT */
+#ifndef CONFIG_ARM64_FORCE_52BIT
+#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
+ DEFAULT_MAP_WINDOW)
+
+#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
+ base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
+ base)
+#endif /* CONFIG_ARM64_FORCE_52BIT */
+
extern phys_addr_t arm64_dma_phys_limit;
#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index f82b447bd34f..1895561839a9 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -17,15 +17,20 @@
#define __ASM_SMP_H
/* Values for secondary_data.status */
+#define CPU_STUCK_REASON_SHIFT (8)
+#define CPU_BOOT_STATUS_MASK ((1U << CPU_STUCK_REASON_SHIFT) - 1)
-#define CPU_MMU_OFF (-1)
-#define CPU_BOOT_SUCCESS (0)
+#define CPU_MMU_OFF (-1)
+#define CPU_BOOT_SUCCESS (0)
/* The cpu invoked ops->cpu_die, synchronise it with cpu_kill */
-#define CPU_KILL_ME (1)
+#define CPU_KILL_ME (1)
/* The cpu couldn't die gracefully and is looping in the kernel */
-#define CPU_STUCK_IN_KERNEL (2)
+#define CPU_STUCK_IN_KERNEL (2)
/* Fatal system error detected by secondary CPU, crash the system */
-#define CPU_PANIC_KERNEL (3)
+#define CPU_PANIC_KERNEL (3)
+
+#define CPU_STUCK_REASON_52_BIT_VA (1U << CPU_STUCK_REASON_SHIFT)
+#define CPU_STUCK_REASON_NO_GRAN (2U << CPU_STUCK_REASON_SHIFT)
#ifndef __ASSEMBLY__
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 0c909c4a932f..78f15858535e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -104,6 +104,11 @@
#define SET_PSTATE_UAO(x) __emit_inst(0xd500401f | PSTATE_UAO | ((!!x) << PSTATE_Imm_shift))
#define SET_PSTATE_SSBS(x) __emit_inst(0xd500401f | PSTATE_SSBS | ((!!x) << PSTATE_Imm_shift))
+#define __SYS_BARRIER_INSN(CRm, op2, Rt) \
+ __emit_inst(0xd5000000 | sys_insn(0, 3, 3, (CRm), (op2)) | ((Rt) & 0x1f))
+
+#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)
+
#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
@@ -528,6 +533,7 @@
#define ID_AA64ISAR0_AES_SHIFT 4
/* id_aa64isar1 */
+#define ID_AA64ISAR1_SB_SHIFT 36
#define ID_AA64ISAR1_LRCPC_SHIFT 20
#define ID_AA64ISAR1_FCMA_SHIFT 16
#define ID_AA64ISAR1_JSCVT_SHIFT 12
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index cb2c10a8f0a8..bbca68b54732 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -42,7 +42,18 @@ struct thread_info {
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
u64 ttbr0; /* saved TTBR0_EL1 */
#endif
- int preempt_count; /* 0 => preemptable, <0 => bug */
+ union {
+ u64 preempt_count; /* 0 => preemptible, <0 => bug */
+ struct {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ u32 need_resched;
+ u32 count;
+#else
+ u32 count;
+ u32 need_resched;
+#endif
+ } preempt;
+ };
};
#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index c3c0387aee18..bb4507a11b1b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -21,6 +21,7 @@
#ifndef __ASSEMBLY__
+#include <linux/mm_types.h>
#include <linux/sched.h>
#include <asm/cputype.h>
#include <asm/mmu.h>
@@ -164,14 +165,20 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
dsb(ish);
}
-static inline void flush_tlb_page(struct vm_area_struct *vma,
- unsigned long uaddr)
+static inline void flush_tlb_page_nosync(struct vm_area_struct *vma,
+ unsigned long uaddr)
{
unsigned long addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm));
dsb(ishst);
__tlbi(vale1is, addr);
__tlbi_user(vale1is, addr);
+}
+
+static inline void flush_tlb_page(struct vm_area_struct *vma,
+ unsigned long uaddr)
+{
+ flush_tlb_page_nosync(vma, uaddr);
dsb(ish);
}
@@ -179,7 +186,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
* necessarily a performance improvement.
*/
-#define MAX_TLBI_OPS 1024UL
+#define MAX_TLBI_OPS PTRS_PER_PTE
static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end,
@@ -188,7 +195,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
unsigned long asid = ASID(vma->vm_mm);
unsigned long addr;
- if ((end - start) > (MAX_TLBI_OPS * stride)) {
+ if ((end - start) >= (MAX_TLBI_OPS * stride)) {
flush_tlb_mm(vma->vm_mm);
return;
}
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 07c34087bd5e..fad33f5fde47 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -45,8 +45,7 @@ static inline void set_fs(mm_segment_t fs)
* Prevent a mispredicted conditional call to set_fs from forwarding
* the wrong address limit to access_ok under speculation.
*/
- dsb(nsh);
- isb();
+ spec_bar();
/* On user-mode return, check fs is correct */
set_thread_flag(TIF_FSCHECK);
diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h
new file mode 100644
index 000000000000..856386ad076c
--- /dev/null
+++ b/arch/arm64/include/asm/xor.h
@@ -0,0 +1,73 @@
+/*
+ * arch/arm64/include/asm/xor.h
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+extern struct xor_block_template const xor_block_inner_neon;
+
+static void
+xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_2(bytes, p1, p2);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_3(bytes, p1, p2, p3);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
+ kernel_neon_end();
+}
+
+static void
+xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
+ unsigned long *p3, unsigned long *p4, unsigned long *p5)
+{
+ kernel_neon_begin();
+ xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
+ kernel_neon_end();
+}
+
+static struct xor_block_template xor_block_arm64 = {
+ .name = "arm64_neon",
+ .do_2 = xor_neon_2,
+ .do_3 = xor_neon_3,
+ .do_4 = xor_neon_4,
+ .do_5 = xor_neon_5
+};
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES \
+ do { \
+ xor_speed(&xor_block_8regs); \
+ xor_speed(&xor_block_32regs); \
+ if (cpu_has_neon()) { \
+ xor_speed(&xor_block_arm64);\
+ } \
+ } while (0)
+
+#endif /* ! CONFIG_KERNEL_MODE_NEON */
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 2bcd6e4f3474..7784f7cba16c 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -49,5 +49,6 @@
#define HWCAP_ILRCPC (1 << 26)
#define HWCAP_FLAGM (1 << 27)
#define HWCAP_SSBS (1 << 28)
+#define HWCAP_SB (1 << 29)
#endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4c8b13bede80..e069f957ec28 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -30,7 +30,7 @@ $(obj)/%.stub.o: $(obj)/%.o FORCE
arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
sys_compat.o
arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
-arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o
+arm64-obj-$(CONFIG_MODULES) += module.o
arm64-obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o
arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o
diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c
deleted file mode 100644
index 72f63a59b008..000000000000
--- a/arch/arm64/kernel/arm64ksyms.c
+++ /dev/null
@@ -1,88 +0,0 @@
-/*
- * Based on arch/arm/kernel/armksyms.c
- *
- * Copyright (C) 2000 Russell King
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/export.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/cryptohash.h>
-#include <linux/delay.h>
-#include <linux/in6.h>
-#include <linux/syscalls.h>
-#include <linux/uaccess.h>
-#include <linux/io.h>
-#include <linux/arm-smccc.h>
-#include <linux/kprobes.h>
-
-#include <asm/checksum.h>
-
-EXPORT_SYMBOL(copy_page);
-EXPORT_SYMBOL(clear_page);
-
- /* user mem (segment) */
-EXPORT_SYMBOL(__arch_copy_from_user);
-EXPORT_SYMBOL(__arch_copy_to_user);
-EXPORT_SYMBOL(__arch_clear_user);
-EXPORT_SYMBOL(__arch_copy_in_user);
-
- /* physical memory */
-EXPORT_SYMBOL(memstart_addr);
-
- /* string / mem functions */
-#ifndef CONFIG_KASAN
-EXPORT_SYMBOL(strchr);
-EXPORT_SYMBOL(strrchr);
-EXPORT_SYMBOL(strcmp);
-EXPORT_SYMBOL(strncmp);
-EXPORT_SYMBOL(strlen);
-EXPORT_SYMBOL(strnlen);
-EXPORT_SYMBOL(memcmp);
-EXPORT_SYMBOL(memchr);
-#endif
-
-EXPORT_SYMBOL(memset);
-EXPORT_SYMBOL(memcpy);
-EXPORT_SYMBOL(memmove);
-EXPORT_SYMBOL(__memset);
-EXPORT_SYMBOL(__memcpy);
-EXPORT_SYMBOL(__memmove);
-
- /* atomic bitops */
-EXPORT_SYMBOL(set_bit);
-EXPORT_SYMBOL(test_and_set_bit);
-EXPORT_SYMBOL(clear_bit);
-EXPORT_SYMBOL(test_and_clear_bit);
-EXPORT_SYMBOL(change_bit);
-EXPORT_SYMBOL(test_and_change_bit);
-
-#ifdef CONFIG_FUNCTION_TRACER
-EXPORT_SYMBOL(_mcount);
-NOKPROBE_SYMBOL(_mcount);
-#endif
-
- /* arm-smccc */
-EXPORT_SYMBOL(__arm_smccc_smc);
-EXPORT_SYMBOL(__arm_smccc_hvc);
-
- /* tishift.S */
-extern long long __ashlti3(long long a, int b);
-EXPORT_SYMBOL(__ashlti3);
-extern long long __ashrti3(long long a, int b);
-EXPORT_SYMBOL(__ashrti3);
-extern long long __lshrti3(long long a, int b);
-EXPORT_SYMBOL(__lshrti3);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 476e738e6c46..e3123055e552 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -135,7 +135,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
const char *hyp_vecs_end)
{
- static DEFINE_SPINLOCK(bp_lock);
+ static DEFINE_RAW_SPINLOCK(bp_lock);
int cpu, slot = -1;
/*
@@ -147,7 +147,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
return;
}
- spin_lock(&bp_lock);
+ raw_spin_lock(&bp_lock);
for_each_possible_cpu(cpu) {
if (per_cpu(bp_hardening_data.fn, cpu) == fn) {
slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu);
@@ -163,7 +163,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
__this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot);
__this_cpu_write(bp_hardening_data.fn, fn);
- spin_unlock(&bp_lock);
+ raw_spin_unlock(&bp_lock);
}
#else
#define __smccc_workaround_1_smc_start NULL
@@ -570,24 +570,63 @@ static const struct midr_range arm64_harden_el2_vectors[] = {
#endif
-const struct arm64_cpu_capabilities arm64_errata[] = {
+#ifdef CONFIG_CAVIUM_ERRATUM_27456
+static const struct midr_range cavium_erratum_27456_cpus[] = {
+ /* Cavium ThunderX, T88 pass 1.x - 2.1 */
+ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 1),
+ /* Cavium ThunderX, T81 pass 1.0 */
+ MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+ {},
+};
+#endif
+
+#ifdef CONFIG_CAVIUM_ERRATUM_30115
+static const struct midr_range cavium_erratum_30115_cpus[] = {
+ /* Cavium ThunderX, T88 pass 1.x - 2.2 */
+ MIDR_RANGE(MIDR_THUNDERX, 0, 0, 1, 2),
+ /* Cavium ThunderX, T81 pass 1.0 - 1.2 */
+ MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2),
+ /* Cavium ThunderX, T83 pass 1.0 */
+ MIDR_REV(MIDR_THUNDERX_83XX, 0, 0),
+ {},
+};
+#endif
+
+#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
+static const struct arm64_cpu_capabilities qcom_erratum_1003_list[] = {
+ {
+ ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
+ },
+ {
+ .midr_range.model = MIDR_QCOM_KRYO,
+ .matches = is_kryo_midr,
+ },
+ {},
+};
+#endif
+
+#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
+static const struct midr_range workaround_clean_cache[] = {
#if defined(CONFIG_ARM64_ERRATUM_826319) || \
defined(CONFIG_ARM64_ERRATUM_827319) || \
defined(CONFIG_ARM64_ERRATUM_824069)
- {
- /* Cortex-A53 r0p[012] */
- .desc = "ARM errata 826319, 827319, 824069",
- .capability = ARM64_WORKAROUND_CLEAN_CACHE,
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
- .cpu_enable = cpu_enable_cache_maint_trap,
- },
+ /* Cortex-A53 r0p[012]: ARM errata 826319, 827319, 824069 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 2),
#endif
-#ifdef CONFIG_ARM64_ERRATUM_819472
+#ifdef CONFIG_ARM64_ERRATUM_819472
+ /* Cortex-A53 r0p[01] : ARM errata 819472 */
+ MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1),
+#endif
+ {},
+};
+#endif
+
+const struct arm64_cpu_capabilities arm64_errata[] = {
+#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
- /* Cortex-A53 r0p[01] */
- .desc = "ARM errata 819472",
+ .desc = "ARM errata 826319, 827319, 824069, 819472",
.capability = ARM64_WORKAROUND_CLEAN_CACHE,
- ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A53, 0, 0, 1),
+ ERRATA_MIDR_RANGE_LIST(workaround_clean_cache),
.cpu_enable = cpu_enable_cache_maint_trap,
},
#endif
@@ -638,40 +677,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_27456
{
- /* Cavium ThunderX, T88 pass 1.x - 2.1 */
- .desc = "Cavium erratum 27456",
- .capability = ARM64_WORKAROUND_CAVIUM_27456,
- ERRATA_MIDR_RANGE(MIDR_THUNDERX,
- 0, 0,
- 1, 1),
- },
- {
- /* Cavium ThunderX, T81 pass 1.0 */
.desc = "Cavium erratum 27456",
.capability = ARM64_WORKAROUND_CAVIUM_27456,
- ERRATA_MIDR_REV(MIDR_THUNDERX_81XX, 0, 0),
+ ERRATA_MIDR_RANGE_LIST(cavium_erratum_27456_cpus),
},
#endif
#ifdef CONFIG_CAVIUM_ERRATUM_30115
{
- /* Cavium ThunderX, T88 pass 1.x - 2.2 */
.desc = "Cavium erratum 30115",
.capability = ARM64_WORKAROUND_CAVIUM_30115,
- ERRATA_MIDR_RANGE(MIDR_THUNDERX,
- 0, 0,
- 1, 2),
- },
- {
- /* Cavium ThunderX, T81 pass 1.0 - 1.2 */
- .desc = "Cavium erratum 30115",
- .capability = ARM64_WORKAROUND_CAVIUM_30115,
- ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX_81XX, 0, 0, 2),
- },
- {
- /* Cavium ThunderX, T83 pass 1.0 */
- .desc = "Cavium erratum 30115",
- .capability = ARM64_WORKAROUND_CAVIUM_30115,
- ERRATA_MIDR_REV(MIDR_THUNDERX_83XX, 0, 0),
+ ERRATA_MIDR_RANGE_LIST(cavium_erratum_30115_cpus),
},
#endif
{
@@ -683,16 +698,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
},
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1003
{
- .desc = "Qualcomm Technologies Falkor erratum 1003",
+ .desc = "Qualcomm Technologies Falkor/Kryo erratum 1003",
.capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
- ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0),
- },
- {
- .desc = "Qualcomm Technologies Kryo erratum 1003",
- .capability = ARM64_WORKAROUND_QCOM_FALKOR_E1003,
- .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
- .midr_range.model = MIDR_QCOM_KRYO,
- .matches = is_kryo_midr,
+ .matches = multi_entry_cap_matches,
+ .match_list = qcom_erratum_1003_list,
},
#endif
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index af50064dea51..e6467e64ee91 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -52,6 +52,7 @@ unsigned int compat_elf_hwcap2 __read_mostly;
DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS);
EXPORT_SYMBOL(cpu_hwcaps);
+static struct arm64_cpu_capabilities const __ro_after_init *cpu_hwcaps_ptrs[ARM64_NCAPS];
/*
* Flag to indicate if we have computed the system wide
@@ -141,6 +142,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
};
static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_SB_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_LRCPC_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_FCMA_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_JSCVT_SHIFT, 4, 0),
@@ -518,6 +520,29 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new)
}
extern const struct arm64_cpu_capabilities arm64_errata[];
+static const struct arm64_cpu_capabilities arm64_features[];
+
+static void __init
+init_cpu_hwcaps_indirect_list_from_array(const struct arm64_cpu_capabilities *caps)
+{
+ for (; caps->matches; caps++) {
+ if (WARN(caps->capability >= ARM64_NCAPS,
+ "Invalid capability %d\n", caps->capability))
+ continue;
+ if (WARN(cpu_hwcaps_ptrs[caps->capability],
+ "Duplicate entry for capability %d\n",
+ caps->capability))
+ continue;
+ cpu_hwcaps_ptrs[caps->capability] = caps;
+ }
+}
+
+static void __init init_cpu_hwcaps_indirect_list(void)
+{
+ init_cpu_hwcaps_indirect_list_from_array(arm64_features);
+ init_cpu_hwcaps_indirect_list_from_array(arm64_errata);
+}
+
static void __init setup_boot_cpu_capabilities(void);
void __init init_cpu_features(struct cpuinfo_arm64 *info)
@@ -564,6 +589,12 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info)
}
/*
+ * Initialize the indirect array of CPU hwcaps capabilities pointers
+ * before we handle the boot CPU below.
+ */
+ init_cpu_hwcaps_indirect_list();
+
+ /*
* Detect and enable early CPU capabilities based on the boot CPU,
* after we have initialised the CPU feature infrastructure.
*/
@@ -1368,6 +1399,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.cpu_enable = cpu_enable_cnp,
},
#endif
+ {
+ .desc = "Speculation barrier (SB)",
+ .capability = ARM64_HAS_SB,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .field_pos = ID_AA64ISAR1_SB_SHIFT,
+ .sign = FTR_UNSIGNED,
+ .min_field_value = 1,
+ },
{},
};
@@ -1409,6 +1450,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB),
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
@@ -1482,52 +1524,46 @@ static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
cap_set_elf_hwcap(hwcaps);
}
-/*
- * Check if the current CPU has a given feature capability.
- * Should be called from non-preemptible context.
- */
-static bool __this_cpu_has_cap(const struct arm64_cpu_capabilities *cap_array,
- unsigned int cap)
+static void update_cpu_capabilities(u16 scope_mask)
{
+ int i;
const struct arm64_cpu_capabilities *caps;
- if (WARN_ON(preemptible()))
- return false;
-
- for (caps = cap_array; caps->matches; caps++)
- if (caps->capability == cap)
- return caps->matches(caps, SCOPE_LOCAL_CPU);
-
- return false;
-}
-
-static void __update_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
- u16 scope_mask, const char *info)
-{
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
- for (; caps->matches; caps++) {
- if (!(caps->type & scope_mask) ||
+ for (i = 0; i < ARM64_NCAPS; i++) {
+ caps = cpu_hwcaps_ptrs[i];
+ if (!caps || !(caps->type & scope_mask) ||
+ cpus_have_cap(caps->capability) ||
!caps->matches(caps, cpucap_default_scope(caps)))
continue;
- if (!cpus_have_cap(caps->capability) && caps->desc)
- pr_info("%s %s\n", info, caps->desc);
+ if (caps->desc)
+ pr_info("detected: %s\n", caps->desc);
cpus_set_cap(caps->capability);
}
}
-static void update_cpu_capabilities(u16 scope_mask)
+/*
+ * Enable all the available capabilities on this CPU. The capabilities
+ * with BOOT_CPU scope are handled separately and hence skipped here.
+ */
+static int cpu_enable_non_boot_scope_capabilities(void *__unused)
{
- __update_cpu_capabilities(arm64_errata, scope_mask,
- "enabling workaround for");
- __update_cpu_capabilities(arm64_features, scope_mask, "detected:");
-}
+ int i;
+ u16 non_boot_scope = SCOPE_ALL & ~SCOPE_BOOT_CPU;
-static int __enable_cpu_capability(void *arg)
-{
- const struct arm64_cpu_capabilities *cap = arg;
+ for_each_available_cap(i) {
+ const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[i];
+
+ if (WARN_ON(!cap))
+ continue;
+
+ if (!(cap->type & non_boot_scope))
+ continue;
- cap->cpu_enable(cap);
+ if (cap->cpu_enable)
+ cap->cpu_enable(cap);
+ }
return 0;
}
@@ -1535,21 +1571,29 @@ static int __enable_cpu_capability(void *arg)
* Run through the enabled capabilities and enable() it on all active
* CPUs
*/
-static void __init
-__enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
- u16 scope_mask)
+static void __init enable_cpu_capabilities(u16 scope_mask)
{
+ int i;
+ const struct arm64_cpu_capabilities *caps;
+ bool boot_scope;
+
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
- for (; caps->matches; caps++) {
- unsigned int num = caps->capability;
+ boot_scope = !!(scope_mask & SCOPE_BOOT_CPU);
- if (!(caps->type & scope_mask) || !cpus_have_cap(num))
+ for (i = 0; i < ARM64_NCAPS; i++) {
+ unsigned int num;
+
+ caps = cpu_hwcaps_ptrs[i];
+ if (!caps || !(caps->type & scope_mask))
+ continue;
+ num = caps->capability;
+ if (!cpus_have_cap(num))
continue;
/* Ensure cpus_have_const_cap(num) works */
static_branch_enable(&cpu_hwcap_keys[num]);
- if (caps->cpu_enable) {
+ if (boot_scope && caps->cpu_enable)
/*
* Capabilities with SCOPE_BOOT_CPU scope are finalised
* before any secondary CPU boots. Thus, each secondary
@@ -1558,25 +1602,19 @@ __enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps,
* the boot CPU, for which the capability must be
* enabled here. This approach avoids costly
* stop_machine() calls for this case.
- *
- * Otherwise, use stop_machine() as it schedules the
- * work allowing us to modify PSTATE, instead of
- * on_each_cpu() which uses an IPI, giving us a PSTATE
- * that disappears when we return.
*/
- if (scope_mask & SCOPE_BOOT_CPU)
- caps->cpu_enable(caps);
- else
- stop_machine(__enable_cpu_capability,
- (void *)caps, cpu_online_mask);
- }
+ caps->cpu_enable(caps);
}
-}
-static void __init enable_cpu_capabilities(u16 scope_mask)
-{
- __enable_cpu_capabilities(arm64_errata, scope_mask);
- __enable_cpu_capabilities(arm64_features, scope_mask);
+ /*
+ * For all non-boot scope capabilities, use stop_machine()
+ * as it schedules the work allowing us to modify PSTATE,
+ * instead of on_each_cpu() which uses an IPI, giving us a
+ * PSTATE that disappears when we return.
+ */
+ if (!boot_scope)
+ stop_machine(cpu_enable_non_boot_scope_capabilities,
+ NULL, cpu_online_mask);
}
/*
@@ -1586,16 +1624,17 @@ static void __init enable_cpu_capabilities(u16 scope_mask)
*
* Returns "false" on conflicts.
*/
-static bool
-__verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps,
- u16 scope_mask)
+static bool verify_local_cpu_caps(u16 scope_mask)
{
+ int i;
bool cpu_has_cap, system_has_cap;
+ const struct arm64_cpu_capabilities *caps;
scope_mask &= ARM64_CPUCAP_SCOPE_MASK;
- for (; caps->matches; caps++) {
- if (!(caps->type & scope_mask))
+ for (i = 0; i < ARM64_NCAPS; i++) {
+ caps = cpu_hwcaps_ptrs[i];
+ if (!caps || !(caps->type & scope_mask))
continue;
cpu_has_cap = caps->matches(caps, SCOPE_LOCAL_CPU);
@@ -1626,7 +1665,7 @@ __verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps,
}
}
- if (caps->matches) {
+ if (i < ARM64_NCAPS) {
pr_crit("CPU%d: Detected conflict for capability %d (%s), System: %d, CPU: %d\n",
smp_processor_id(), caps->capability,
caps->desc, system_has_cap, cpu_has_cap);
@@ -1636,12 +1675,6 @@ __verify_local_cpu_caps(const struct arm64_cpu_capabilities *caps,
return true;
}
-static bool verify_local_cpu_caps(u16 scope_mask)
-{
- return __verify_local_cpu_caps(arm64_errata, scope_mask) &&
- __verify_local_cpu_caps(arm64_features, scope_mask);
-}
-
/*
* Check for CPU features that are used in early boot
* based on the Boot CPU value.
@@ -1750,12 +1783,16 @@ static void __init mark_const_caps_ready(void)
static_branch_enable(&arm64_const_caps_ready);
}
-extern const struct arm64_cpu_capabilities arm64_errata[];
-
-bool this_cpu_has_cap(unsigned int cap)
+bool this_cpu_has_cap(unsigned int n)
{
- return (__this_cpu_has_cap(arm64_features, cap) ||
- __this_cpu_has_cap(arm64_errata, cap));
+ if (!WARN_ON(preemptible()) && n < ARM64_NCAPS) {
+ const struct arm64_cpu_capabilities *cap = cpu_hwcaps_ptrs[n];
+
+ if (cap)
+ return cap->matches(cap, SCOPE_LOCAL_CPU);
+ }
+
+ return false;
}
static void __init setup_system_capabilities(void)
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index bcc2831399cb..7cb0b08ab0a7 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -82,6 +82,7 @@ static const char *const hwcap_str[] = {
"ilrcpc",
"flagm",
"ssbs",
+ "sb",
NULL
};
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S
index 1175f5827ae1..81b8eb5c4633 100644
--- a/arch/arm64/kernel/entry-ftrace.S
+++ b/arch/arm64/kernel/entry-ftrace.S
@@ -79,7 +79,6 @@
.macro mcount_get_lr reg
ldr \reg, [x29]
ldr \reg, [\reg, #8]
- mcount_adjust_addr \reg, \reg
.endm
.macro mcount_get_lr_addr reg
@@ -121,6 +120,8 @@ skip_ftrace_call: // }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
mcount_exit
ENDPROC(_mcount)
+EXPORT_SYMBOL(_mcount)
+NOKPROBE(_mcount)
#else /* CONFIG_DYNAMIC_FTRACE */
/*
@@ -132,6 +133,8 @@ ENDPROC(_mcount)
ENTRY(_mcount)
ret
ENDPROC(_mcount)
+EXPORT_SYMBOL(_mcount)
+NOKPROBE(_mcount)
/*
* void ftrace_caller(unsigned long return_address)
@@ -148,14 +151,12 @@ ENTRY(ftrace_caller)
mcount_get_pc0 x0 // function's pc
mcount_get_lr x1 // function's lr
- .global ftrace_call
-ftrace_call: // tracer(pc, lr);
+GLOBAL(ftrace_call) // tracer(pc, lr);
nop // This will be replaced with "bl xxx"
// where xxx can be any kind of tracer.
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- .global ftrace_graph_call
-ftrace_graph_call: // ftrace_graph_caller();
+GLOBAL(ftrace_graph_call) // ftrace_graph_caller();
nop // If enabled, this will be replaced
// "b ftrace_graph_caller"
#endif
@@ -169,24 +170,6 @@ ENTRY(ftrace_stub)
ENDPROC(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* save return value regs*/
- .macro save_return_regs
- sub sp, sp, #64
- stp x0, x1, [sp]
- stp x2, x3, [sp, #16]
- stp x4, x5, [sp, #32]
- stp x6, x7, [sp, #48]
- .endm
-
- /* restore return value regs*/
- .macro restore_return_regs
- ldp x0, x1, [sp]
- ldp x2, x3, [sp, #16]
- ldp x4, x5, [sp, #32]
- ldp x6, x7, [sp, #48]
- add sp, sp, #64
- .endm
-
/*
* void ftrace_graph_caller(void)
*
@@ -197,10 +180,10 @@ ENDPROC(ftrace_stub)
* and run return_to_handler() later on its exit.
*/
ENTRY(ftrace_graph_caller)
- mcount_get_lr_addr x0 // pointer to function's saved lr
- mcount_get_pc x1 // function's pc
+ mcount_get_pc x0 // function's pc
+ mcount_get_lr_addr x1 // pointer to function's saved lr
mcount_get_parent_fp x2 // parent's fp
- bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp)
+ bl prepare_ftrace_return // prepare_ftrace_return(pc, &lr, fp)
mcount_exit
ENDPROC(ftrace_graph_caller)
@@ -209,15 +192,27 @@ ENDPROC(ftrace_graph_caller)
* void return_to_handler(void)
*
* Run ftrace_return_to_handler() before going back to parent.
- * @fp is checked against the value passed by ftrace_graph_caller()
- * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * @fp is checked against the value passed by ftrace_graph_caller().
*/
ENTRY(return_to_handler)
- save_return_regs
+ /* save return value regs */
+ sub sp, sp, #64
+ stp x0, x1, [sp]
+ stp x2, x3, [sp, #16]
+ stp x4, x5, [sp, #32]
+ stp x6, x7, [sp, #48]
+
mov x0, x29 // parent's fp
bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp);
mov x30, x0 // restore the original return address
- restore_return_regs
+
+ /* restore return value regs */
+ ldp x0, x1, [sp]
+ ldp x2, x3, [sp, #16]
+ ldp x4, x5, [sp, #32]
+ ldp x6, x7, [sp, #48]
+ add sp, sp, #64
+
ret
END(return_to_handler)
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 039144ecbcb2..c7d050207e53 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -344,10 +344,6 @@ alternative_else_nop_endif
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
- /*
- * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization
- * when returning from IPI handler, and when returning to user-space.
- */
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@@ -363,6 +359,7 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
.else
eret
.endif
+ sb
.endm
.macro irq_stack_entry
@@ -1006,6 +1003,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003
mrs x30, far_el1
.endif
eret
+ sb
.endm
.align 11
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index 50986e388d2b..b4bd46bdc4d2 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -104,7 +104,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* is added in the future, but for now, the pr_err() below
* deals with a theoretical issue only.
*/
- trampoline = get_plt_entry(addr);
+ trampoline = get_plt_entry(addr, mod->arch.ftrace_trampoline);
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
&trampoline)) {
if (!plt_entries_equal(mod->arch.ftrace_trampoline,
@@ -211,7 +211,7 @@ int __init ftrace_dyn_arch_init(void)
*
* Note that @frame_pointer is used only for sanity check later.
*/
-void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
unsigned long frame_pointer)
{
unsigned long return_hooker = (unsigned long)&return_to_handler;
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 4471f570a295..eaa68ce6a06d 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -318,6 +318,19 @@ __create_page_tables:
adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ mrs_s x6, SYS_ID_AA64MMFR2_EL1
+ and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ mov x5, #52
+ cbnz x6, 1f
+#endif
+ mov x5, #VA_BITS
+1:
+ adr_l x6, vabits_user
+ str x5, [x6]
+ dmb sy
+ dc ivac, x6 // Invalidate potentially stale cache line
+
/*
* VA_BITS may be too small to allow for an ID mapping to be created
* that covers system RAM if that is located sufficiently high in the
@@ -707,6 +720,7 @@ secondary_startup:
/*
* Common entry point for secondary CPUs.
*/
+ bl __cpu_secondary_check52bitva
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
bl __enable_mmu
@@ -769,6 +783,7 @@ ENTRY(__enable_mmu)
phys_to_ttbr x1, x1
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
+ offset_ttbr1 x1
msr ttbr1_el1, x1 // load TTBR1
isb
msr sctlr_el1, x0
@@ -784,9 +799,30 @@ ENTRY(__enable_mmu)
ret
ENDPROC(__enable_mmu)
+ENTRY(__cpu_secondary_check52bitva)
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ ldr_l x0, vabits_user
+ cmp x0, #52
+ b.ne 2f
+
+ mrs_s x0, SYS_ID_AA64MMFR2_EL1
+ and x0, x0, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
+ cbnz x0, 2f
+
+ update_early_cpu_boot_status \
+ CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_52_BIT_VA, x0, x1
+1: wfe
+ wfi
+ b 1b
+
+#endif
+2: ret
+ENDPROC(__cpu_secondary_check52bitva)
+
__no_granule_support:
/* Indicate that this CPU can't boot and is stuck in the kernel */
- update_early_cpu_boot_status CPU_STUCK_IN_KERNEL, x1, x2
+ update_early_cpu_boot_status \
+ CPU_STUCK_IN_KERNEL | CPU_STUCK_REASON_NO_GRAN, x1, x2
1:
wfe
wfi
diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S
index dd14ab8c9f72..fe36d85c60bd 100644
--- a/arch/arm64/kernel/hibernate-asm.S
+++ b/arch/arm64/kernel/hibernate-asm.S
@@ -40,6 +40,7 @@
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
+ offset_ttbr1 \tmp
msr ttbr1_el1, \tmp
isb
.endm
diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h
index a820ed07fb80..8da289dc843a 100644
--- a/arch/arm64/kernel/image.h
+++ b/arch/arm64/kernel/image.h
@@ -76,16 +76,6 @@
__efistub_stext_offset = stext - _text;
/*
- * Prevent the symbol aliases below from being emitted into the kallsyms
- * table, by forcing them to be absolute symbols (which are conveniently
- * ignored by scripts/kallsyms) rather than section relative symbols.
- * The distinction is only relevant for partial linking, and only for symbols
- * that are defined within a section declaration (which is not the case for
- * the definitions below) so the resulting values will be identical.
- */
-#define KALLSYMS_HIDE(sym) ABSOLUTE(sym)
-
-/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
* isolate it from the kernel proper. The following symbols are legally
* accessed by the stub, so provide some aliases to make them accessible.
@@ -94,28 +84,28 @@ __efistub_stext_offset = stext - _text;
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_memcmp = KALLSYMS_HIDE(__pi_memcmp);
-__efistub_memchr = KALLSYMS_HIDE(__pi_memchr);
-__efistub_memcpy = KALLSYMS_HIDE(__pi_memcpy);
-__efistub_memmove = KALLSYMS_HIDE(__pi_memmove);
-__efistub_memset = KALLSYMS_HIDE(__pi_memset);
-__efistub_strlen = KALLSYMS_HIDE(__pi_strlen);
-__efistub_strnlen = KALLSYMS_HIDE(__pi_strnlen);
-__efistub_strcmp = KALLSYMS_HIDE(__pi_strcmp);
-__efistub_strncmp = KALLSYMS_HIDE(__pi_strncmp);
-__efistub_strrchr = KALLSYMS_HIDE(__pi_strrchr);
-__efistub___flush_dcache_area = KALLSYMS_HIDE(__pi___flush_dcache_area);
+__efistub_memcmp = __pi_memcmp;
+__efistub_memchr = __pi_memchr;
+__efistub_memcpy = __pi_memcpy;
+__efistub_memmove = __pi_memmove;
+__efistub_memset = __pi_memset;
+__efistub_strlen = __pi_strlen;
+__efistub_strnlen = __pi_strnlen;
+__efistub_strcmp = __pi_strcmp;
+__efistub_strncmp = __pi_strncmp;
+__efistub_strrchr = __pi_strrchr;
+__efistub___flush_dcache_area = __pi___flush_dcache_area;
#ifdef CONFIG_KASAN
-__efistub___memcpy = KALLSYMS_HIDE(__pi_memcpy);
-__efistub___memmove = KALLSYMS_HIDE(__pi_memmove);
-__efistub___memset = KALLSYMS_HIDE(__pi_memset);
+__efistub___memcpy = __pi_memcpy;
+__efistub___memmove = __pi_memmove;
+__efistub___memset = __pi_memset;
#endif
-__efistub__text = KALLSYMS_HIDE(_text);
-__efistub__end = KALLSYMS_HIDE(_end);
-__efistub__edata = KALLSYMS_HIDE(_edata);
-__efistub_screen_info = KALLSYMS_HIDE(screen_info);
+__efistub__text = _text;
+__efistub__end = _end;
+__efistub__edata = _edata;
+__efistub_screen_info = screen_info;
#endif
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 2b3413549734..7820a4a688fa 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -1239,6 +1239,35 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst,
return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift);
}
+u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr,
+ enum aarch64_insn_register reg,
+ enum aarch64_insn_adr_type type)
+{
+ u32 insn;
+ s32 offset;
+
+ switch (type) {
+ case AARCH64_INSN_ADR_TYPE_ADR:
+ insn = aarch64_insn_get_adr_value();
+ offset = addr - pc;
+ break;
+ case AARCH64_INSN_ADR_TYPE_ADRP:
+ insn = aarch64_insn_get_adrp_value();
+ offset = (addr - ALIGN_DOWN(pc, SZ_4K)) >> 12;
+ break;
+ default:
+ pr_err("%s: unknown adr encoding %d\n", __func__, type);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ if (offset < -SZ_1M || offset >= SZ_1M)
+ return AARCH64_BREAK_FAULT;
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RD, insn, reg);
+
+ return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_ADR, insn, offset);
+}
+
/*
* Decode the imm field of a branch, and return the byte offset as a
* signed value (so it can be used when computing a new branch
diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c
index f0690c2ca3e0..255941394941 100644
--- a/arch/arm64/kernel/module-plts.c
+++ b/arch/arm64/kernel/module-plts.c
@@ -11,31 +11,91 @@
#include <linux/module.h>
#include <linux/sort.h>
+static struct plt_entry __get_adrp_add_pair(u64 dst, u64 pc,
+ enum aarch64_insn_register reg)
+{
+ u32 adrp, add;
+
+ adrp = aarch64_insn_gen_adr(pc, dst, reg, AARCH64_INSN_ADR_TYPE_ADRP);
+ add = aarch64_insn_gen_add_sub_imm(reg, reg, dst % SZ_4K,
+ AARCH64_INSN_VARIANT_64BIT,
+ AARCH64_INSN_ADSB_ADD);
+
+ return (struct plt_entry){ cpu_to_le32(adrp), cpu_to_le32(add) };
+}
+
+struct plt_entry get_plt_entry(u64 dst, void *pc)
+{
+ struct plt_entry plt;
+ static u32 br;
+
+ if (!br)
+ br = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16,
+ AARCH64_INSN_BRANCH_NOLINK);
+
+ plt = __get_adrp_add_pair(dst, (u64)pc, AARCH64_INSN_REG_16);
+ plt.br = cpu_to_le32(br);
+
+ return plt;
+}
+
+bool plt_entries_equal(const struct plt_entry *a, const struct plt_entry *b)
+{
+ u64 p, q;
+
+ /*
+ * Check whether both entries refer to the same target:
+ * do the cheapest checks first.
+ * If the 'add' or 'br' opcodes are different, then the target
+ * cannot be the same.
+ */
+ if (a->add != b->add || a->br != b->br)
+ return false;
+
+ p = ALIGN_DOWN((u64)a, SZ_4K);
+ q = ALIGN_DOWN((u64)b, SZ_4K);
+
+ /*
+ * If the 'adrp' opcodes are the same then we just need to check
+ * that they refer to the same 4k region.
+ */
+ if (a->adrp == b->adrp && p == q)
+ return true;
+
+ return (p + aarch64_insn_adrp_get_offset(le32_to_cpu(a->adrp))) ==
+ (q + aarch64_insn_adrp_get_offset(le32_to_cpu(b->adrp)));
+}
+
static bool in_init(const struct module *mod, void *loc)
{
return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
}
-u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
+u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, const Elf64_Rela *rela,
Elf64_Sym *sym)
{
struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
&mod->arch.init;
- struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+ struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries;
+ int j = i - 1;
u64 val = sym->st_value + rela->r_addend;
- plt[i] = get_plt_entry(val);
+ if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+ i++;
+
+ plt[i] = get_plt_entry(val, &plt[i]);
/*
* Check if the entry we just created is a duplicate. Given that the
* relocations are sorted, this will be the last entry we allocated.
* (if one exists).
*/
- if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
- return (u64)&plt[i - 1];
+ if (j >= 0 && plt_entries_equal(plt + i, plt + j))
+ return (u64)&plt[j];
- pltsec->plt_num_entries++;
+ pltsec->plt_num_entries += i - j;
if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
return 0;
@@ -43,41 +103,31 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
}
#ifdef CONFIG_ARM64_ERRATUM_843419
-u64 module_emit_veneer_for_adrp(struct module *mod, void *loc, u64 val)
+u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+ void *loc, u64 val)
{
struct mod_plt_sec *pltsec = !in_init(mod, loc) ? &mod->arch.core :
&mod->arch.init;
- struct plt_entry *plt = (struct plt_entry *)pltsec->plt->sh_addr;
+ struct plt_entry *plt = (struct plt_entry *)sechdrs[pltsec->plt_shndx].sh_addr;
int i = pltsec->plt_num_entries++;
- u32 mov0, mov1, mov2, br;
+ u32 br;
int rd;
if (WARN_ON(pltsec->plt_num_entries > pltsec->plt_max_entries))
return 0;
+ if (is_forbidden_offset_for_adrp(&plt[i].adrp))
+ i = pltsec->plt_num_entries++;
+
/* get the destination register of the ADRP instruction */
rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD,
le32_to_cpup((__le32 *)loc));
- /* generate the veneer instructions */
- mov0 = aarch64_insn_gen_movewide(rd, (u16)~val, 0,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_INVERSE);
- mov1 = aarch64_insn_gen_movewide(rd, (u16)(val >> 16), 16,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_KEEP);
- mov2 = aarch64_insn_gen_movewide(rd, (u16)(val >> 32), 32,
- AARCH64_INSN_VARIANT_64BIT,
- AARCH64_INSN_MOVEWIDE_KEEP);
br = aarch64_insn_gen_branch_imm((u64)&plt[i].br, (u64)loc + 4,
AARCH64_INSN_BRANCH_NOLINK);
- plt[i] = (struct plt_entry){
- cpu_to_le32(mov0),
- cpu_to_le32(mov1),
- cpu_to_le32(mov2),
- cpu_to_le32(br)
- };
+ plt[i] = __get_adrp_add_pair(val, (u64)&plt[i], rd);
+ plt[i].br = cpu_to_le32(br);
return (u64)&plt[i];
}
@@ -193,6 +243,15 @@ static unsigned int count_plts(Elf64_Sym *syms, Elf64_Rela *rela, int num,
break;
}
}
+
+ if (IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) &&
+ cpus_have_const_cap(ARM64_WORKAROUND_843419))
+ /*
+ * Add some slack so we can skip PLT slots that may trigger
+ * the erratum due to the placement of the ADRP instruction.
+ */
+ ret += DIV_ROUND_UP(ret, (SZ_4K / sizeof(struct plt_entry)));
+
return ret;
}
@@ -202,7 +261,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
unsigned long core_plts = 0;
unsigned long init_plts = 0;
Elf64_Sym *syms = NULL;
- Elf_Shdr *tramp = NULL;
+ Elf_Shdr *pltsec, *tramp = NULL;
int i;
/*
@@ -211,9 +270,9 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
*/
for (i = 0; i < ehdr->e_shnum; i++) {
if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
- mod->arch.core.plt = sechdrs + i;
+ mod->arch.core.plt_shndx = i;
else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
- mod->arch.init.plt = sechdrs + i;
+ mod->arch.init.plt_shndx = i;
else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
!strcmp(secstrings + sechdrs[i].sh_name,
".text.ftrace_trampoline"))
@@ -222,7 +281,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
syms = (Elf64_Sym *)sechdrs[i].sh_addr;
}
- if (!mod->arch.core.plt || !mod->arch.init.plt) {
+ if (!mod->arch.core.plt_shndx || !mod->arch.init.plt_shndx) {
pr_err("%s: module PLT section(s) missing\n", mod->name);
return -ENOEXEC;
}
@@ -254,17 +313,19 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
sechdrs[i].sh_info, dstsec);
}
- mod->arch.core.plt->sh_type = SHT_NOBITS;
- mod->arch.core.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
- mod->arch.core.plt->sh_addralign = L1_CACHE_BYTES;
- mod->arch.core.plt->sh_size = (core_plts + 1) * sizeof(struct plt_entry);
+ pltsec = sechdrs + mod->arch.core.plt_shndx;
+ pltsec->sh_type = SHT_NOBITS;
+ pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ pltsec->sh_addralign = L1_CACHE_BYTES;
+ pltsec->sh_size = (core_plts + 1) * sizeof(struct plt_entry);
mod->arch.core.plt_num_entries = 0;
mod->arch.core.plt_max_entries = core_plts;
- mod->arch.init.plt->sh_type = SHT_NOBITS;
- mod->arch.init.plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
- mod->arch.init.plt->sh_addralign = L1_CACHE_BYTES;
- mod->arch.init.plt->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
+ pltsec = sechdrs + mod->arch.init.plt_shndx;
+ pltsec->sh_type = SHT_NOBITS;
+ pltsec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ pltsec->sh_addralign = L1_CACHE_BYTES;
+ pltsec->sh_size = (init_plts + 1) * sizeof(struct plt_entry);
mod->arch.init.plt_num_entries = 0;
mod->arch.init.plt_max_entries = init_plts;
diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c
index f0f27aeefb73..f713e2fc4d75 100644
--- a/arch/arm64/kernel/module.c
+++ b/arch/arm64/kernel/module.c
@@ -198,13 +198,12 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val,
return 0;
}
-static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
+static int reloc_insn_adrp(struct module *mod, Elf64_Shdr *sechdrs,
+ __le32 *place, u64 val)
{
u32 insn;
- if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_843419) ||
- !cpus_have_const_cap(ARM64_WORKAROUND_843419) ||
- ((u64)place & 0xfff) < 0xff8)
+ if (!is_forbidden_offset_for_adrp(place))
return reloc_insn_imm(RELOC_OP_PAGE, place, val, 12, 21,
AARCH64_INSN_IMM_ADR);
@@ -215,7 +214,7 @@ static int reloc_insn_adrp(struct module *mod, __le32 *place, u64 val)
insn &= ~BIT(31);
} else {
/* out of range for ADR -> emit a veneer */
- val = module_emit_veneer_for_adrp(mod, place, val & ~0xfff);
+ val = module_emit_veneer_for_adrp(mod, sechdrs, place, val & ~0xfff);
if (!val)
return -ENOEXEC;
insn = aarch64_insn_gen_branch_imm((u64)place, val,
@@ -368,7 +367,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_AARCH64_ADR_PREL_PG_HI21_NC:
overflow_check = false;
case R_AARCH64_ADR_PREL_PG_HI21:
- ovf = reloc_insn_adrp(me, loc, val);
+ ovf = reloc_insn_adrp(me, sechdrs, loc, val);
if (ovf && ovf != -ERANGE)
return ovf;
break;
@@ -413,7 +412,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) &&
ovf == -ERANGE) {
- val = module_emit_plt_entry(me, loc, &rel[i], sym);
+ val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym);
if (!val)
return -ENOEXEC;
ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2,
diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S
index 62522342e1e4..184332286a81 100644
--- a/arch/arm64/kernel/smccc-call.S
+++ b/arch/arm64/kernel/smccc-call.S
@@ -13,7 +13,9 @@
*/
#include <linux/linkage.h>
#include <linux/arm-smccc.h>
+
#include <asm/asm-offsets.h>
+#include <asm/assembler.h>
.macro SMCCC instr
.cfi_startproc
@@ -40,6 +42,7 @@
ENTRY(__arm_smccc_smc)
SMCCC smc
ENDPROC(__arm_smccc_smc)
+EXPORT_SYMBOL(__arm_smccc_smc)
/*
* void arm_smccc_hvc(unsigned long a0, unsigned long a1, unsigned long a2,
@@ -50,3 +53,4 @@ ENDPROC(__arm_smccc_smc)
ENTRY(__arm_smccc_hvc)
SMCCC hvc
ENDPROC(__arm_smccc_hvc)
+EXPORT_SYMBOL(__arm_smccc_hvc)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 96b8f2f51ab2..1598d6f7200a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -141,6 +141,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
}
} else {
pr_err("CPU%u: failed to boot: %d\n", cpu, ret);
+ return ret;
}
secondary_data.task = NULL;
@@ -151,7 +152,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
if (status == CPU_MMU_OFF)
status = READ_ONCE(__early_cpu_boot_status);
- switch (status) {
+ switch (status & CPU_BOOT_STATUS_MASK) {
default:
pr_err("CPU%u: failed in unknown state : 0x%lx\n",
cpu, status);
@@ -165,6 +166,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
pr_crit("CPU%u: may not have shut down cleanly\n", cpu);
case CPU_STUCK_IN_KERNEL:
pr_crit("CPU%u: is stuck in kernel\n", cpu);
+ if (status & CPU_STUCK_REASON_52_BIT_VA)
+ pr_crit("CPU%u: does not support 52-bit VAs\n", cpu);
+ if (status & CPU_STUCK_REASON_NO_GRAN)
+ pr_crit("CPU%u: does not support %luK granule \n", cpu, PAGE_SIZE / SZ_1K);
cpus_stuck_in_kernel++;
break;
case CPU_PANIC_KERNEL:
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 03b00007553d..7fa008374907 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -99,7 +99,8 @@ SECTIONS
*(.discard)
*(.discard.*)
*(.interp .dynamic)
- *(.dynsym .dynstr .hash)
+ *(.dynsym .dynstr .hash .gnu.hash)
+ *(.eh_frame)
}
. = KIMAGE_VADDR + TEXT_OFFSET;
@@ -192,12 +193,12 @@ SECTIONS
PERCPU_SECTION(L1_CACHE_BYTES)
- .rela : ALIGN(8) {
+ .rela.dyn : ALIGN(8) {
*(.rela .rela*)
}
- __rela_offset = ABSOLUTE(ADDR(.rela) - KIMAGE_VADDR);
- __rela_size = SIZEOF(.rela);
+ __rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
+ __rela_size = SIZEOF(.rela.dyn);
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index fad1e164fe48..675fdc186e3b 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -83,6 +83,7 @@ ENTRY(__guest_enter)
// Do not touch any register after this!
eret
+ sb
ENDPROC(__guest_enter)
ENTRY(__guest_exit)
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index b1f14f736962..73c1b483ec39 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -96,6 +96,7 @@ el1_sync: // Guest trapped into EL2
do_el2_call
eret
+ sb
el1_hvc_guest:
/*
@@ -146,6 +147,7 @@ wa_epilogue:
mov x0, xzr
add sp, sp, #16
eret
+ sb
el1_trap:
get_vcpu_ptr x1, x0
@@ -199,6 +201,7 @@ el2_error:
b.ne __hyp_panic
mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT)
eret
+ sb
ENTRY(__hyp_do_panic)
mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
@@ -207,6 +210,7 @@ ENTRY(__hyp_do_panic)
ldr lr, =panic
msr elr_el2, lr
eret
+ sb
ENDPROC(__hyp_do_panic)
ENTRY(__hyp_panic)
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 69ff9887f724..5540a1638baf 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -5,6 +5,12 @@ lib-y := clear_user.o delay.o copy_from_user.o \
memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \
strchr.o strrchr.o tishift.o
+ifeq ($(CONFIG_KERNEL_MODE_NEON), y)
+obj-$(CONFIG_XOR_BLOCKS) += xor-neon.o
+CFLAGS_REMOVE_xor-neon.o += -mgeneral-regs-only
+CFLAGS_xor-neon.o += -ffreestanding
+endif
+
# Tell the compiler to treat all general purpose registers (with the
# exception of the IP registers, which are already handled by the caller
# in case of a PLT) as callee-saved, which allows for efficient runtime
diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S
index ef08e905e35b..6d13b0d64ad5 100644
--- a/arch/arm64/lib/clear_page.S
+++ b/arch/arm64/lib/clear_page.S
@@ -37,3 +37,4 @@ ENTRY(clear_page)
b.ne 1b
ret
ENDPROC(clear_page)
+EXPORT_SYMBOL(clear_page)
diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S
index 21ba0b29621b..feb225bd4b80 100644
--- a/arch/arm64/lib/clear_user.S
+++ b/arch/arm64/lib/clear_user.S
@@ -18,6 +18,7 @@
#include <linux/linkage.h>
#include <asm/asm-uaccess.h>
+#include <asm/assembler.h>
.text
@@ -53,6 +54,7 @@ uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
uaccess_disable_not_uao x2, x3
ret
ENDPROC(__arch_clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S
index 20305d485046..dea6c762d52f 100644
--- a/arch/arm64/lib/copy_from_user.S
+++ b/arch/arm64/lib/copy_from_user.S
@@ -16,8 +16,9 @@
#include <linux/linkage.h>
-#include <asm/cache.h>
#include <asm/asm-uaccess.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Copy from user space to a kernel buffer (alignment handled by the hardware)
@@ -71,6 +72,7 @@ ENTRY(__arch_copy_from_user)
mov x0, #0 // Nothing to copy
ret
ENDPROC(__arch_copy_from_user)
+EXPORT_SYMBOL(__arch_copy_from_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S
index 54b75deb1d16..a84227fbf716 100644
--- a/arch/arm64/lib/copy_in_user.S
+++ b/arch/arm64/lib/copy_in_user.S
@@ -18,8 +18,9 @@
#include <linux/linkage.h>
-#include <asm/cache.h>
#include <asm/asm-uaccess.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Copy from user space to user space (alignment handled by the hardware)
@@ -73,6 +74,7 @@ ENTRY(__arch_copy_in_user)
mov x0, #0
ret
ENDPROC(__arch_copy_in_user)
+EXPORT_SYMBOL(__arch_copy_in_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index 076c43715e64..98313e24a987 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -87,3 +87,4 @@ alternative_else_nop_endif
ret
ENDPROC(copy_page)
+EXPORT_SYMBOL(copy_page)
diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S
index fda6172d6b88..ef44c7ca3ffb 100644
--- a/arch/arm64/lib/copy_to_user.S
+++ b/arch/arm64/lib/copy_to_user.S
@@ -16,8 +16,9 @@
#include <linux/linkage.h>
-#include <asm/cache.h>
#include <asm/asm-uaccess.h>
+#include <asm/assembler.h>
+#include <asm/cache.h>
/*
* Copy to user space from a kernel buffer (alignment handled by the hardware)
@@ -70,6 +71,7 @@ ENTRY(__arch_copy_to_user)
mov x0, #0
ret
ENDPROC(__arch_copy_to_user)
+EXPORT_SYMBOL(__arch_copy_to_user)
.section .fixup,"ax"
.align 2
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 5bc1e85b4e1c..f132f2a7522e 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -15,15 +15,59 @@
.cpu generic+crc
.macro __crc32, c
-0: subs x2, x2, #16
- b.mi 8f
- ldp x3, x4, [x1], #16
+ cmp x2, #16
+ b.lt 8f // less than 16 bytes
+
+ and x7, x2, #0x1f
+ and x2, x2, #~0x1f
+ cbz x7, 32f // multiple of 32 bytes
+
+ and x8, x7, #0xf
+ ldp x3, x4, [x1]
+ add x8, x8, x1
+ add x1, x1, x7
+ ldp x5, x6, [x8]
CPU_BE( rev x3, x3 )
CPU_BE( rev x4, x4 )
+CPU_BE( rev x5, x5 )
+CPU_BE( rev x6, x6 )
+
+ tst x7, #8
+ crc32\c\()x w8, w0, x3
+ csel x3, x3, x4, eq
+ csel w0, w0, w8, eq
+ tst x7, #4
+ lsr x4, x3, #32
+ crc32\c\()w w8, w0, w3
+ csel x3, x3, x4, eq
+ csel w0, w0, w8, eq
+ tst x7, #2
+ lsr w4, w3, #16
+ crc32\c\()h w8, w0, w3
+ csel w3, w3, w4, eq
+ csel w0, w0, w8, eq
+ tst x7, #1
+ crc32\c\()b w8, w0, w3
+ csel w0, w0, w8, eq
+ tst x7, #16
+ crc32\c\()x w8, w0, x5
+ crc32\c\()x w8, w8, x6
+ csel w0, w0, w8, eq
+ cbz x2, 0f
+
+32: ldp x3, x4, [x1], #32
+ sub x2, x2, #32
+ ldp x5, x6, [x1, #-16]
+CPU_BE( rev x3, x3 )
+CPU_BE( rev x4, x4 )
+CPU_BE( rev x5, x5 )
+CPU_BE( rev x6, x6 )
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
- b.ne 0b
- ret
+ crc32\c\()x w0, w0, x5
+ crc32\c\()x w0, w0, x6
+ cbnz x2, 32b
+0: ret
8: tbz x2, #3, 4f
ldr x3, [x1], #8
diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S
index 0f164a4baf52..f146b7ecd28f 100644
--- a/arch/arm64/lib/memchr.S
+++ b/arch/arm64/lib/memchr.S
@@ -42,3 +42,4 @@ WEAK(memchr)
2: mov x0, #0
ret
ENDPIPROC(memchr)
+EXPORT_SYMBOL_NOKASAN(memchr)
diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S
index fb295f52e9f8..e2e629b09049 100644
--- a/arch/arm64/lib/memcmp.S
+++ b/arch/arm64/lib/memcmp.S
@@ -256,3 +256,4 @@ CPU_LE( rev data2, data2 )
mov result, #0
ret
ENDPIPROC(memcmp)
+EXPORT_SYMBOL_NOKASAN(memcmp)
diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S
index 67613937711f..b4f82888ed60 100644
--- a/arch/arm64/lib/memcpy.S
+++ b/arch/arm64/lib/memcpy.S
@@ -74,4 +74,6 @@ ENTRY(memcpy)
#include "copy_template.S"
ret
ENDPIPROC(memcpy)
+EXPORT_SYMBOL(memcpy)
ENDPROC(__memcpy)
+EXPORT_SYMBOL(__memcpy)
diff --git a/arch/arm64/lib/memmove.S b/arch/arm64/lib/memmove.S
index a5a4459013b1..ef12f719d99d 100644
--- a/arch/arm64/lib/memmove.S
+++ b/arch/arm64/lib/memmove.S
@@ -197,4 +197,6 @@ ENTRY(memmove)
b.ne .Ltail63
ret
ENDPIPROC(memmove)
+EXPORT_SYMBOL(memmove)
ENDPROC(__memmove)
+EXPORT_SYMBOL(__memmove)
diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S
index f2670a9f218c..a79cf118d6d0 100644
--- a/arch/arm64/lib/memset.S
+++ b/arch/arm64/lib/memset.S
@@ -216,4 +216,6 @@ ENTRY(memset)
b.ne .Ltail_maybe_long
ret
ENDPIPROC(memset)
+EXPORT_SYMBOL(memset)
ENDPROC(__memset)
+EXPORT_SYMBOL(__memset)
diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S
index 7c83091d1bcd..b179421f46c7 100644
--- a/arch/arm64/lib/strchr.S
+++ b/arch/arm64/lib/strchr.S
@@ -40,3 +40,4 @@ WEAK(strchr)
csel x0, x0, xzr, eq
ret
ENDPROC(strchr)
+EXPORT_SYMBOL_NOKASAN(strchr)
diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S
index 7d5d15398bfb..c306c7b88574 100644
--- a/arch/arm64/lib/strcmp.S
+++ b/arch/arm64/lib/strcmp.S
@@ -232,3 +232,4 @@ CPU_BE( orr syndrome, diff, has_nul )
sub result, data1, data2, lsr #56
ret
ENDPIPROC(strcmp)
+EXPORT_SYMBOL_NOKASAN(strcmp)
diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S
index 8e0b14205dcb..2a0240937416 100644
--- a/arch/arm64/lib/strlen.S
+++ b/arch/arm64/lib/strlen.S
@@ -124,3 +124,4 @@ CPU_LE( lsr tmp2, tmp2, tmp1 ) /* Shift (tmp1 & 63). */
csel data2, data2, data2a, le
b .Lrealigned
ENDPIPROC(strlen)
+EXPORT_SYMBOL_NOKASAN(strlen)
diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S
index 66bd145935d9..c5d567afb039 100644
--- a/arch/arm64/lib/strncmp.S
+++ b/arch/arm64/lib/strncmp.S
@@ -308,3 +308,4 @@ CPU_BE( orr syndrome, diff, has_nul )
mov result, #0
ret
ENDPIPROC(strncmp)
+EXPORT_SYMBOL_NOKASAN(strncmp)
diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S
index 355be04441fe..e21e536d420e 100644
--- a/arch/arm64/lib/strnlen.S
+++ b/arch/arm64/lib/strnlen.S
@@ -169,3 +169,4 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */
mov len, limit
ret
ENDPIPROC(strnlen)
+EXPORT_SYMBOL_NOKASAN(strnlen)
diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S
index ea84924d5990..47e1593016dc 100644
--- a/arch/arm64/lib/strrchr.S
+++ b/arch/arm64/lib/strrchr.S
@@ -41,3 +41,4 @@ WEAK(strrchr)
2: mov x0, x3
ret
ENDPIPROC(strrchr)
+EXPORT_SYMBOL_NOKASAN(strrchr)
diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S
index 0fdff97794de..047622536535 100644
--- a/arch/arm64/lib/tishift.S
+++ b/arch/arm64/lib/tishift.S
@@ -5,6 +5,8 @@
#include <linux/linkage.h>
+#include <asm/assembler.h>
+
ENTRY(__ashlti3)
cbz x2, 1f
mov x3, #64
@@ -25,6 +27,7 @@ ENTRY(__ashlti3)
mov x0, x2
ret
ENDPROC(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
ENTRY(__ashrti3)
cbz x2, 1f
@@ -46,6 +49,7 @@ ENTRY(__ashrti3)
mov x1, x2
ret
ENDPROC(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
ENTRY(__lshrti3)
cbz x2, 1f
@@ -67,3 +71,4 @@ ENTRY(__lshrti3)
mov x1, x2
ret
ENDPROC(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)
diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c
new file mode 100644
index 000000000000..131c60c27dff
--- /dev/null
+++ b/arch/arm64/lib/xor-neon.c
@@ -0,0 +1,184 @@
+/*
+ * arch/arm64/lib/xor-neon.c
+ *
+ * Authors: Jackie Liu <liuyun01@kylinos.cn>
+ * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/xor.h>
+#include <linux/module.h>
+#include <asm/neon-intrinsics.h>
+
+void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 */
+ v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
+ v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
+ v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
+ v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ } while (--lines > 0);
+}
+
+void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 */
+ v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
+ v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
+ v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
+ v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
+
+ /* p1 ^= p3 */
+ v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ } while (--lines > 0);
+}
+
+void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3, unsigned long *p4)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+ uint64_t *dp4 = (uint64_t *)p4;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 */
+ v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
+ v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
+ v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
+ v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
+
+ /* p1 ^= p3 */
+ v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
+
+ /* p1 ^= p4 */
+ v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ dp4 += 8;
+ } while (--lines > 0);
+}
+
+void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
+ unsigned long *p2, unsigned long *p3,
+ unsigned long *p4, unsigned long *p5)
+{
+ uint64_t *dp1 = (uint64_t *)p1;
+ uint64_t *dp2 = (uint64_t *)p2;
+ uint64_t *dp3 = (uint64_t *)p3;
+ uint64_t *dp4 = (uint64_t *)p4;
+ uint64_t *dp5 = (uint64_t *)p5;
+
+ register uint64x2_t v0, v1, v2, v3;
+ long lines = bytes / (sizeof(uint64x2_t) * 4);
+
+ do {
+ /* p1 ^= p2 */
+ v0 = veorq_u64(vld1q_u64(dp1 + 0), vld1q_u64(dp2 + 0));
+ v1 = veorq_u64(vld1q_u64(dp1 + 2), vld1q_u64(dp2 + 2));
+ v2 = veorq_u64(vld1q_u64(dp1 + 4), vld1q_u64(dp2 + 4));
+ v3 = veorq_u64(vld1q_u64(dp1 + 6), vld1q_u64(dp2 + 6));
+
+ /* p1 ^= p3 */
+ v0 = veorq_u64(v0, vld1q_u64(dp3 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp3 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp3 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp3 + 6));
+
+ /* p1 ^= p4 */
+ v0 = veorq_u64(v0, vld1q_u64(dp4 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp4 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp4 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp4 + 6));
+
+ /* p1 ^= p5 */
+ v0 = veorq_u64(v0, vld1q_u64(dp5 + 0));
+ v1 = veorq_u64(v1, vld1q_u64(dp5 + 2));
+ v2 = veorq_u64(v2, vld1q_u64(dp5 + 4));
+ v3 = veorq_u64(v3, vld1q_u64(dp5 + 6));
+
+ /* store */
+ vst1q_u64(dp1 + 0, v0);
+ vst1q_u64(dp1 + 2, v1);
+ vst1q_u64(dp1 + 4, v2);
+ vst1q_u64(dp1 + 6, v3);
+
+ dp1 += 8;
+ dp2 += 8;
+ dp3 += 8;
+ dp4 += 8;
+ dp5 += 8;
+ } while (--lines > 0);
+}
+
+struct xor_block_template const xor_block_inner_neon = {
+ .name = "__inner_neon__",
+ .do_2 = xor_arm64_neon_2,
+ .do_3 = xor_arm64_neon_3,
+ .do_4 = xor_arm64_neon_4,
+ .do_5 = xor_arm64_neon_5,
+};
+EXPORT_SYMBOL(xor_block_inner_neon);
+
+MODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>");
+MODULE_DESCRIPTION("ARMv8 XOR Extensions");
+MODULE_LICENSE("GPL");
diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S
index 0c22ede52f90..a194fd0e837f 100644
--- a/arch/arm64/mm/cache.S
+++ b/arch/arm64/mm/cache.S
@@ -212,6 +212,9 @@ ENDPROC(__dma_clean_area)
* - size - size in question
*/
ENTRY(__clean_dcache_area_pop)
+ alternative_if_not ARM64_HAS_DCPOP
+ b __clean_dcache_area_poc
+ alternative_else_nop_endif
dcache_by_line_op cvap, sy, x0, x1, x2, x3
ret
ENDPIPROC(__clean_dcache_area_pop)
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 7d9571f4ae3d..5fe6d2e40e9b 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -160,7 +160,7 @@ void show_pte(unsigned long addr)
pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
- VA_BITS, mm->pgd);
+ mm == &init_mm ? VA_BITS : (int) vabits_user, mm->pgd);
pgdp = pgd_offset(mm, addr);
pgd = READ_ONCE(*pgdp);
pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index f58ea503ad01..28cbc22d7e30 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -429,6 +429,27 @@ void huge_ptep_clear_flush(struct vm_area_struct *vma,
clear_flush(vma->vm_mm, addr, ptep, pgsize, ncontig);
}
+static void __init add_huge_page_size(unsigned long size)
+{
+ if (size_to_hstate(size))
+ return;
+
+ hugetlb_add_hstate(ilog2(size) - PAGE_SHIFT);
+}
+
+static int __init hugetlbpage_init(void)
+{
+#ifdef CONFIG_ARM64_4K_PAGES
+ add_huge_page_size(PUD_SIZE);
+#endif
+ add_huge_page_size(PMD_SIZE * CONT_PMDS);
+ add_huge_page_size(PMD_SIZE);
+ add_huge_page_size(PAGE_SIZE * CONT_PTES);
+
+ return 0;
+}
+arch_initcall(hugetlbpage_init);
+
static __init int setup_hugepagesz(char *opt)
{
unsigned long ps = memparse(opt, &opt);
@@ -440,7 +461,7 @@ static __init int setup_hugepagesz(char *opt)
case PMD_SIZE * CONT_PMDS:
case PMD_SIZE:
case PAGE_SIZE * CONT_PTES:
- hugetlb_add_hstate(ilog2(ps) - PAGE_SHIFT);
+ add_huge_page_size(ps);
return 1;
}
@@ -449,13 +470,3 @@ static __init int setup_hugepagesz(char *opt)
return 0;
}
__setup("hugepagesz=", setup_hugepagesz);
-
-#ifdef CONFIG_ARM64_64K_PAGES
-static __init int add_default_hugepagesz(void)
-{
- if (size_to_hstate(CONT_PTES * PAGE_SIZE) == NULL)
- hugetlb_add_hstate(CONT_PTE_SHIFT);
- return 0;
-}
-arch_initcall(add_default_hugepagesz);
-#endif
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 9b432d9fcada..6cde00554e9b 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -59,6 +59,8 @@
* that cannot be mistaken for a real physical address.
*/
s64 memstart_addr __ro_after_init = -1;
+EXPORT_SYMBOL(memstart_addr);
+
phys_addr_t arm64_dma_phys_limit __ro_after_init;
#ifdef CONFIG_BLK_DEV_INITRD
@@ -607,7 +609,7 @@ void __init mem_init(void)
* detected at build time already.
*/
#ifdef CONFIG_COMPAT
- BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
+ BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index d1d6601b385d..0d3eacc4bfbb 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -52,6 +52,7 @@
u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
+u64 vabits_user __ro_after_init;
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
@@ -451,7 +452,7 @@ static void __init map_mem(pgd_t *pgdp)
struct memblock_region *reg;
int flags = 0;
- if (debug_pagealloc_enabled())
+ if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
/*
@@ -552,7 +553,19 @@ static void __init map_kernel_segment(pgd_t *pgdp, void *va_start, void *va_end,
static int __init parse_rodata(char *arg)
{
- return strtobool(arg, &rodata_enabled);
+ int ret = strtobool(arg, &rodata_enabled);
+ if (!ret) {
+ rodata_full = false;
+ return 0;
+ }
+
+ /* permit 'full' in addition to boolean options */
+ if (strcmp(arg, "full"))
+ return -EINVAL;
+
+ rodata_enabled = true;
+ rodata_full = true;
+ return 0;
}
early_param("rodata", parse_rodata);
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index a56359373d8b..6cd645edcf35 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -25,6 +25,8 @@ struct page_change_data {
pgprot_t clear_mask;
};
+bool rodata_full __ro_after_init = IS_ENABLED(CONFIG_RODATA_FULL_DEFAULT_ENABLED);
+
static int change_page_range(pte_t *ptep, pgtable_t token, unsigned long addr,
void *data)
{
@@ -64,6 +66,7 @@ static int change_memory_common(unsigned long addr, int numpages,
unsigned long size = PAGE_SIZE*numpages;
unsigned long end = start + size;
struct vm_struct *area;
+ int i;
if (!PAGE_ALIGNED(addr)) {
start &= PAGE_MASK;
@@ -93,6 +96,24 @@ static int change_memory_common(unsigned long addr, int numpages,
if (!numpages)
return 0;
+ /*
+ * If we are manipulating read-only permissions, apply the same
+ * change to the linear mapping of the pages that back this VM area.
+ */
+ if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY ||
+ pgprot_val(clear_mask) == PTE_RDONLY)) {
+ for (i = 0; i < area->nr_pages; i++) {
+ __change_memory_common((u64)page_address(area->pages[i]),
+ PAGE_SIZE, set_mask, clear_mask);
+ }
+ }
+
+ /*
+ * Get rid of potentially aliasing lazily unmapped vm areas that may
+ * have permissions set that deviate from the ones we are setting here.
+ */
+ vm_unmap_aliases();
+
return __change_memory_common(start, size, set_mask, clear_mask);
}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 2c75b0b903ae..e05b3ce1db6b 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -182,6 +182,7 @@ ENDPROC(cpu_do_switch_mm)
.macro __idmap_cpu_set_reserved_ttbr1, tmp1, tmp2
adrp \tmp1, empty_zero_page
phys_to_ttbr \tmp2, \tmp1
+ offset_ttbr1 \tmp2
msr ttbr1_el1, \tmp2
isb
tlbi vmalle1
@@ -200,6 +201,7 @@ ENTRY(idmap_cpu_replace_ttbr1)
__idmap_cpu_set_reserved_ttbr1 x1, x3
+ offset_ttbr1 x0
msr ttbr1_el1, x0
isb
@@ -254,6 +256,7 @@ ENTRY(idmap_kpti_install_ng_mappings)
pte .req x16
mrs swapper_ttb, ttbr1_el1
+ restore_ttbr1 swapper_ttb
adr flag_ptr, __idmap_kpti_flag
cbnz cpu, __idmap_kpti_secondary
@@ -373,6 +376,7 @@ __idmap_kpti_secondary:
cbnz w18, 1b
/* All done, act like nothing happened */
+ offset_ttbr1 swapper_ttb
msr ttbr1_el1, swapper_ttb
isb
ret
@@ -446,7 +450,15 @@ ENTRY(__cpu_setup)
ldr x10, =TCR_TxSZ(VA_BITS) | TCR_CACHE_FLAGS | TCR_SMP_FLAGS | \
TCR_TG_FLAGS | TCR_KASLR_FLAGS | TCR_ASID16 | \
TCR_TBI0 | TCR_A1
- tcr_set_idmap_t0sz x10, x9
+
+#ifdef CONFIG_ARM64_USER_VA_BITS_52
+ ldr_l x9, vabits_user
+ sub x9, xzr, x9
+ add x9, x9, #64
+#else
+ ldr_l x9, idmap_t0sz
+#endif
+ tcr_set_t0sz x10, x9
/*
* Set the IPS bits in TCR_EL1.
diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h
index 23a14d187fb1..b5ea9e14c017 100644
--- a/arch/s390/include/asm/preempt.h
+++ b/arch/s390/include/asm/preempt.h
@@ -8,6 +8,8 @@
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED 0x80000000
#define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED)
static inline int preempt_count(void)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 90cb2f36c042..99a7fa9ab0a3 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -8,6 +8,9 @@
DECLARE_PER_CPU(int, __preempt_count);
+/* We use the MSB mostly because its available */
+#define PREEMPT_NEED_RESCHED 0x80000000
+
/*
* We use the PREEMPT_NEED_RESCHED bit as an inverted NEED_RESCHED such
* that a decrement hitting 0 means we can and should reschedule.
diff --git a/drivers/firmware/efi/arm-runtime.c b/drivers/firmware/efi/arm-runtime.c
index a00934d263c5..23ea1ed409d1 100644
--- a/drivers/firmware/efi/arm-runtime.c
+++ b/drivers/firmware/efi/arm-runtime.c
@@ -38,7 +38,7 @@ static struct ptdump_info efi_ptdump_info = {
.mm = &efi_mm,
.markers = (struct addr_marker[]){
{ 0, "UEFI runtime start" },
- { TASK_SIZE_64, "UEFI runtime end" }
+ { DEFAULT_MAP_WINDOW_64, "UEFI runtime end" }
},
.base_addr = 0,
};
diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c
index 3d36142cf812..6640942a1c0d 100644
--- a/drivers/firmware/efi/libstub/arm-stub.c
+++ b/drivers/firmware/efi/libstub/arm-stub.c
@@ -33,7 +33,7 @@
#define EFI_RT_VIRTUAL_SIZE SZ_512M
#ifdef CONFIG_ARM64
-# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE_64
+# define EFI_RT_VIRTUAL_LIMIT DEFAULT_MAP_WINDOW_64
#else
# define EFI_RT_VIRTUAL_LIMIT TASK_SIZE
#endif
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 7c47b1a471d4..7e020782ade2 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -79,6 +79,12 @@
#define ALIGN __ALIGN
#define ALIGN_STR __ALIGN_STR
+#ifndef GLOBAL
+#define GLOBAL(name) \
+ .globl name ASM_NL \
+ name:
+#endif
+
#ifndef ENTRY
#define ENTRY(name) \
.globl name ASM_NL \
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index c01813c3fbe9..dd92b1a93919 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -53,9 +53,6 @@
#define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET)
-/* We use the MSB mostly because its available */
-#define PREEMPT_NEED_RESCHED 0x80000000
-
#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 6c04292e16a7..7bb64381e77c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2066,6 +2066,15 @@ found_highest:
return gap_end;
}
+
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr) (TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
/* Get an address range which is currently unmapped.
* For shmat() with addr=0.
*
@@ -2085,8 +2094,9 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct vm_unmapped_area_info info;
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
- if (len > TASK_SIZE - mmap_min_addr)
+ if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
@@ -2095,7 +2105,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma_prev(mm, addr, &prev);
- if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ if (mmap_end - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
return addr;
@@ -2104,7 +2114,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = mm->mmap_base;
- info.high_limit = TASK_SIZE;
+ info.high_limit = mmap_end;
info.align_mask = 0;
return vm_unmapped_area(&info);
}
@@ -2124,9 +2134,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
struct vm_unmapped_area_info info;
+ const unsigned long mmap_end = arch_get_mmap_end(addr);
/* requested length too big for entire address space */
- if (len > TASK_SIZE - mmap_min_addr)
+ if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
if (flags & MAP_FIXED)
@@ -2136,7 +2147,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (addr) {
addr = PAGE_ALIGN(addr);
vma = find_vma_prev(mm, addr, &prev);
- if (TASK_SIZE - len >= addr && addr >= mmap_min_addr &&
+ if (mmap_end - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
return addr;
@@ -2145,7 +2156,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = mm->mmap_base;
+ info.high_limit = arch_get_mmap_base(addr, mm->mmap_base);
info.align_mask = 0;
addr = vm_unmapped_area(&info);
@@ -2159,7 +2170,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = TASK_SIZE;
+ info.high_limit = mmap_end;
addr = vm_unmapped_area(&info);
}