From e9a2f8b599d0bc22a1b13e69527246ac39c697b4 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 11 Oct 2020 10:20:16 +0100
Subject: ARM: 9011/1: centralize phys-to-virt conversion of DT/ATAGS address

Before moving the DT mapping out of the linear region, let's prepare
for this change by removing all the phys-to-virt translations of the
__atags_pointer variable, and perform this translation only once at
setup time.

Tested-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/prom.h   |  4 ++--
 arch/arm/kernel/atags.h       |  4 ++--
 arch/arm/kernel/atags_parse.c |  6 +++---
 arch/arm/kernel/devtree.c     |  6 +++---
 arch/arm/kernel/setup.c       | 14 +++++++++-----
 arch/arm/mm/mmu.c             |  4 ++--
 6 files changed, 21 insertions(+), 17 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h
index 1e36c40533c1..402e3f34c7ed 100644
--- a/arch/arm/include/asm/prom.h
+++ b/arch/arm/include/asm/prom.h
@@ -9,12 +9,12 @@
 
 #ifdef CONFIG_OF
 
-extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys);
+extern const struct machine_desc *setup_machine_fdt(void *dt_virt);
 extern void __init arm_dt_init_cpu_maps(void);
 
 #else /* CONFIG_OF */
 
-static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys)
+static inline const struct machine_desc *setup_machine_fdt(void *dt_virt)
 {
 	return NULL;
 }
diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h
index 067e12edc341..f2819c25b602 100644
--- a/arch/arm/kernel/atags.h
+++ b/arch/arm/kernel/atags.h
@@ -2,11 +2,11 @@
 void convert_to_tag_list(struct tag *tags);
 
 #ifdef CONFIG_ATAGS
-const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer,
+const struct machine_desc *setup_machine_tags(void *__atags_vaddr,
 	unsigned int machine_nr);
 #else
 static inline const struct machine_desc * __init __noreturn
-setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr)
 {
 	early_print("no ATAGS support: can't continue\n");
 	while (true);
diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c
index 6c12d9fe694e..373b61f9a4f0 100644
--- a/arch/arm/kernel/atags_parse.c
+++ b/arch/arm/kernel/atags_parse.c
@@ -174,7 +174,7 @@ static void __init squash_mem_tags(struct tag *tag)
 }
 
 const struct machine_desc * __init
-setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
+setup_machine_tags(void *atags_vaddr, unsigned int machine_nr)
 {
 	struct tag *tags = (struct tag *)&default_tags;
 	const struct machine_desc *mdesc = NULL, *p;
@@ -195,8 +195,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr)
 	if (!mdesc)
 		return NULL;
 
-	if (__atags_pointer)
-		tags = phys_to_virt(__atags_pointer);
+	if (atags_vaddr)
+		tags = atags_vaddr;
 	else if (mdesc->atag_offset)
 		tags = (void *)(PAGE_OFFSET + mdesc->atag_offset);
 
diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c
index 7f0745a97e20..28311dd0fee6 100644
--- a/arch/arm/kernel/devtree.c
+++ b/arch/arm/kernel/devtree.c
@@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match)
 
 /**
  * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
- * @dt_phys: physical address of dt blob
+ * @dt_virt: virtual address of dt blob
  *
  * If a dtb was passed to the kernel in r2, then use it to choose the
  * correct machine_desc and to setup the system.
  */
-const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
+const struct machine_desc * __init setup_machine_fdt(void *dt_virt)
 {
 	const struct machine_desc *mdesc, *mdesc_best = NULL;
 
@@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
 	mdesc_best = &__mach_desc_GENERIC_DT;
 #endif
 
-	if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys)))
+	if (!dt_virt || !early_init_dt_verify(dt_virt))
 		return NULL;
 
 	mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 3f65d0ac9f63..306bcd9844be 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -89,6 +89,7 @@ unsigned int cacheid __read_mostly;
 EXPORT_SYMBOL(cacheid);
 
 unsigned int __atags_pointer __initdata;
+void *atags_vaddr __initdata;
 
 unsigned int system_rev;
 EXPORT_SYMBOL(system_rev);
@@ -1081,19 +1082,22 @@ void __init hyp_mode_check(void)
 
 void __init setup_arch(char **cmdline_p)
 {
-	const struct machine_desc *mdesc;
+	const struct machine_desc *mdesc = NULL;
+
+	if (__atags_pointer)
+		atags_vaddr = phys_to_virt(__atags_pointer);
 
 	setup_processor();
-	mdesc = setup_machine_fdt(__atags_pointer);
+	if (atags_vaddr)
+		mdesc = setup_machine_fdt(atags_vaddr);
 	if (!mdesc)
-		mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type);
+		mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type);
 	if (!mdesc) {
 		early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n");
 		early_print("  r1=0x%08x, r2=0x%08x\n", __machine_arch_type,
 			    __atags_pointer);
 		if (__atags_pointer)
-			early_print("  r2[]=%*ph\n", 16,
-				    phys_to_virt(__atags_pointer));
+			early_print("  r2[]=%*ph\n", 16, atags_vaddr);
 		dump_machine_table();
 	}
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index ab69250a86bc..55991fe60054 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1489,7 +1489,7 @@ static void __init map_lowmem(void)
 }
 
 #ifdef CONFIG_ARM_PV_FIXUP
-extern unsigned long __atags_pointer;
+extern void *atags_vaddr;
 typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
 pgtables_remap lpae_pgtables_remap_asm;
 
@@ -1520,7 +1520,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
 	 */
 	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
 	pa_pgd = __pa(swapper_pg_dir);
-	boot_data = __va(__atags_pointer);
+	boot_data = atags_vaddr;
 	barrier();
 
 	pr_info("Switching physical address space to 0x%08llx\n",
-- 
cgit v1.2.3


From 7a1be318f5795cb66fa0dc86b3ace427fe68057f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 11 Oct 2020 10:21:37 +0100
Subject: ARM: 9012/1: move device tree mapping out of linear region

On ARM, setting up the linear region is tricky, given the constraints
around placement and alignment of the memblocks, and how the kernel
itself as well as the DT are placed in physical memory.

Let's simplify matters a bit, by moving the device tree mapping to the
top of the address space, right between the end of the vmalloc region
and the start of the the fixmap region, and create a read-only mapping
for it that is independent of the size of the linear region, and how it
is organized.

Since this region was formerly used as a guard region, which will now be
populated fully on LPAE builds by this read-only mapping (which will
still be able to function as a guard region for stray writes), bump the
start of the [underutilized] fixmap region by 512 KB as well, to ensure
that there is always a proper guard region here. Doing so still leaves
ample room for the fixmap space, even with NR_CPUS set to its maximum
value of 32.

Tested-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/fixmap.h |  2 +-
 arch/arm/include/asm/memory.h |  5 +++++
 arch/arm/kernel/head.S        |  5 ++---
 arch/arm/kernel/setup.c       | 11 ++++++++---
 arch/arm/mm/init.c            |  1 -
 arch/arm/mm/mmu.c             | 20 ++++++++++++++------
 arch/arm/mm/pv-fixup-asm.S    |  4 ++--
 7 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h
index fc56fc3e1931..9575b404019c 100644
--- a/arch/arm/include/asm/fixmap.h
+++ b/arch/arm/include/asm/fixmap.h
@@ -2,7 +2,7 @@
 #ifndef _ASM_FIXMAP_H
 #define _ASM_FIXMAP_H
 
-#define FIXADDR_START		0xffc00000UL
+#define FIXADDR_START		0xffc80000UL
 #define FIXADDR_END		0xfff00000UL
 #define FIXADDR_TOP		(FIXADDR_END - PAGE_SIZE)
 
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 99035b5891ef..bb79e52aeb90 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -67,6 +67,10 @@
  */
 #define XIP_VIRT_ADDR(physaddr)  (MODULES_VADDR + ((physaddr) & 0x000fffff))
 
+#define FDT_FIXED_BASE		UL(0xff800000)
+#define FDT_FIXED_SIZE		(2 * PMD_SIZE)
+#define FDT_VIRT_ADDR(physaddr)	((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE))
+
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
 /*
  * Allow 16MB-aligned ioremap pages
@@ -107,6 +111,7 @@ extern unsigned long vectors_base;
 #define MODULES_VADDR		PAGE_OFFSET
 
 #define XIP_VIRT_ADDR(physaddr)  (physaddr)
+#define FDT_VIRT_ADDR(physaddr)  ((void *)(physaddr))
 
 #endif /* !CONFIG_MMU */
 
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f8904227e7fd..9b18d8c66129 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -275,9 +275,8 @@ __create_page_tables:
 	 */
 	mov	r0, r2, lsr #SECTION_SHIFT
 	movs	r0, r0, lsl #SECTION_SHIFT
-	subne	r3, r0, r8
-	addne	r3, r3, #PAGE_OFFSET
-	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
+	ldrne	r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER)
+	addne	r3, r3, r4
 	orrne	r6, r7, r0
 	strne	r6, [r3], #1 << PMD_ORDER
 	addne	r6, r6, #1 << SECTION_SHIFT
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 306bcd9844be..694aa6b4bd03 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -18,6 +18,7 @@
 #include <linux/of_platform.h>
 #include <linux/init.h>
 #include <linux/kexec.h>
+#include <linux/libfdt.h>
 #include <linux/of_fdt.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
@@ -89,7 +90,6 @@ unsigned int cacheid __read_mostly;
 EXPORT_SYMBOL(cacheid);
 
 unsigned int __atags_pointer __initdata;
-void *atags_vaddr __initdata;
 
 unsigned int system_rev;
 EXPORT_SYMBOL(system_rev);
@@ -1083,13 +1083,18 @@ void __init hyp_mode_check(void)
 void __init setup_arch(char **cmdline_p)
 {
 	const struct machine_desc *mdesc = NULL;
+	void *atags_vaddr = NULL;
 
 	if (__atags_pointer)
-		atags_vaddr = phys_to_virt(__atags_pointer);
+		atags_vaddr = FDT_VIRT_ADDR(__atags_pointer);
 
 	setup_processor();
-	if (atags_vaddr)
+	if (atags_vaddr) {
 		mdesc = setup_machine_fdt(atags_vaddr);
+		if (mdesc)
+			memblock_reserve(__atags_pointer,
+					 fdt_totalsize(atags_vaddr));
+	}
 	if (!mdesc)
 		mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type);
 	if (!mdesc) {
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index d57112a276f5..a391804c7ce3 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -223,7 +223,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc)
 	if (mdesc->reserve)
 		mdesc->reserve();
 
-	early_init_fdt_reserve_self();
 	early_init_fdt_scan_reserved_mem();
 
 	/* reserve memory for DMA contiguous allocations */
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 55991fe60054..fa259825310c 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -39,6 +39,8 @@
 #include "mm.h"
 #include "tcm.h"
 
+extern unsigned long __atags_pointer;
+
 /*
  * empty_zero_page is a special page that is used for
  * zero-initialized data and COW.
@@ -946,7 +948,7 @@ static void __init create_mapping(struct map_desc *md)
 		return;
 	}
 
-	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
+	if (md->type == MT_DEVICE &&
 	    md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START &&
 	    (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) {
 		pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n",
@@ -1333,6 +1335,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc)
 	for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
 
+	if (__atags_pointer) {
+		/* create a read-only mapping of the device tree */
+		map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
+		map.virtual = FDT_FIXED_BASE;
+		map.length = FDT_FIXED_SIZE;
+		map.type = MT_ROM;
+		create_mapping(&map);
+	}
+
 	/*
 	 * Map the kernel if it is XIP.
 	 * It is always first in the modulearea.
@@ -1489,8 +1500,7 @@ static void __init map_lowmem(void)
 }
 
 #ifdef CONFIG_ARM_PV_FIXUP
-extern void *atags_vaddr;
-typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata);
+typedef void pgtables_remap(long long offset, unsigned long pgd);
 pgtables_remap lpae_pgtables_remap_asm;
 
 /*
@@ -1503,7 +1513,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
 	unsigned long pa_pgd;
 	unsigned int cr, ttbcr;
 	long long offset;
-	void *boot_data;
 
 	if (!mdesc->pv_fixup)
 		return;
@@ -1520,7 +1529,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
 	 */
 	lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm);
 	pa_pgd = __pa(swapper_pg_dir);
-	boot_data = atags_vaddr;
 	barrier();
 
 	pr_info("Switching physical address space to 0x%08llx\n",
@@ -1556,7 +1564,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc)
 	 * needs to be assembly.  It's fairly simple, as we're using the
 	 * temporary tables setup by the initial assembly code.
 	 */
-	lpae_pgtables_remap(offset, pa_pgd, boot_data);
+	lpae_pgtables_remap(offset, pa_pgd);
 
 	/* Re-enable the caches and cacheable TLB walks */
 	asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr));
diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S
index 8eade0416739..5c5e1952000a 100644
--- a/arch/arm/mm/pv-fixup-asm.S
+++ b/arch/arm/mm/pv-fixup-asm.S
@@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm)
 
 	/* Update level 2 entries for the boot data */
 	add	r7, r2, #0x1000
-	add	r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER
-	bic	r7, r7, #(1 << L2_ORDER) - 1
+	movw	r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER)
+	add	r7, r7, r3
 	ldrd	r4, r5, [r7]
 	adds	r4, r4, r0
 	adc	r5, r5, r1
-- 
cgit v1.2.3


From d5d44e7e3507b0ad868f68e0c5bca6a57afa1b8b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Oct 2020 23:50:09 +0100
Subject: ARM: 9013/2: Disable KASan instrumentation for some code

Disable instrumentation for arch/arm/boot/compressed/*
since that code is executed before the kernel has even
set up its mappings and definately out of scope for
KASan.

Disable instrumentation of arch/arm/vdso/* because that code
is not linked with the kernel image, so the KASan management
code would fail to link.

Disable instrumentation of arch/arm/mm/physaddr.c. See commit
ec6d06efb0ba ("arm64: Add support for CONFIG_DEBUG_VIRTUAL")
for more details.

Disable kasan check in the function unwind_pop_register because
it does not matter that kasan checks failed when unwind_pop_register()
reads the stack memory of a task.

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Reported-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/Makefile | 1 +
 arch/arm/kernel/unwind.c          | 6 +++++-
 arch/arm/mm/Makefile              | 2 ++
 arch/arm/vdso/Makefile            | 2 ++
 4 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 47f001ca5499..a815b1ae990d 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -24,6 +24,7 @@ OBJS		+= hyp-stub.o
 endif
 
 GCOV_PROFILE		:= n
+KASAN_SANITIZE		:= n
 
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT		:= n
diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index d2bd0df2318d..f35eb584a18a 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -236,7 +236,11 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl,
 		if (*vsp >= (unsigned long *)ctrl->sp_high)
 			return -URC_FAILURE;
 
-	ctrl->vrs[reg] = *(*vsp)++;
+	/* Use READ_ONCE_NOCHECK here to avoid this memory access
+	 * from being tracked by KASAN.
+	 */
+	ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp));
+	(*vsp)++;
 	return URC_OK;
 }
 
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 7cb1699fbfc4..99699c32d8a5 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -7,6 +7,7 @@ obj-y				:= extable.o fault.o init.o iomap.o
 obj-y				+= dma-mapping$(MMUEXT).o
 obj-$(CONFIG_MMU)		+= fault-armv.o flush.o idmap.o ioremap.o \
 				   mmap.o pgd.o mmu.o pageattr.o
+KASAN_SANITIZE_mmu.o		:= n
 
 ifneq ($(CONFIG_MMU),y)
 obj-y				+= nommu.o
@@ -16,6 +17,7 @@ endif
 obj-$(CONFIG_ARM_PTDUMP_CORE)	+= dump.o
 obj-$(CONFIG_ARM_PTDUMP_DEBUGFS)	+= ptdump_debugfs.o
 obj-$(CONFIG_MODULES)		+= proc-syms.o
+KASAN_SANITIZE_physaddr.o	:= n
 obj-$(CONFIG_DEBUG_VIRTUAL)	+= physaddr.o
 
 obj-$(CONFIG_ALIGNMENT_TRAP)	+= alignment.o
diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile
index 150ce6e6a5d3..b558bee0e1f6 100644
--- a/arch/arm/vdso/Makefile
+++ b/arch/arm/vdso/Makefile
@@ -42,6 +42,8 @@ GCOV_PROFILE := n
 # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
 KCOV_INSTRUMENT := n
 
+KASAN_SANITIZE := n
+
 # Force dependency
 $(obj)/vdso.o : $(obj)/vdso.so
 
-- 
cgit v1.2.3


From d6d51a96c7d63b7450860a3037f2d62388286a52 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Oct 2020 23:52:08 +0100
Subject: ARM: 9014/2: Replace string mem* functions for KASan

Functions like memset()/memmove()/memcpy() do a lot of memory
accesses.

If a bad pointer is passed to one of these functions it is important
to catch this. Compiler instrumentation cannot do this since these
functions are written in assembly.

KASan replaces these memory functions with instrumented variants.

The original functions are declared as weak symbols so that
the strong definitions in mm/kasan/kasan.c can replace them.

The original functions have aliases with a '__' prefix in their
name, so we can call the non-instrumented variant if needed.

We must use __memcpy()/__memset() in place of memcpy()/memset()
when we copy .data to RAM and when we clear .bss, because
kasan_early_init cannot be called before the initialization of
.data and .bss.

For the kernel compression and EFI libstub's custom string
libraries we need a special quirk: even if these are built
without KASan enabled, they rely on the global headers for their
custom string libraries, which means that e.g. memcpy()
will be defined to __memcpy() and we get link failures.
Since these implementations are written i C rather than
assembly we use e.g. __alias(memcpy) to redirected any
users back to the local implementation.

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Reported-by: Russell King - ARM Linux <rmk+kernel@armlinux.org.uk>
Signed-off-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/string.c | 19 +++++++++++++++++++
 arch/arm/include/asm/string.h     | 26 ++++++++++++++++++++++++++
 arch/arm/kernel/head-common.S     |  4 ++--
 arch/arm/lib/memcpy.S             |  3 +++
 arch/arm/lib/memmove.S            |  5 ++++-
 arch/arm/lib/memset.S             |  3 +++
 6 files changed, 57 insertions(+), 3 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c
index ade5079bebbf..8c0fa276d994 100644
--- a/arch/arm/boot/compressed/string.c
+++ b/arch/arm/boot/compressed/string.c
@@ -7,6 +7,25 @@
 
 #include <linux/string.h>
 
+/*
+ * The decompressor is built without KASan but uses the same redirects as the
+ * rest of the kernel when CONFIG_KASAN is enabled, defining e.g. memcpy()
+ * to __memcpy() but since we are not linking with the main kernel string
+ * library in the decompressor, that will lead to link failures.
+ *
+ * Undefine KASan's versions, define the wrapped functions and alias them to
+ * the right names so that when e.g. __memcpy() appear in the code, it will
+ * still be linked to this local version of memcpy().
+ */
+#ifdef CONFIG_KASAN
+#undef memcpy
+#undef memmove
+#undef memset
+void *__memcpy(void *__dest, __const void *__src, size_t __n) __alias(memcpy);
+void *__memmove(void *__dest, __const void *__src, size_t count) __alias(memmove);
+void *__memset(void *s, int c, size_t count) __alias(memset);
+#endif
+
 void *memcpy(void *__dest, __const void *__src, size_t __n)
 {
 	int i = 0;
diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h
index 111a1d8a41dd..6c607c68f3ad 100644
--- a/arch/arm/include/asm/string.h
+++ b/arch/arm/include/asm/string.h
@@ -5,6 +5,9 @@
 /*
  * We don't do inline string functions, since the
  * optimised inline asm versions are not small.
+ *
+ * The __underscore versions of some functions are for KASan to be able
+ * to replace them with instrumented versions.
  */
 
 #define __HAVE_ARCH_STRRCHR
@@ -15,15 +18,18 @@ extern char * strchr(const char * s, int c);
 
 #define __HAVE_ARCH_MEMCPY
 extern void * memcpy(void *, const void *, __kernel_size_t);
+extern void *__memcpy(void *dest, const void *src, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMMOVE
 extern void * memmove(void *, const void *, __kernel_size_t);
+extern void *__memmove(void *dest, const void *src, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *, int, __kernel_size_t);
 
 #define __HAVE_ARCH_MEMSET
 extern void * memset(void *, int, __kernel_size_t);
+extern void *__memset(void *s, int c, __kernel_size_t n);
 
 #define __HAVE_ARCH_MEMSET32
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
@@ -39,4 +45,24 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
 	return __memset64(p, v, n * 8, v >> 32);
 }
 
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * must use non-instrumented versions of the mem*
+ * functions named __memcpy() etc. All such kernel code has
+ * been tagged with KASAN_SANITIZE_file.o = n, which means
+ * that the address sanitization argument isn't passed to the
+ * compiler, and __SANITIZE_ADDRESS__ is not set. As a result
+ * these defines kick in.
+ */
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
+
 #endif
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 4a3982812a40..6840c7c60a85 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -95,7 +95,7 @@ __mmap_switched:
  THUMB(	ldmia	r4!, {r0, r1, r2, r3} )
  THUMB(	mov	sp, r3 )
 	sub	r2, r2, r1
-	bl	memcpy				@ copy .data to RAM
+	bl	__memcpy			@ copy .data to RAM
 #endif
 
    ARM(	ldmia	r4!, {r0, r1, sp} )
@@ -103,7 +103,7 @@ __mmap_switched:
  THUMB(	mov	sp, r3 )
 	sub	r2, r1, r0
 	mov	r1, #0
-	bl	memset				@ clear .bss
+	bl	__memset			@ clear .bss
 
 	ldmia	r4, {r0, r1, r2, r3}
 	str	r9, [r0]			@ Save processor ID
diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index 09a333153dc6..ad4625d16e11 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -58,6 +58,8 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
+.weak memcpy
+ENTRY(__memcpy)
 ENTRY(mmiocpy)
 ENTRY(memcpy)
 
@@ -65,3 +67,4 @@ ENTRY(memcpy)
 
 ENDPROC(memcpy)
 ENDPROC(mmiocpy)
+ENDPROC(__memcpy)
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index b50e5770fb44..fd123ea5a5a4 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -24,12 +24,14 @@
  * occurring in the opposite direction.
  */
 
+.weak memmove
+ENTRY(__memmove)
 ENTRY(memmove)
 	UNWIND(	.fnstart			)
 
 		subs	ip, r0, r1
 		cmphi	r2, ip
-		bls	memcpy
+		bls	__memcpy
 
 		stmfd	sp!, {r0, r4, lr}
 	UNWIND(	.fnend				)
@@ -222,3 +224,4 @@ ENTRY(memmove)
 18:		backward_copy_shift	push=24	pull=8
 
 ENDPROC(memmove)
+ENDPROC(__memmove)
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 6ca4535c47fb..0e7ff0423f50 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -13,6 +13,8 @@
 	.text
 	.align	5
 
+.weak memset
+ENTRY(__memset)
 ENTRY(mmioset)
 ENTRY(memset)
 UNWIND( .fnstart         )
@@ -132,6 +134,7 @@ UNWIND( .fnstart            )
 UNWIND( .fnend   )
 ENDPROC(memset)
 ENDPROC(mmioset)
+ENDPROC(__memset)
 
 ENTRY(__memset32)
 UNWIND( .fnstart         )
-- 
cgit v1.2.3


From c12366ba441da2f6f2b915410aca2b5b39c16514 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Oct 2020 23:53:46 +0100
Subject: ARM: 9015/2: Define the virtual space of KASan's shadow region

Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for
the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB
addressable by a 32bit architecture) out of the virtual address
space to use as shadow memory for KASan as follows:

 +----+ 0xffffffff
 |    |
 |    | |-> Static kernel image (vmlinux) BSS and page table
 |    |/
 +----+ PAGE_OFFSET
 |    |
 |    | |->  Loadable kernel modules virtual address space area
 |    |/
 +----+ MODULES_VADDR = KASAN_SHADOW_END
 |    |
 |    | |-> The shadow area of kernel virtual address.
 |    |/
 +----+->  TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the
 |    |   shadow address of MODULES_VADDR
 |    | |
 |    | |
 |    | |-> The user space area in lowmem. The kernel address
 |    | |   sanitizer do not use this space, nor does it map it.
 |    | |
 |    | |
 |    | |
 |    | |
 |    |/
 ------ 0

0 .. TASK_SIZE is the memory that can be used by shared
userspace/kernelspace. It us used for userspace processes and for
passing parameters and memory buffers in system calls etc. We do not
need to shadow this area.

KASAN_SHADOW_START:
 This value begins with the MODULE_VADDR's shadow address. It is the
 start of kernel virtual space. Since we have modules to load, we need
 to cover also that area with shadow memory so we can find memory
 bugs in modules.

KASAN_SHADOW_END
 This value is the 0x100000000's shadow address: the mapping that would
 be after the end of the kernel memory at 0xffffffff. It is the end of
 kernel address sanitizer shadow area. It is also the start of the
 module area.

KASAN_SHADOW_OFFSET:
 This value is used to map an address to the corresponding shadow
 address by the following formula:

   shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;

 As you would expect, >> 3 is equal to dividing by 8, meaning each
 byte in the shadow memory covers 8 bytes of kernel memory, so one
 bit shadow memory per byte of kernel memory is used.

 The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending
 on the VMSPLIT layout of the system: the kernel and userspace can
 split up lowmem in different ways according to needs, so we calculate
 the shadow offset depending on this.

When kasan is enabled, the definition of TASK_SIZE is not an 8-bit
rotated constant, so we need to modify the TASK_SIZE access code in the
*.s file.

The kernel and modules may use different amounts of memory,
according to the VMSPLIT configuration, which in turn
determines the PAGE_OFFSET.

We use the following KASAN_SHADOW_OFFSETs depending on how the
virtual memory is split up:

- 0x1f000000 if we have 1G userspace / 3G kernelspace split:
  - The kernel address space is 3G (0xc0000000)
  - PAGE_OFFSET is then set to 0x40000000 so the kernel static
    image (vmlinux) uses addresses 0x40000000 .. 0xffffffff
  - On top of that we have the MODULES_VADDR which under
    the worst case (using ARM instructions) is
    PAGE_OFFSET - 16M (0x01000000) = 0x3f000000
    so the modules use addresses 0x3f000000 .. 0x3fffffff
  - So the addresses 0x3f000000 .. 0xffffffff need to be
    covered with shadow memory. That is 0xc1000000 bytes
    of memory.
  - 1/8 of that is needed for its shadow memory, so
    0x18200000 bytes of shadow memory is needed. We
    "steal" that from the remaining lowmem.
  - The KASAN_SHADOW_START becomes 0x26e00000, to
    KASAN_SHADOW_END at 0x3effffff.
  - Now we can calculate the KASAN_SHADOW_OFFSET for any
    kernel address as 0x3f000000 needs to map to the first
    byte of shadow memory and 0xffffffff needs to map to
    the last byte of shadow memory. Since:
    SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
    0x26e00000 = (0x3f000000 >> 3) + KASAN_SHADOW_OFFSET
    KASAN_SHADOW_OFFSET = 0x26e00000 - (0x3f000000 >> 3)
    KASAN_SHADOW_OFFSET = 0x26e00000 - 0x07e00000
    KASAN_SHADOW_OFFSET = 0x1f000000

- 0x5f000000 if we have 2G userspace / 2G kernelspace split:
  - The kernel space is 2G (0x80000000)
  - PAGE_OFFSET is set to 0x80000000 so the kernel static
    image uses 0x80000000 .. 0xffffffff.
  - On top of that we have the MODULES_VADDR which under
    the worst case (using ARM instructions) is
    PAGE_OFFSET - 16M (0x01000000) = 0x7f000000
    so the modules use addresses 0x7f000000 .. 0x7fffffff
  - So the addresses 0x7f000000 .. 0xffffffff need to be
    covered with shadow memory. That is 0x81000000 bytes
    of memory.
  - 1/8 of that is needed for its shadow memory, so
    0x10200000 bytes of shadow memory is needed. We
    "steal" that from the remaining lowmem.
  - The KASAN_SHADOW_START becomes 0x6ee00000, to
    KASAN_SHADOW_END at 0x7effffff.
  - Now we can calculate the KASAN_SHADOW_OFFSET for any
    kernel address as 0x7f000000 needs to map to the first
    byte of shadow memory and 0xffffffff needs to map to
    the last byte of shadow memory. Since:
    SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
    0x6ee00000 = (0x7f000000 >> 3) + KASAN_SHADOW_OFFSET
    KASAN_SHADOW_OFFSET = 0x6ee00000 - (0x7f000000 >> 3)
    KASAN_SHADOW_OFFSET = 0x6ee00000 - 0x0fe00000
    KASAN_SHADOW_OFFSET = 0x5f000000

- 0x9f000000 if we have 3G userspace / 1G kernelspace split,
  and this is the default split for ARM:
  - The kernel address space is 1GB (0x40000000)
  - PAGE_OFFSET is set to 0xc0000000 so the kernel static
    image uses 0xc0000000 .. 0xffffffff.
  - On top of that we have the MODULES_VADDR which under
    the worst case (using ARM instructions) is
    PAGE_OFFSET - 16M (0x01000000) = 0xbf000000
    so the modules use addresses 0xbf000000 .. 0xbfffffff
  - So the addresses 0xbf000000 .. 0xffffffff need to be
    covered with shadow memory. That is 0x41000000 bytes
    of memory.
  - 1/8 of that is needed for its shadow memory, so
    0x08200000 bytes of shadow memory is needed. We
    "steal" that from the remaining lowmem.
  - The KASAN_SHADOW_START becomes 0xb6e00000, to
    KASAN_SHADOW_END at 0xbfffffff.
  - Now we can calculate the KASAN_SHADOW_OFFSET for any
    kernel address as 0xbf000000 needs to map to the first
    byte of shadow memory and 0xffffffff needs to map to
    the last byte of shadow memory. Since:
    SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
    0xb6e00000 = (0xbf000000 >> 3) + KASAN_SHADOW_OFFSET
    KASAN_SHADOW_OFFSET = 0xb6e00000 - (0xbf000000 >> 3)
    KASAN_SHADOW_OFFSET = 0xb6e00000 - 0x17e00000
    KASAN_SHADOW_OFFSET = 0x9f000000

- 0x8f000000 if we have 3G userspace / 1G kernelspace with
  full 1 GB low memory (VMSPLIT_3G_OPT):
  - The kernel address space is 1GB (0x40000000)
  - PAGE_OFFSET is set to 0xb0000000 so the kernel static
    image uses 0xb0000000 .. 0xffffffff.
  - On top of that we have the MODULES_VADDR which under
    the worst case (using ARM instructions) is
    PAGE_OFFSET - 16M (0x01000000) = 0xaf000000
    so the modules use addresses 0xaf000000 .. 0xaffffff
  - So the addresses 0xaf000000 .. 0xffffffff need to be
    covered with shadow memory. That is 0x51000000 bytes
    of memory.
  - 1/8 of that is needed for its shadow memory, so
    0x0a200000 bytes of shadow memory is needed. We
    "steal" that from the remaining lowmem.
  - The KASAN_SHADOW_START becomes 0xa4e00000, to
    KASAN_SHADOW_END at 0xaeffffff.
  - Now we can calculate the KASAN_SHADOW_OFFSET for any
    kernel address as 0xaf000000 needs to map to the first
    byte of shadow memory and 0xffffffff needs to map to
    the last byte of shadow memory. Since:
    SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET
    0xa4e00000 = (0xaf000000 >> 3) + KASAN_SHADOW_OFFSET
    KASAN_SHADOW_OFFSET = 0xa4e00000 - (0xaf000000 >> 3)
    KASAN_SHADOW_OFFSET = 0xa4e00000 - 0x15e00000
    KASAN_SHADOW_OFFSET = 0x8f000000

- The default value of 0xffffffff for KASAN_SHADOW_OFFSET
  is an error value. We should always match one of the
  above shadow offsets.

When we do this, TASK_SIZE will sometimes get a bit odd values
that will not fit into immediate mov assembly instructions.
To account for this, we need to rewrite some assembly using
TASK_SIZE like this:

-       mov     r1, #TASK_SIZE
+       ldr     r1, =TASK_SIZE

or

-       cmp     r4, #TASK_SIZE
+       ldr     r0, =TASK_SIZE
+       cmp     r4, r0

this is done to avoid the immediate #TASK_SIZE that need to
fit into a limited number of bits.

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Cc: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Reported-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/Kconfig                   |  9 +++++
 arch/arm/include/asm/kasan_def.h   | 81 ++++++++++++++++++++++++++++++++++++++
 arch/arm/include/asm/memory.h      |  5 +++
 arch/arm/include/asm/uaccess-asm.h |  2 +-
 arch/arm/kernel/entry-armv.S       |  3 +-
 arch/arm/kernel/entry-common.S     |  9 +++--
 arch/arm/mm/mmu.c                  | 18 +++++++++
 7 files changed, 122 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm/include/asm/kasan_def.h

(limited to 'arch/arm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fe2f17eb2b50..194032568b7a 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1323,6 +1323,15 @@ config PAGE_OFFSET
 	default 0xB0000000 if VMSPLIT_3G_OPT
 	default 0xC0000000
 
+config KASAN_SHADOW_OFFSET
+	hex
+	depends on KASAN
+	default 0x1f000000 if PAGE_OFFSET=0x40000000
+	default 0x5f000000 if PAGE_OFFSET=0x80000000
+	default 0x9f000000 if PAGE_OFFSET=0xC0000000
+	default 0x8f000000 if PAGE_OFFSET=0xB0000000
+	default 0xffffffff
+
 config NR_CPUS
 	int "Maximum number of CPUs (2-32)"
 	range 2 32
diff --git a/arch/arm/include/asm/kasan_def.h b/arch/arm/include/asm/kasan_def.h
new file mode 100644
index 000000000000..5739605aa7cf
--- /dev/null
+++ b/arch/arm/include/asm/kasan_def.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  arch/arm/include/asm/kasan_def.h
+ *
+ *  Copyright (c) 2018 Huawei Technologies Co., Ltd.
+ *
+ *  Author: Abbott Liu <liuwenliang@huawei.com>
+ */
+
+#ifndef __ASM_KASAN_DEF_H
+#define __ASM_KASAN_DEF_H
+
+#ifdef CONFIG_KASAN
+
+/*
+ * Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for
+ * the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB
+ * addressable by a 32bit architecture) out of the virtual address
+ * space to use as shadow memory for KASan as follows:
+ *
+ * +----+ 0xffffffff
+ * |    |							\
+ * |    | |-> Static kernel image (vmlinux) BSS and page table
+ * |    |/
+ * +----+ PAGE_OFFSET
+ * |    |							\
+ * |    | |->  Loadable kernel modules virtual address space area
+ * |    |/
+ * +----+ MODULES_VADDR = KASAN_SHADOW_END
+ * |    |						\
+ * |    | |-> The shadow area of kernel virtual address.
+ * |    |/
+ * +----+->  TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the
+ * |    |\   shadow address of MODULES_VADDR
+ * |    | |
+ * |    | |
+ * |    | |-> The user space area in lowmem. The kernel address
+ * |    | |   sanitizer do not use this space, nor does it map it.
+ * |    | |
+ * |    | |
+ * |    | |
+ * |    | |
+ * |    |/
+ * ------ 0
+ *
+ * 1) KASAN_SHADOW_START
+ *   This value begins with the MODULE_VADDR's shadow address. It is the
+ *   start of kernel virtual space. Since we have modules to load, we need
+ *   to cover also that area with shadow memory so we can find memory
+ *   bugs in modules.
+ *
+ * 2) KASAN_SHADOW_END
+ *   This value is the 0x100000000's shadow address: the mapping that would
+ *   be after the end of the kernel memory at 0xffffffff. It is the end of
+ *   kernel address sanitizer shadow area. It is also the start of the
+ *   module area.
+ *
+ * 3) KASAN_SHADOW_OFFSET:
+ *   This value is used to map an address to the corresponding shadow
+ *   address by the following formula:
+ *
+ *	shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET;
+ *
+ *  As you would expect, >> 3 is equal to dividing by 8, meaning each
+ *  byte in the shadow memory covers 8 bytes of kernel memory, so one
+ *  bit shadow memory per byte of kernel memory is used.
+ *
+ *  The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending
+ *  on the VMSPLIT layout of the system: the kernel and userspace can
+ *  split up lowmem in different ways according to needs, so we calculate
+ *  the shadow offset depending on this.
+ */
+
+#define KASAN_SHADOW_SCALE_SHIFT	3
+#define KASAN_SHADOW_OFFSET	_AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
+#define KASAN_SHADOW_END	((UL(1) << (32 - KASAN_SHADOW_SCALE_SHIFT)) \
+				 + KASAN_SHADOW_OFFSET)
+#define KASAN_SHADOW_START      ((KASAN_SHADOW_END >> 3) + KASAN_SHADOW_OFFSET)
+
+#endif
+#endif
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index bb79e52aeb90..598dbdca2017 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,6 +18,7 @@
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
+#include <asm/kasan_def.h>
 
 /* PAGE_OFFSET - the virtual address of the start of the kernel image */
 #define PAGE_OFFSET		UL(CONFIG_PAGE_OFFSET)
@@ -28,7 +29,11 @@
  * TASK_SIZE - the maximum size of a user space task.
  * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area
  */
+#ifndef CONFIG_KASAN
 #define TASK_SIZE		(UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M))
+#else
+#define TASK_SIZE		(KASAN_SHADOW_START)
+#endif
 #define TASK_UNMAPPED_BASE	ALIGN(TASK_SIZE / 3, SZ_16M)
 
 /*
diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h
index 907571fd05c6..e6eb7a2aaf1e 100644
--- a/arch/arm/include/asm/uaccess-asm.h
+++ b/arch/arm/include/asm/uaccess-asm.h
@@ -85,7 +85,7 @@
 	 */
 	.macro	uaccess_entry, tsk, tmp0, tmp1, tmp2, disable
 	ldr	\tmp1, [\tsk, #TI_ADDR_LIMIT]
-	mov	\tmp2, #TASK_SIZE
+	ldr	\tmp2, =TASK_SIZE
 	str	\tmp2, [\tsk, #TI_ADDR_LIMIT]
  DACR(	mrc	p15, 0, \tmp0, c3, c0, 0)
  DACR(	str	\tmp0, [sp, #SVC_DACR])
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 55a47df04773..c4220f51fcf3 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -427,7 +427,8 @@ ENDPROC(__fiq_abt)
 	@ if it was interrupted in a critical region.  Here we
 	@ perform a quick test inline since it should be false
 	@ 99.9999% of the time.  The rest is done out of line.
-	cmp	r4, #TASK_SIZE
+	ldr	r0, =TASK_SIZE
+	cmp	r4, r0
 	blhs	kuser_cmpxchg64_fixup
 #endif
 #endif
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 271cb8a1eba1..fee279e28a72 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -50,7 +50,8 @@ __ret_fast_syscall:
  UNWIND(.cantunwind	)
 	disable_irq_notrace			@ disable interrupts
 	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	cmp	r2, #TASK_SIZE
+	ldr	r1, =TASK_SIZE
+	cmp	r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
@@ -87,7 +88,8 @@ __ret_fast_syscall:
 #endif
 	disable_irq_notrace			@ disable interrupts
 	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	cmp	r2, #TASK_SIZE
+	ldr     r1, =TASK_SIZE
+	cmp     r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]		@ re-check for syscall tracing
 	tst	r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK
@@ -128,7 +130,8 @@ ret_slow_syscall:
 	disable_irq_notrace			@ disable interrupts
 ENTRY(ret_to_user_from_irq)
 	ldr	r2, [tsk, #TI_ADDR_LIMIT]
-	cmp	r2, #TASK_SIZE
+	ldr     r1, =TASK_SIZE
+	cmp	r2, r1
 	blne	addr_limit_check_failed
 	ldr	r1, [tsk, #TI_FLAGS]
 	tst	r1, #_TIF_WORK_MASK
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index fa259825310c..c06ebfbc48c4 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -29,6 +29,7 @@
 #include <asm/procinfo.h>
 #include <asm/memory.h>
 #include <asm/pgalloc.h>
+#include <asm/kasan_def.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -1255,8 +1256,25 @@ static inline void prepare_page_table(void)
 	/*
 	 * Clear out all the mappings below the kernel image.
 	 */
+#ifdef CONFIG_KASAN
+	/*
+	 * KASan's shadow memory inserts itself between the TASK_SIZE
+	 * and MODULES_VADDR. Do not clear the KASan shadow memory mappings.
+	 */
+	for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+	/*
+	 * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes
+	 * equal to MODULES_VADDR and then we exit the pmd clearing. If we
+	 * are using a thumb-compiled kernel, there there will be 8MB more
+	 * to clear as KASan always offset to 16 MB below MODULES_VADDR.
+	 */
+	for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE)
+		pmd_clear(pmd_off_k(addr));
+#else
 	for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE)
 		pmd_clear(pmd_off_k(addr));
+#endif
 
 #ifdef CONFIG_XIP_KERNEL
 	/* The XIP kernel is mapped in the module area -- skip over it */
-- 
cgit v1.2.3


From 5615f69bc2097452ecc954f5264d784e158d6801 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Oct 2020 23:55:16 +0100
Subject: ARM: 9016/2: Initialize the mapping of KASan shadow memory

This patch initializes KASan shadow region's page table and memory.
There are two stage for KASan initializing:

1. At early boot stage the whole shadow region is mapped to just
   one physical page (kasan_zero_page). It is finished by the function
   kasan_early_init which is called by __mmap_switched(arch/arm/kernel/
   head-common.S)

2. After the calling of paging_init, we use kasan_zero_page as zero
   shadow for some memory that KASan does not need to track, and we
   allocate a new shadow space for the other memory that KASan need to
   track. These issues are finished by the function kasan_init which is
   call by setup_arch.

When using KASan we also need to increase the THREAD_SIZE_ORDER
from 1 to 2 as the extra calls for shadow memory uses quite a bit
of stack.

As we need to make a temporary copy of the PGD when setting up
shadow memory we create a helpful PGD_SIZE definition for both
LPAE and non-LPAE setups.

The KASan core code unconditionally calls pud_populate() so this
needs to be changed from BUG() to do {} while (0) when building
with KASan enabled.

After the initial development by Andre Ryabinin several modifications
have been made to this code:

Abbott Liu <liuwenliang@huawei.com>
- Add support ARM LPAE: If LPAE is enabled, KASan shadow region's
  mapping table need be copied in the pgd_alloc() function.
- Change kasan_pte_populate,kasan_pmd_populate,kasan_pud_populate,
  kasan_pgd_populate from .meminit.text section to .init.text section.
  Reported by Florian Fainelli <f.fainelli@gmail.com>

Linus Walleij <linus.walleij@linaro.org>:
- Drop the custom mainpulation of TTBR0 and just use
  cpu_switch_mm() to switch the pgd table.
- Adopt to handle 4th level page tabel folding.
- Rewrite the entire page directory and page entry initialization
  sequence to be recursive based on ARM64:s kasan_init.c.

Ard Biesheuvel <ardb@kernel.org>:
- Necessary underlying fixes.
- Crucial bug fixes to the memory set-up code.

Co-developed-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Co-developed-by: Abbott Liu <liuwenliang@huawei.com>
Co-developed-by: Ard Biesheuvel <ardb@kernel.org>

Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Cc: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Reported-by: Russell King - ARM Linux <rmk+kernel@armlinux.org.uk>
Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/kasan.h       |  33 +++++
 arch/arm/include/asm/pgalloc.h     |   8 +-
 arch/arm/include/asm/thread_info.h |   8 +
 arch/arm/kernel/head-common.S      |   3 +
 arch/arm/kernel/setup.c            |   2 +
 arch/arm/mm/Makefile               |   3 +
 arch/arm/mm/kasan_init.c           | 291 +++++++++++++++++++++++++++++++++++++
 arch/arm/mm/pgd.c                  |  16 +-
 8 files changed, 362 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm/include/asm/kasan.h
 create mode 100644 arch/arm/mm/kasan_init.c

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/kasan.h b/arch/arm/include/asm/kasan.h
new file mode 100644
index 000000000000..303c35df3135
--- /dev/null
+++ b/arch/arm/include/asm/kasan.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/arm/include/asm/kasan.h
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ */
+
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifdef CONFIG_KASAN
+
+#include <asm/kasan_def.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+/*
+ * The compiler uses a shadow offset assuming that addresses start
+ * from 0. Kernel addresses don't start from 0, so shadow
+ * for kernel really starts from 'compiler's shadow offset' +
+ * ('kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT)
+ */
+
+asmlinkage void kasan_early_init(void);
+extern void kasan_init(void);
+
+#else
+static inline void kasan_init(void) { }
+#endif
+
+#endif
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index 15f4674715f8..fdee1f04f4f3 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -21,6 +21,7 @@
 #define _PAGE_KERNEL_TABLE	(PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL))
 
 #ifdef CONFIG_ARM_LPAE
+#define PGD_SIZE		(PTRS_PER_PGD * sizeof(pgd_t))
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 {
@@ -28,14 +29,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 }
 
 #else	/* !CONFIG_ARM_LPAE */
+#define PGD_SIZE		(PAGE_SIZE << 2)
 
 /*
  * Since we have only two-level page tables, these are trivial
  */
 #define pmd_alloc_one(mm,addr)		({ BUG(); ((pmd_t *)2); })
 #define pmd_free(mm, pmd)		do { } while (0)
+#ifdef CONFIG_KASAN
+/* The KASan core unconditionally calls pud_populate() on all architectures */
+#define pud_populate(mm,pmd,pte)	do { } while (0)
+#else
 #define pud_populate(mm,pmd,pte)	BUG()
-
+#endif
 #endif	/* CONFIG_ARM_LPAE */
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 536b6b979f63..56fae7861fd3 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -13,7 +13,15 @@
 #include <asm/fpstate.h>
 #include <asm/page.h>
 
+#ifdef CONFIG_KASAN
+/*
+ * KASan uses a lot of extra stack space so the thread size order needs to
+ * be increased.
+ */
+#define THREAD_SIZE_ORDER	2
+#else
 #define THREAD_SIZE_ORDER	1
+#endif
 #define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
 #define THREAD_START_SP		(THREAD_SIZE - 8)
 
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 6840c7c60a85..89c80154b9ef 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -111,6 +111,9 @@ __mmap_switched:
 	str	r8, [r2]			@ Save atags pointer
 	cmp	r3, #0
 	strne	r10, [r3]			@ Save control register values
+#ifdef CONFIG_KASAN
+	bl	kasan_early_init
+#endif
 	mov	lr, #0
 	b	start_kernel
 ENDPROC(__mmap_switched)
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 694aa6b4bd03..f4bd8b654255 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -59,6 +59,7 @@
 #include <asm/unwind.h>
 #include <asm/memblock.h>
 #include <asm/virt.h>
+#include <asm/kasan.h>
 
 #include "atags.h"
 
@@ -1145,6 +1146,7 @@ void __init setup_arch(char **cmdline_p)
 	early_ioremap_reset();
 
 	paging_init(mdesc);
+	kasan_init();
 	request_standard_resources(mdesc);
 
 	if (mdesc->restart)
diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile
index 99699c32d8a5..4536159bc8fa 100644
--- a/arch/arm/mm/Makefile
+++ b/arch/arm/mm/Makefile
@@ -113,3 +113,6 @@ obj-$(CONFIG_CACHE_L2X0_PMU)	+= cache-l2x0-pmu.o
 obj-$(CONFIG_CACHE_XSC3L2)	+= cache-xsc3l2.o
 obj-$(CONFIG_CACHE_TAUROS2)	+= cache-tauros2.o
 obj-$(CONFIG_CACHE_UNIPHIER)	+= cache-uniphier.o
+
+KASAN_SANITIZE_kasan_init.o	:= n
+obj-$(CONFIG_KASAN)		+= kasan_init.o
diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c
new file mode 100644
index 000000000000..9c348042a724
--- /dev/null
+++ b/arch/arm/mm/kasan_init.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This file contains kasan initialization code for ARM.
+ *
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ * Author: Linus Walleij <linus.walleij@linaro.org>
+ */
+
+#define pr_fmt(fmt) "kasan: " fmt
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/start_kernel.h>
+#include <linux/pgtable.h>
+#include <asm/cputype.h>
+#include <asm/highmem.h>
+#include <asm/mach/map.h>
+#include <asm/memory.h>
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+#include <asm/procinfo.h>
+#include <asm/proc-fns.h>
+
+#include "mm.h"
+
+static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE);
+
+pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss;
+
+static __init void *kasan_alloc_block(size_t size)
+{
+	return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
+				      MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE);
+}
+
+static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr,
+				      unsigned long end, bool early)
+{
+	unsigned long next;
+	pte_t *ptep = pte_offset_kernel(pmdp, addr);
+
+	do {
+		pte_t entry;
+		void *p;
+
+		next = addr + PAGE_SIZE;
+
+		if (!early) {
+			if (!pte_none(READ_ONCE(*ptep)))
+				continue;
+
+			p = kasan_alloc_block(PAGE_SIZE);
+			if (!p) {
+				panic("%s failed to allocate shadow page for address 0x%lx\n",
+				      __func__, addr);
+				return;
+			}
+			memset(p, KASAN_SHADOW_INIT, PAGE_SIZE);
+			entry = pfn_pte(virt_to_pfn(p),
+					__pgprot(pgprot_val(PAGE_KERNEL)));
+		} else if (pte_none(READ_ONCE(*ptep))) {
+			/*
+			 * The early shadow memory is mapping all KASan
+			 * operations to one and the same page in memory,
+			 * "kasan_early_shadow_page" so that the instrumentation
+			 * will work on a scratch area until we can set up the
+			 * proper KASan shadow memory.
+			 */
+			entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page),
+					__pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN));
+		} else {
+			/*
+			 * Early shadow mappings are PMD_SIZE aligned, so if the
+			 * first entry is already set, they must all be set.
+			 */
+			return;
+		}
+
+		set_pte_at(&init_mm, addr, ptep, entry);
+	} while (ptep++, addr = next, addr != end);
+}
+
+/*
+ * The pmd (page middle directory) is only used on LPAE
+ */
+static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr,
+				      unsigned long end, bool early)
+{
+	unsigned long next;
+	pmd_t *pmdp = pmd_offset(pudp, addr);
+
+	do {
+		if (pmd_none(*pmdp)) {
+			/*
+			 * We attempt to allocate a shadow block for the PMDs
+			 * used by the PTEs for this address if it isn't already
+			 * allocated.
+			 */
+			void *p = early ? kasan_early_shadow_pte :
+				kasan_alloc_block(PAGE_SIZE);
+
+			if (!p) {
+				panic("%s failed to allocate shadow block for address 0x%lx\n",
+				      __func__, addr);
+				return;
+			}
+			pmd_populate_kernel(&init_mm, pmdp, p);
+			flush_pmd_entry(pmdp);
+		}
+
+		next = pmd_addr_end(addr, end);
+		kasan_pte_populate(pmdp, addr, next, early);
+	} while (pmdp++, addr = next, addr != end);
+}
+
+static void __init kasan_pgd_populate(unsigned long addr, unsigned long end,
+				      bool early)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+	p4d_t *p4dp;
+	pud_t *pudp;
+
+	pgdp = pgd_offset_k(addr);
+
+	do {
+		/*
+		 * Allocate and populate the shadow block of p4d folded into
+		 * pud folded into pmd if it doesn't already exist
+		 */
+		if (!early && pgd_none(*pgdp)) {
+			void *p = kasan_alloc_block(PAGE_SIZE);
+
+			if (!p) {
+				panic("%s failed to allocate shadow block for address 0x%lx\n",
+				      __func__, addr);
+				return;
+			}
+			pgd_populate(&init_mm, pgdp, p);
+		}
+
+		next = pgd_addr_end(addr, end);
+		/*
+		 * We just immediately jump over the p4d and pud page
+		 * directories since we believe ARM32 will never gain four
+		 * nor five level page tables.
+		 */
+		p4dp = p4d_offset(pgdp, addr);
+		pudp = pud_offset(p4dp, addr);
+
+		kasan_pmd_populate(pudp, addr, next, early);
+	} while (pgdp++, addr = next, addr != end);
+}
+
+extern struct proc_info_list *lookup_processor_type(unsigned int);
+
+void __init kasan_early_init(void)
+{
+	struct proc_info_list *list;
+
+	/*
+	 * locate processor in the list of supported processor
+	 * types.  The linker builds this table for us from the
+	 * entries in arch/arm/mm/proc-*.S
+	 */
+	list = lookup_processor_type(read_cpuid_id());
+	if (list) {
+#ifdef MULTI_CPU
+		processor = *list->proc;
+#endif
+	}
+
+	BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET);
+	/*
+	 * We walk the page table and set all of the shadow memory to point
+	 * to the scratch page.
+	 */
+	kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+}
+
+static void __init clear_pgds(unsigned long start,
+			unsigned long end)
+{
+	for (; start && start < end; start += PMD_SIZE)
+		pmd_clear(pmd_off_k(start));
+}
+
+static int __init create_mapping(void *start, void *end)
+{
+	void *shadow_start, *shadow_end;
+
+	shadow_start = kasan_mem_to_shadow(start);
+	shadow_end = kasan_mem_to_shadow(end);
+
+	pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n",
+		start, end, shadow_start, shadow_end);
+
+	kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK,
+			   PAGE_ALIGN((unsigned long)shadow_end), false);
+	return 0;
+}
+
+void __init kasan_init(void)
+{
+	phys_addr_t pa_start, pa_end;
+	u64 i;
+
+	/*
+	 * We are going to perform proper setup of shadow memory.
+	 *
+	 * At first we should unmap early shadow (clear_pgds() call bellow).
+	 * However, instrumented code can't execute without shadow memory.
+	 *
+	 * To keep the early shadow memory MMU tables around while setting up
+	 * the proper shadow memory, we copy swapper_pg_dir (the initial page
+	 * table) to tmp_pgd_table and use that to keep the early shadow memory
+	 * mapped until the full shadow setup is finished. Then we swap back
+	 * to the proper swapper_pg_dir.
+	 */
+
+	memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table));
+#ifdef CONFIG_ARM_LPAE
+	/* We need to be in the same PGD or this won't work */
+	BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) !=
+		     pgd_index(KASAN_SHADOW_END));
+	memcpy(tmp_pmd_table,
+	       pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)),
+	       sizeof(tmp_pmd_table));
+	set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)],
+		__pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER));
+#endif
+	cpu_switch_mm(tmp_pgd_table, &init_mm);
+	local_flush_tlb_all();
+
+	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+	kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START),
+				    kasan_mem_to_shadow((void *)-1UL) + 1);
+
+	for_each_mem_range(i, &pa_start, &pa_end) {
+		void *start = __va(pa_start);
+		void *end = __va(pa_end);
+
+		/* Do not attempt to shadow highmem */
+		if (pa_start >= arm_lowmem_limit) {
+			pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end);
+			continue;
+		}
+		if (pa_end > arm_lowmem_limit) {
+			pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n",
+				&pa_start, &pa_end, &arm_lowmem_limit);
+			end = __va(arm_lowmem_limit);
+		}
+		if (start >= end) {
+			pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n",
+				&pa_start, &pa_end, start, end);
+			continue;
+		}
+
+		create_mapping(start, end);
+	}
+
+	/*
+	 * 1. The module global variables are in MODULES_VADDR ~ MODULES_END,
+	 *    so we need to map this area.
+	 * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR
+	 *    ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't
+	 *    use kasan_populate_zero_shadow.
+	 */
+	create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE));
+
+	/*
+	 * KAsan may reuse the contents of kasan_early_shadow_pte directly, so
+	 * we should make sure that it maps the zero page read-only.
+	 */
+	for (i = 0; i < PTRS_PER_PTE; i++)
+		set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE,
+			   &kasan_early_shadow_pte[i],
+			   pfn_pte(virt_to_pfn(kasan_early_shadow_page),
+				__pgprot(pgprot_val(PAGE_KERNEL)
+					 | L_PTE_RDONLY)));
+
+	cpu_switch_mm(swapper_pg_dir, &init_mm);
+	local_flush_tlb_all();
+
+	memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+	pr_info("Kernel address sanitizer initialized\n");
+	init_task.kasan_depth = 0;
+}
diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c
index c5e1b27046a8..f8e9bc58a84f 100644
--- a/arch/arm/mm/pgd.c
+++ b/arch/arm/mm/pgd.c
@@ -66,7 +66,21 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	new_pmd = pmd_alloc(mm, new_pud, 0);
 	if (!new_pmd)
 		goto no_pmd;
-#endif
+#ifdef CONFIG_KASAN
+	/*
+	 * Copy PMD table for KASAN shadow mappings.
+	 */
+	init_pgd = pgd_offset_k(TASK_SIZE);
+	init_p4d = p4d_offset(init_pgd, TASK_SIZE);
+	init_pud = pud_offset(init_p4d, TASK_SIZE);
+	init_pmd = pmd_offset(init_pud, TASK_SIZE);
+	new_pmd = pmd_offset(new_pud, TASK_SIZE);
+	memcpy(new_pmd, init_pmd,
+	       (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE))
+	       * sizeof(pmd_t));
+	clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t));
+#endif /* CONFIG_KASAN */
+#endif /* CONFIG_LPAE */
 
 	if (!vectors_high()) {
 		/*
-- 
cgit v1.2.3


From 421015713b306e47af95d4d61cdfbd96d462e4cb Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 25 Oct 2020 23:56:18 +0100
Subject: ARM: 9017/2: Enable KASan for ARM

This patch enables the kernel address sanitizer for ARM. XIP_KERNEL
has not been tested and is therefore not allowed for now.

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: kasan-dev@googlegroups.com
Acked-by: Dmitry Vyukov <dvyukov@google.com>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org> # QEMU/KVM/mach-virt/LPAE/8G
Tested-by: Florian Fainelli <f.fainelli@gmail.com> # Brahma SoCs
Tested-by: Ahmad Fatoum <a.fatoum@pengutronix.de> # i.MX6Q
Signed-off-by: Abbott Liu <liuwenliang@huawei.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 194032568b7a..2c5eb5c2b310 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -67,6 +67,7 @@ config ARM
 	select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU
 	select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU
+	select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL
 	select HAVE_ARCH_MMAP_RND_BITS if MMU
 	select HAVE_ARCH_SECCOMP
 	select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
-- 
cgit v1.2.3


From fc2933c133744305236793025b00c2f7d258b687 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Wed, 28 Oct 2020 14:20:55 +0100
Subject: ARM: 9020/1: mm: use correct section size macro to describe the FDT
 virtual address

Commit

  149a3ffe62b9dbc3 ("9012/1: move device tree mapping out of linear region")

created a permanent, read-only section mapping of the device tree blob
provided by the firmware, and added a set of macros to get the base and
size of the virtually mapped FDT based on the physical address. However,
while the mapping code uses the SECTION_SIZE macro correctly, the macros
use PMD_SIZE instead, which means something entirely different on ARM when
using short descriptors, and is therefore not the right quantity to use
here. So replace PMD_SIZE with SECTION_SIZE. While at it, change the names
of the macro and its parameter to clarify that it returns the virtual
address of the start of the FDT, based on the physical address in memory.

Tested-by: Joel Stanley <joel@jms.id.au>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/memory.h | 6 +++---
 arch/arm/kernel/setup.c       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 598dbdca2017..38a163f50130 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -73,8 +73,8 @@
 #define XIP_VIRT_ADDR(physaddr)  (MODULES_VADDR + ((physaddr) & 0x000fffff))
 
 #define FDT_FIXED_BASE		UL(0xff800000)
-#define FDT_FIXED_SIZE		(2 * PMD_SIZE)
-#define FDT_VIRT_ADDR(physaddr)	((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE))
+#define FDT_FIXED_SIZE		(2 * SECTION_SIZE)
+#define FDT_VIRT_BASE(physbase)	((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE))
 
 #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
 /*
@@ -116,7 +116,7 @@ extern unsigned long vectors_base;
 #define MODULES_VADDR		PAGE_OFFSET
 
 #define XIP_VIRT_ADDR(physaddr)  (physaddr)
-#define FDT_VIRT_ADDR(physaddr)  ((void *)(physaddr))
+#define FDT_VIRT_BASE(physbase)  ((void *)(physbase))
 
 #endif /* !CONFIG_MMU */
 
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f4bd8b654255..d32f7652a5bf 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1087,7 +1087,7 @@ void __init setup_arch(char **cmdline_p)
 	void *atags_vaddr = NULL;
 
 	if (__atags_pointer)
-		atags_vaddr = FDT_VIRT_ADDR(__atags_pointer);
+		atags_vaddr = FDT_VIRT_BASE(__atags_pointer);
 
 	setup_processor();
 	if (atags_vaddr) {
-- 
cgit v1.2.3


From 4e79f0211b473f8e1eab8211a9fd50cc41a3a061 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 21 Sep 2020 00:10:16 +0200
Subject: ARM: p2v: fix handling of LPAE translation in BE mode

When running in BE mode on LPAE hardware with a PA-to-VA translation
that exceeds 4 GB, we patch bits 39:32 of the offset into the wrong
byte of the opcode. So fix that, by rotating the offset in r0 to the
right by 8 bits, which will put the 8-bit immediate in bits 31:24.

Note that this will also move bit #22 in its correct place when
applying the rotation to the constant #0x400000.

Fixes: d9a790df8e984 ("ARM: 7883/1: fix mov to mvn conversion in case of 64 bit phys_addr_t and BE")
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head.S | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f8904227e7fd..98c1e68bdfcb 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -671,12 +671,8 @@ ARM_BE8(rev16	ip, ip)
 	ldrcc	r7, [r4], #4	@ use branch for delay slot
 	bcc	1b
 	bx	lr
-#else
-#ifdef CONFIG_CPU_ENDIAN_BE8
-	moveq	r0, #0x00004000	@ set bit 22, mov to mvn instruction
 #else
 	moveq	r0, #0x400000	@ set bit 22, mov to mvn instruction
-#endif
 	b	2f
 1:	ldr	ip, [r7, r3]
 #ifdef CONFIG_CPU_ENDIAN_BE8
@@ -685,7 +681,7 @@ ARM_BE8(rev16	ip, ip)
 	tst	ip, #0x000f0000	@ check the rotation field
 	orrne	ip, ip, r6, lsl #24 @ mask in offset bits 31-24
 	biceq	ip, ip, #0x00004000 @ clear bit 22
-	orreq	ip, ip, r0      @ mask in offset bits 7-0
+	orreq	ip, ip, r0, ror #8  @ mask in offset bits 7-0
 #else
 	bic	ip, ip, #0x000000ff
 	tst	ip, #0xf00	@ check the rotation field
-- 
cgit v1.2.3


From 0b1674638a5c69cbace63278625c199100955490 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:23:39 +0300
Subject: ARM: assembler: introduce adr_l, ldr_l and str_l macros

Like arm64, ARM supports position independent code sequences that
produce symbol references with a greater reach than the ordinary
adr/ldr instructions. Since on ARM, the adrl pseudo-instruction is
only supported in ARM mode (and not at all when using Clang), having
a adr_l macro like we do on arm64 is useful, and increases symmetry
as well.

Currently, we use open coded instruction sequences involving literals
and arithmetic operations. Instead, we can use movw/movt pairs on v7
CPUs, circumventing the D-cache entirely.

E.g., on v7+ CPUs, we can emit a PC-relative reference as follows:

       movw         <reg>, #:lower16:<sym> - (1f + 8)
       movt         <reg>, #:upper16:<sym> - (1f + 8)
  1:   add          <reg>, <reg>, pc

For older CPUs, we can emit the literal into a subsection, allowing it
to be emitted out of line while retaining the ability to perform
arithmetic on label offsets.

E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows:

       ldr          <reg>, 2f
  1:   add          <reg>, <reg>, pc
       .subsection  1
  2:   .long        <sym> - (1b + 8)
       .previous

This is allowed by the assembler because, unlike ordinary sections,
subsections are combined into a single section in the object file, and
so the label references are not true cross-section references that are
visible as relocations. (Subsections have been available in binutils
since 2004 at least, so they should not cause any issues with older
toolchains.)

So use the above to implement the macros mov_l, adr_l, ldr_l and str_l,
all of which will use movw/movt pairs on v7 and later CPUs, and use
PC-relative literals otherwise.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/assembler.h | 84 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index feac2c8b86f2..72627c5fb3b2 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -494,4 +494,88 @@ THUMB(	orr	\reg , \reg , #PSR_T_BIT	)
 #define _ASM_NOKPROBE(entry)
 #endif
 
+	.macro		__adldst_l, op, reg, sym, tmp, c
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr\c		\tmp, .La\@
+	.subsection	1
+	.align		2
+.La\@:	.long		\sym - .Lpc\@
+	.previous
+	.else
+	.ifnb		\c
+ THUMB(	ittt		\c			)
+	.endif
+	movw\c		\tmp, #:lower16:\sym - .Lpc\@
+	movt\c		\tmp, #:upper16:\sym - .Lpc\@
+	.endif
+
+#ifndef CONFIG_THUMB2_KERNEL
+	.set		.Lpc\@, . + 8			// PC bias
+	.ifc		\op, add
+	add\c		\reg, \tmp, pc
+	.else
+	\op\c		\reg, [pc, \tmp]
+	.endif
+#else
+.Lb\@:	add\c		\tmp, \tmp, pc
+	/*
+	 * In Thumb-2 builds, the PC bias depends on whether we are currently
+	 * emitting into a .arm or a .thumb section. The size of the add opcode
+	 * above will be 2 bytes when emitting in Thumb mode and 4 bytes when
+	 * emitting in ARM mode, so let's use this to account for the bias.
+	 */
+	.set		.Lpc\@, . + (. - .Lb\@)
+
+	.ifnc		\op, add
+	\op\c		\reg, [\tmp]
+	.endif
+#endif
+	.endm
+
+	/*
+	 * mov_l - move a constant value or [relocated] address into a register
+	 */
+	.macro		mov_l, dst:req, imm:req
+	.if		__LINUX_ARM_ARCH__ < 7
+	ldr		\dst, =\imm
+	.else
+	movw		\dst, #:lower16:\imm
+	movt		\dst, #:upper16:\imm
+	.endif
+	.endm
+
+	/*
+	 * adr_l - adr pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 * @cond: conditional opcode suffix
+	 */
+	.macro		adr_l, dst:req, sym:req, cond
+	__adldst_l	add, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * ldr_l - ldr <literal> pseudo-op with unlimited range
+	 *
+	 * @dst: destination register
+	 * @sym: name of the symbol
+	 * @cond: conditional opcode suffix
+	 */
+	.macro		ldr_l, dst:req, sym:req, cond
+	__adldst_l	ldr, \dst, \sym, \dst, \cond
+	.endm
+
+	/*
+	 * str_l - str <literal> pseudo-op with unlimited range
+	 *
+	 * @src: source register
+	 * @sym: name of the symbol
+	 * @tmp: mandatory scratch register
+	 * @cond: conditional opcode suffix
+	 */
+	.macro		str_l, src:req, sym:req, tmp:req, cond
+	__adldst_l	str, \src, \sym, \tmp, \cond
+	.endm
+
 #endif /* __ASM_ASSEMBLER_H__ */
-- 
cgit v1.2.3


From 22f2d23098f7d34fc5142531cffb241d14611684 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:24:37 +0300
Subject: ARM: module: add support for place relative relocations

When using the new adr_l/ldr_l/str_l macros to refer to external symbols
from modules, the linker may emit place relative ELF relocations that
need to be fixed up by the module loader. So add support for these.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/elf.h |  5 +++++
 arch/arm/kernel/module.c   | 20 ++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h
index b078d992414b..0ac62a54b73c 100644
--- a/arch/arm/include/asm/elf.h
+++ b/arch/arm/include/asm/elf.h
@@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t;
 #define R_ARM_NONE		0
 #define R_ARM_PC24		1
 #define R_ARM_ABS32		2
+#define R_ARM_REL32		3
 #define R_ARM_CALL		28
 #define R_ARM_JUMP24		29
 #define R_ARM_TARGET1		38
@@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t;
 #define R_ARM_PREL31		42
 #define R_ARM_MOVW_ABS_NC	43
 #define R_ARM_MOVT_ABS		44
+#define R_ARM_MOVW_PREL_NC	45
+#define R_ARM_MOVT_PREL		46
 
 #define R_ARM_THM_CALL		10
 #define R_ARM_THM_JUMP24	30
 #define R_ARM_THM_MOVW_ABS_NC	47
 #define R_ARM_THM_MOVT_ABS	48
+#define R_ARM_THM_MOVW_PREL_NC	49
+#define R_ARM_THM_MOVT_PREL	50
 
 /*
  * These are used to set parameters in the core dumps.
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c
index e15444b25ca0..beac45e89ba6 100644
--- a/arch/arm/kernel/module.c
+++ b/arch/arm/kernel/module.c
@@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			*(u32 *)loc |= offset & 0x7fffffff;
 			break;
 
+		case R_ARM_REL32:
+			*(u32 *)loc += sym->st_value - loc;
+			break;
+
 		case R_ARM_MOVW_ABS_NC:
 		case R_ARM_MOVT_ABS:
+		case R_ARM_MOVW_PREL_NC:
+		case R_ARM_MOVT_PREL:
 			offset = tmp = __mem_to_opcode_arm(*(u32 *)loc);
 			offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff);
 			offset = (offset ^ 0x8000) - 0x8000;
 
 			offset += sym->st_value;
-			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS)
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL ||
+			    ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC)
+				offset -= loc;
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS ||
+			    ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL)
 				offset >>= 16;
 
 			tmp &= 0xfff0f000;
@@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 
 		case R_ARM_THM_MOVW_ABS_NC:
 		case R_ARM_THM_MOVT_ABS:
+		case R_ARM_THM_MOVW_PREL_NC:
+		case R_ARM_THM_MOVT_PREL:
 			upper = __mem_to_opcode_thumb16(*(u16 *)loc);
 			lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2));
 
@@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
 			offset = (offset ^ 0x8000) - 0x8000;
 			offset += sym->st_value;
 
-			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS)
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL ||
+			    ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC)
+				offset -= loc;
+			if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS ||
+			    ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL)
 				offset >>= 16;
 
 			upper = (u16)((upper & 0xfbf0) |
-- 
cgit v1.2.3


From eae78e1a97201a81a851342ad9659b60f61a3951 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 20 Sep 2020 17:51:55 +0200
Subject: ARM: p2v: move patching code to separate assembler source file

Move the phys2virt patching code into a separate .S file before doing
some work on it.

Suggested-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/Makefile    |   1 +
 arch/arm/kernel/head.S      | 138 ----------------------------------------
 arch/arm/kernel/phys2virt.S | 151 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+), 138 deletions(-)
 create mode 100644 arch/arm/kernel/phys2virt.S

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 89e5d864e923..9e465efcc8b6 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_PARAVIRT)	+= paravirt.o
 head-y			:= head$(MMUEXT).o
 obj-$(CONFIG_DEBUG_LL)	+= debug.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
+obj-$(CONFIG_ARM_PATCH_PHYS_VIRT)	+= phys2virt.o
 
 # This is executed very early using a temporary stack when no memory allocator
 # nor global data is available. Everything has to be allocated on the stack.
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 98c1e68bdfcb..7e3f36809011 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -586,142 +586,4 @@ ENTRY(fixup_smp)
 	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(fixup_smp)
 
-#ifdef __ARMEB__
-#define LOW_OFFSET	0x4
-#define HIGH_OFFSET	0x0
-#else
-#define LOW_OFFSET	0x0
-#define HIGH_OFFSET	0x4
-#endif
-
-#ifdef CONFIG_ARM_PATCH_PHYS_VIRT
-
-/* __fixup_pv_table - patch the stub instructions with the delta between
- * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and
- * can be expressed by an immediate shifter operand. The stub instruction
- * has a form of '(add|sub) rd, rn, #imm'.
- */
-	__HEAD
-__fixup_pv_table:
-	adr	r0, 1f
-	ldmia	r0, {r3-r7}
-	mvn	ip, #0
-	subs	r3, r0, r3	@ PHYS_OFFSET - PAGE_OFFSET
-	add	r4, r4, r3	@ adjust table start address
-	add	r5, r5, r3	@ adjust table end address
-	add	r6, r6, r3	@ adjust __pv_phys_pfn_offset address
-	add	r7, r7, r3	@ adjust __pv_offset address
-	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
-	str	r0, [r6]	@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
-	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
-	mov	r6, r3, lsr #24	@ constant for add/sub instructions
-	teq	r3, r6, lsl #24 @ must be 16MiB aligned
-THUMB(	it	ne		@ cross section branch )
-	bne	__error
-	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
-	b	__fixup_a_pv_table
-ENDPROC(__fixup_pv_table)
-
-	.align
-1:	.long	.
-	.long	__pv_table_begin
-	.long	__pv_table_end
-2:	.long	__pv_phys_pfn_offset
-	.long	__pv_offset
-
-	.text
-__fixup_a_pv_table:
-	adr	r0, 3f
-	ldr	r6, [r0]
-	add	r6, r6, r3
-	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
-	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
-	mov	r6, r6, lsr #24
-	cmn	r0, #1
-#ifdef CONFIG_THUMB2_KERNEL
-	moveq	r0, #0x200000	@ set bit 21, mov to mvn instruction
-	lsls	r6, #24
-	beq	2f
-	clz	r7, r6
-	lsr	r6, #24
-	lsl	r6, r7
-	bic	r6, #0x0080
-	lsrs	r7, #1
-	orrcs	r6, #0x0080
-	orr	r6, r6, r7, lsl #12
-	orr	r6, #0x4000
-	b	2f
-1:	add     r7, r3
-	ldrh	ip, [r7, #2]
-ARM_BE8(rev16	ip, ip)
-	tst	ip, #0x4000
-	and	ip, #0x8f00
-	orrne	ip, r6	@ mask in offset bits 31-24
-	orreq	ip, r0	@ mask in offset bits 7-0
-ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7, #2]
-	bne	2f
-	ldrh	ip, [r7]
-ARM_BE8(rev16	ip, ip)
-	bic	ip, #0x20
-	orr	ip, ip, r0, lsr #16
-ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7]
-2:	cmp	r4, r5
-	ldrcc	r7, [r4], #4	@ use branch for delay slot
-	bcc	1b
-	bx	lr
-#else
-	moveq	r0, #0x400000	@ set bit 22, mov to mvn instruction
-	b	2f
-1:	ldr	ip, [r7, r3]
-#ifdef CONFIG_CPU_ENDIAN_BE8
-	@ in BE8, we load data in BE, but instructions still in LE
-	bic	ip, ip, #0xff000000
-	tst	ip, #0x000f0000	@ check the rotation field
-	orrne	ip, ip, r6, lsl #24 @ mask in offset bits 31-24
-	biceq	ip, ip, #0x00004000 @ clear bit 22
-	orreq	ip, ip, r0, ror #8  @ mask in offset bits 7-0
-#else
-	bic	ip, ip, #0x000000ff
-	tst	ip, #0xf00	@ check the rotation field
-	orrne	ip, ip, r6	@ mask in offset bits 31-24
-	biceq	ip, ip, #0x400000	@ clear bit 22
-	orreq	ip, ip, r0	@ mask in offset bits 7-0
-#endif
-	str	ip, [r7, r3]
-2:	cmp	r4, r5
-	ldrcc	r7, [r4], #4	@ use branch for delay slot
-	bcc	1b
-	ret	lr
-#endif
-ENDPROC(__fixup_a_pv_table)
-
-	.align
-3:	.long __pv_offset
-
-ENTRY(fixup_pv_table)
-	stmfd	sp!, {r4 - r7, lr}
-	mov	r3, #0			@ no offset
-	mov	r4, r0			@ r0 = table start
-	add	r5, r0, r1		@ r1 = table size
-	bl	__fixup_a_pv_table
-	ldmfd	sp!, {r4 - r7, pc}
-ENDPROC(fixup_pv_table)
-
-	.data
-	.align	2
-	.globl	__pv_phys_pfn_offset
-	.type	__pv_phys_pfn_offset, %object
-__pv_phys_pfn_offset:
-	.word	0
-	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
-
-	.globl	__pv_offset
-	.type	__pv_offset, %object
-__pv_offset:
-	.quad	0
-	.size	__pv_offset, . -__pv_offset
-#endif
-
 #include "head-common.S"
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
new file mode 100644
index 000000000000..7c17fbfeeedd
--- /dev/null
+++ b/arch/arm/kernel/phys2virt.S
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ *  Copyright (C) 1994-2002 Russell King
+ *  Copyright (c) 2003 ARM Limited
+ *  All Rights Reserved
+ */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+
+#ifdef __ARMEB__
+#define LOW_OFFSET	0x4
+#define HIGH_OFFSET	0x0
+#else
+#define LOW_OFFSET	0x0
+#define HIGH_OFFSET	0x4
+#endif
+
+/*
+ * __fixup_pv_table - patch the stub instructions with the delta between
+ *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
+ *                    16MiB aligned.
+ *
+ * Called from head.S, which expects the following registers to be preserved:
+ *   r1 = machine no, r2 = atags or dtb,
+ *   r8 = phys_offset, r9 = cpuid, r10 = procinfo
+ */
+	__HEAD
+ENTRY(__fixup_pv_table)
+	adr	r0, 1f
+	ldmia	r0, {r3-r7}
+	mvn	ip, #0
+	subs	r3, r0, r3		@ PHYS_OFFSET - PAGE_OFFSET
+	add	r4, r4, r3		@ adjust table start address
+	add	r5, r5, r3		@ adjust table end address
+	add	r6, r6, r3		@ adjust __pv_phys_pfn_offset address
+	add	r7, r7, r3		@ adjust __pv_offset address
+	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
+	str	r0, [r6]		@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
+	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
+	mov	r6, r3, lsr #24		@ constant for add/sub instructions
+	teq	r3, r6, lsl #24 	@ must be 16MiB aligned
+	bne	0f
+	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
+	b	__fixup_a_pv_table
+0:	mov	r0, r0			@ deadloop on error
+	b	0b
+ENDPROC(__fixup_pv_table)
+
+	.align
+1:	.long	.
+	.long	__pv_table_begin
+	.long	__pv_table_end
+2:	.long	__pv_phys_pfn_offset
+	.long	__pv_offset
+
+	.text
+__fixup_a_pv_table:
+	adr	r0, 3f
+	ldr	r6, [r0]
+	add	r6, r6, r3
+	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
+	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
+	mov	r6, r6, lsr #24
+	cmn	r0, #1
+#ifdef CONFIG_THUMB2_KERNEL
+	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
+	lsls	r6, #24
+	beq	2f
+	clz	r7, r6
+	lsr	r6, #24
+	lsl	r6, r7
+	bic	r6, #0x0080
+	lsrs	r7, #1
+	orrcs	r6, #0x0080
+	orr	r6, r6, r7, lsl #12
+	orr	r6, #0x4000
+	b	2f
+1:	add	r7, r3
+	ldrh	ip, [r7, #2]
+ARM_BE8(rev16	ip, ip)
+	tst	ip, #0x4000
+	and	ip, #0x8f00
+	orrne	ip, r6			@ mask in offset bits 31-24
+	orreq	ip, r0			@ mask in offset bits 7-0
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7, #2]
+	bne	2f
+	ldrh	ip, [r7]
+ARM_BE8(rev16	ip, ip)
+	bic	ip, #0x20
+	orr	ip, ip, r0, lsr #16
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7]
+2:	cmp	r4, r5
+	ldrcc	r7, [r4], #4		@ use branch for delay slot
+	bcc	1b
+	bx	lr
+#else
+	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
+	b	2f
+1:	ldr	ip, [r7, r3]
+#ifdef CONFIG_CPU_ENDIAN_BE8
+	@ in BE8, we load data in BE, but instructions still in LE
+	bic	ip, ip, #0xff000000
+	tst	ip, #0x000f0000		@ check the rotation field
+	orrne	ip, ip, r6, lsl #24	@ mask in offset bits 31-24
+	biceq	ip, ip, #0x00004000	@ clear bit 22
+	orreq	ip, ip, r0, ror #8	@ mask in offset bits 7-0
+#else
+	bic	ip, ip, #0x000000ff
+	tst	ip, #0xf00		@ check the rotation field
+	orrne	ip, ip, r6		@ mask in offset bits 31-24
+	biceq	ip, ip, #0x400000	@ clear bit 22
+	orreq	ip, ip, r0		@ mask in offset bits 7-0
+#endif
+	str	ip, [r7, r3]
+2:	cmp	r4, r5
+	ldrcc	r7, [r4], #4		@ use branch for delay slot
+	bcc	1b
+	ret	lr
+#endif
+ENDPROC(__fixup_a_pv_table)
+
+	.align
+3:	.long __pv_offset
+
+ENTRY(fixup_pv_table)
+	stmfd	sp!, {r4 - r7, lr}
+	mov	r3, #0			@ no offset
+	mov	r4, r0			@ r0 = table start
+	add	r5, r0, r1		@ r1 = table size
+	bl	__fixup_a_pv_table
+	ldmfd	sp!, {r4 - r7, pc}
+ENDPROC(fixup_pv_table)
+
+	.data
+	.align	2
+	.globl	__pv_phys_pfn_offset
+	.type	__pv_phys_pfn_offset, %object
+__pv_phys_pfn_offset:
+	.word	0
+	.size	__pv_phys_pfn_offset, . -__pv_phys_pfn_offset
+
+	.globl	__pv_offset
+	.type	__pv_offset, %object
+__pv_offset:
+	.quad	0
+	.size	__pv_offset, . -__pv_offset
-- 
cgit v1.2.3


From 4b16421c3e955f440eb45546db6ce33d47f29c78 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 17 Sep 2020 21:29:36 +0300
Subject: ARM: p2v: factor out shared loop processing

The ARM and Thumb2 versions of the p2v patching loop have some overlap
at the end of the loop, so factor that out. As numeric labels are not
required to be unique, and may therefore be ambiguous, use named local
labels for the start and end of the loop instead.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/phys2virt.S | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index 7c17fbfeeedd..8fb1f7bcc720 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -68,7 +68,7 @@ __fixup_a_pv_table:
 #ifdef CONFIG_THUMB2_KERNEL
 	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
 	lsls	r6, #24
-	beq	2f
+	beq	.Lnext
 	clz	r7, r6
 	lsr	r6, #24
 	lsl	r6, r7
@@ -77,8 +77,8 @@ __fixup_a_pv_table:
 	orrcs	r6, #0x0080
 	orr	r6, r6, r7, lsl #12
 	orr	r6, #0x4000
-	b	2f
-1:	add	r7, r3
+	b	.Lnext
+.Lloop:	add	r7, r3
 	ldrh	ip, [r7, #2]
 ARM_BE8(rev16	ip, ip)
 	tst	ip, #0x4000
@@ -87,21 +87,17 @@ ARM_BE8(rev16	ip, ip)
 	orreq	ip, r0			@ mask in offset bits 7-0
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7, #2]
-	bne	2f
+	bne	.Lnext
 	ldrh	ip, [r7]
 ARM_BE8(rev16	ip, ip)
 	bic	ip, #0x20
 	orr	ip, ip, r0, lsr #16
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7]
-2:	cmp	r4, r5
-	ldrcc	r7, [r4], #4		@ use branch for delay slot
-	bcc	1b
-	bx	lr
 #else
 	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
-	b	2f
-1:	ldr	ip, [r7, r3]
+	b	.Lnext
+.Lloop:	ldr	ip, [r7, r3]
 #ifdef CONFIG_CPU_ENDIAN_BE8
 	@ in BE8, we load data in BE, but instructions still in LE
 	bic	ip, ip, #0xff000000
@@ -117,11 +113,13 @@ ARM_BE8(rev16	ip, ip)
 	orreq	ip, ip, r0		@ mask in offset bits 7-0
 #endif
 	str	ip, [r7, r3]
-2:	cmp	r4, r5
+#endif
+
+.Lnext:
+	cmp	r4, r5
 	ldrcc	r7, [r4], #4		@ use branch for delay slot
-	bcc	1b
+	bcc	.Lloop
 	ret	lr
-#endif
 ENDPROC(__fixup_a_pv_table)
 
 	.align
-- 
cgit v1.2.3


From 7a94849e81b5c10e71f0a555300313c2789d9b0d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 17 Sep 2020 21:36:46 +0300
Subject: ARM: p2v: factor out BE8 handling

The big and little endian versions of the ARM p2v patching routine only
differ in the values of the constants, so factor those out into macros
so that we only have one version of the logic sequence to maintain.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/phys2virt.S | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index 8fb1f7bcc720..5031e5a2e78b 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -95,23 +95,25 @@ ARM_BE8(rev16	ip, ip)
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7]
 #else
-	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
-	b	.Lnext
-.Lloop:	ldr	ip, [r7, r3]
 #ifdef CONFIG_CPU_ENDIAN_BE8
-	@ in BE8, we load data in BE, but instructions still in LE
-	bic	ip, ip, #0xff000000
-	tst	ip, #0x000f0000		@ check the rotation field
-	orrne	ip, ip, r6, lsl #24	@ mask in offset bits 31-24
-	biceq	ip, ip, #0x00004000	@ clear bit 22
-	orreq	ip, ip, r0, ror #8	@ mask in offset bits 7-0
+@ in BE8, we load data in BE, but instructions still in LE
+#define PV_BIT22	0x00004000
+#define PV_IMM8_MASK	0xff000000
+#define PV_ROT_MASK	0x000f0000
 #else
-	bic	ip, ip, #0x000000ff
-	tst	ip, #0xf00		@ check the rotation field
-	orrne	ip, ip, r6		@ mask in offset bits 31-24
-	biceq	ip, ip, #0x400000	@ clear bit 22
-	orreq	ip, ip, r0		@ mask in offset bits 7-0
+#define PV_BIT22	0x00400000
+#define PV_IMM8_MASK	0x000000ff
+#define PV_ROT_MASK	0xf00
 #endif
+
+	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
+	b	.Lnext
+.Lloop:	ldr	ip, [r7, r3]
+	bic	ip, ip, #PV_IMM8_MASK
+	tst	ip, #PV_ROT_MASK		@ check the rotation field
+	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
+	biceq	ip, ip, #PV_BIT22		@ clear bit 22
+	orreq	ip, ip, r0 ARM_BE8(, ror #8)	@ mask in offset bits 7-0 (or bit 22)
 	str	ip, [r7, r3]
 #endif
 
-- 
cgit v1.2.3


From 0869f3b9da38889faef2ccafcf675c713d4a3aa8 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 18 Sep 2020 10:00:24 +0300
Subject: ARM: p2v: drop redundant 'type' argument from __pv_stub

We always pass the same value for 'type' so pull it into the __pv_stub
macro itself.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/memory.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 99035b5891ef..eb3c8e6e960a 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,14 +183,14 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
-#define __pv_stub(from,to,instr,type)			\
+#define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
 	"	.pushsection .pv_table,\"a\"\n"		\
 	"	.long	1b\n"				\
 	"	.popsection\n"				\
 	: "=r" (to)					\
-	: "r" (from), "I" (type))
+	: "r" (from), "I" (__PV_BITS_31_24))
 
 #define __pv_stub_mov_hi(t)				\
 	__asm__ volatile("@ __pv_stub_mov\n"		\
@@ -217,7 +217,7 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 	phys_addr_t t;
 
 	if (sizeof(phys_addr_t) == 4) {
-		__pv_stub(x, t, "add", __PV_BITS_31_24);
+		__pv_stub(x, t, "add");
 	} else {
 		__pv_stub_mov_hi(t);
 		__pv_add_carry_stub(x, t);
@@ -235,7 +235,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x)
 	 * assembler expression receives 32 bit argument
 	 * in place where 'r' 32 bit operand is expected.
 	 */
-	__pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24);
+	__pv_stub((unsigned long) x, t, "sub");
 	return t;
 }
 
-- 
cgit v1.2.3


From 2730e8eaa4f2baccc03296e0c5ee109c0673fe5f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 20 Sep 2020 18:23:35 +0200
Subject: ARM: p2v: use relative references in patch site arrays

Free up a register in the p2v patching code by switching to relative
references, which don't require keeping the phys-to-virt displacement
live in a register.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/memory.h |  6 +++---
 arch/arm/kernel/phys2virt.S   | 18 +++++++-----------
 2 files changed, 10 insertions(+), 14 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index eb3c8e6e960a..4121662dea5a 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -187,7 +187,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b\n"				\
+	"	.long	1b - .\n"			\
 	"	.popsection\n"				\
 	: "=r" (to)					\
 	: "r" (from), "I" (__PV_BITS_31_24))
@@ -196,7 +196,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	__asm__ volatile("@ __pv_stub_mov\n"		\
 	"1:	mov	%R0, %1\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b\n"				\
+	"	.long	1b - .\n"			\
 	"	.popsection\n"				\
 	: "=r" (t)					\
 	: "I" (__PV_BITS_7_0))
@@ -206,7 +206,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	"1:	adds	%Q0, %1, %2\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b\n"				\
+	"	.long	1b - .\n"			\
 	"	.popsection\n"				\
 	: "+r" (y)					\
 	: "r" (x), "I" (__PV_BITS_31_24)		\
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index 5031e5a2e78b..8e4be15e1559 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -58,9 +58,7 @@ ENDPROC(__fixup_pv_table)
 
 	.text
 __fixup_a_pv_table:
-	adr	r0, 3f
-	ldr	r6, [r0]
-	add	r6, r6, r3
+	adr_l	r6, __pv_offset
 	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
 	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
 	mov	r6, r6, lsr #24
@@ -78,7 +76,8 @@ __fixup_a_pv_table:
 	orr	r6, r6, r7, lsl #12
 	orr	r6, #0x4000
 	b	.Lnext
-.Lloop:	add	r7, r3
+.Lloop:	add	r7, r4
+	adds	r4, #4
 	ldrh	ip, [r7, #2]
 ARM_BE8(rev16	ip, ip)
 	tst	ip, #0x4000
@@ -108,28 +107,25 @@ ARM_BE8(rev16	ip, ip)
 
 	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
 	b	.Lnext
-.Lloop:	ldr	ip, [r7, r3]
+.Lloop:	ldr	ip, [r7, r4]
 	bic	ip, ip, #PV_IMM8_MASK
 	tst	ip, #PV_ROT_MASK		@ check the rotation field
 	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
 	biceq	ip, ip, #PV_BIT22		@ clear bit 22
 	orreq	ip, ip, r0 ARM_BE8(, ror #8)	@ mask in offset bits 7-0 (or bit 22)
-	str	ip, [r7, r3]
+	str	ip, [r7, r4]
+	add	r4, r4, #4
 #endif
 
 .Lnext:
 	cmp	r4, r5
-	ldrcc	r7, [r4], #4		@ use branch for delay slot
+	ldrcc	r7, [r4]		@ use branch for delay slot
 	bcc	.Lloop
 	ret	lr
 ENDPROC(__fixup_a_pv_table)
 
-	.align
-3:	.long __pv_offset
-
 ENTRY(fixup_pv_table)
 	stmfd	sp!, {r4 - r7, lr}
-	mov	r3, #0			@ no offset
 	mov	r4, r0			@ r0 = table start
 	add	r5, r0, r1		@ r1 = table size
 	bl	__fixup_a_pv_table
-- 
cgit v1.2.3


From 0e3db6c9d7f6fd0ee263325027e8d3fdac5a4c9e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 20 Sep 2020 19:19:32 +0200
Subject: ARM: p2v: simplify __fixup_pv_table()

Declutter the code in __fixup_pv_table() by using the new adr_l/str_l
macros to take PC relative references to external symbols, and by
using the value of PHYS_OFFSET passed in r8 to calculate the p2v
offset.

Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/phys2virt.S | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index 8e4be15e1559..be8fb0d89877 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -29,33 +29,27 @@
  */
 	__HEAD
 ENTRY(__fixup_pv_table)
-	adr	r0, 1f
-	ldmia	r0, {r3-r7}
-	mvn	ip, #0
-	subs	r3, r0, r3		@ PHYS_OFFSET - PAGE_OFFSET
-	add	r4, r4, r3		@ adjust table start address
-	add	r5, r5, r3		@ adjust table end address
-	add	r6, r6, r3		@ adjust __pv_phys_pfn_offset address
-	add	r7, r7, r3		@ adjust __pv_offset address
 	mov	r0, r8, lsr #PAGE_SHIFT	@ convert to PFN
-	str	r0, [r6]		@ save computed PHYS_OFFSET to __pv_phys_pfn_offset
-	strcc	ip, [r7, #HIGH_OFFSET]	@ save to __pv_offset high bits
-	mov	r6, r3, lsr #24		@ constant for add/sub instructions
-	teq	r3, r6, lsl #24 	@ must be 16MiB aligned
+	str_l	r0, __pv_phys_pfn_offset, r3
+
+	adr_l	r0, __pv_offset
+	subs	r3, r8, #PAGE_OFFSET	@ PHYS_OFFSET - PAGE_OFFSET
+	mvn	ip, #0
+	strcc	ip, [r0, #HIGH_OFFSET]	@ save to __pv_offset high bits
+	str	r3, [r0, #LOW_OFFSET]	@ save to __pv_offset low bits
+
+	mov	r0, r3, lsr #24		@ constant for add/sub instructions
+	teq	r3, r0, lsl #24 	@ must be 16MiB aligned
 	bne	0f
-	str	r3, [r7, #LOW_OFFSET]	@ save to __pv_offset low bits
+
+	adr_l	r4, __pv_table_begin
+	adr_l	r5, __pv_table_end
 	b	__fixup_a_pv_table
+
 0:	mov	r0, r0			@ deadloop on error
 	b	0b
 ENDPROC(__fixup_pv_table)
 
-	.align
-1:	.long	.
-	.long	__pv_table_begin
-	.long	__pv_table_end
-2:	.long	__pv_phys_pfn_offset
-	.long	__pv_offset
-
 	.text
 __fixup_a_pv_table:
 	adr_l	r6, __pv_offset
-- 
cgit v1.2.3


From e8e00f5afb087912fb3edb225ee373aa6499bb79 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sun, 20 Sep 2020 23:02:25 +0200
Subject: ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE

In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more versions of opcodes, switch to a sequence
that can be patched by the same patching code for both versions. Note
that the Thumb2 opcodes for MOVW and MVN are unambiguous, and have no
rotation bits in their immediate fields, so there is no need to use
placeholder constants in the asm blocks.

While at it, drop the 'volatile' qualifiers from the asm blocks: the
code does not have any side effects that are invisible to the compiler,
so it is free to omit these sequences if the outputs are not used.

Suggested-by: Russell King <linux@armlinux.org.uk>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/memory.h |  44 +++++++++----
 arch/arm/kernel/phys2virt.S   | 147 +++++++++++++++++++++++++++++++++---------
 2 files changed, 148 insertions(+), 43 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 4121662dea5a..ccf55cef6ab9 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
 #define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)
 
+#ifndef CONFIG_THUMB2_KERNEL
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
@@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end;
 	: "=r" (to)					\
 	: "r" (from), "I" (__PV_BITS_31_24))
 
-#define __pv_stub_mov_hi(t)				\
-	__asm__ volatile("@ __pv_stub_mov\n"		\
-	"1:	mov	%R0, %1\n"			\
+#define __pv_add_carry_stub(x, y)			\
+	__asm__("@ __pv_add_carry_stub\n"		\
+	"0:	movw	%R0, #0\n"			\
+	"	adds	%Q0, %1, %R0, lsl #24\n"	\
+	"1:	mov	%R0, %2\n"			\
+	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - .\n"		\
 	"	.popsection\n"				\
-	: "=r" (t)					\
-	: "I" (__PV_BITS_7_0))
+	: "=&r" (y)					\
+	: "r" (x), "I" (__PV_BITS_7_0)			\
+	: "cc")
+
+#else
+#define __pv_stub(from,to,instr)			\
+	__asm__("@ __pv_stub\n"				\
+	"0:	movw	%0, #0\n"			\
+	"	lsl	%0, #24\n"			\
+	"	" instr " %0, %1, %0\n"			\
+	"	.pushsection .pv_table,\"a\"\n"		\
+	"	.long	0b - .\n"			\
+	"	.popsection\n"				\
+	: "=&r" (to)					\
+	: "r" (from))
 
 #define __pv_add_carry_stub(x, y)			\
-	__asm__ volatile("@ __pv_add_carry_stub\n"	\
-	"1:	adds	%Q0, %1, %2\n"			\
+	__asm__("@ __pv_add_carry_stub\n"		\
+	"0:	movw	%R0, #0\n"			\
+	"	lsls	%R0, #24\n"			\
+	"	adds	%Q0, %1, %R0\n"			\
+	"1:	mvn	%R0, #0\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	0b - ., 1b - .\n"		\
 	"	.popsection\n"				\
-	: "+r" (y)					\
-	: "r" (x), "I" (__PV_BITS_31_24)		\
+	: "=&r" (y)					\
+	: "r" (x)					\
 	: "cc")
+#endif
 
 static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 {
@@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
 	if (sizeof(phys_addr_t) == 4) {
 		__pv_stub(x, t, "add");
 	} else {
-		__pv_stub_mov_hi(t);
 		__pv_add_carry_stub(x, t);
 	}
 	return t;
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index be8fb0d89877..a4e364689663 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
  *  Copyright (C) 1994-2002 Russell King
- *  Copyright (c) 2003 ARM Limited
+ *  Copyright (c) 2003, 2020 ARM Limited
  *  All Rights Reserved
  */
 
@@ -58,55 +58,140 @@ __fixup_a_pv_table:
 	mov	r6, r6, lsr #24
 	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
+	@
+	@ The Thumb-2 versions of the patchable sequences are
+	@
+	@ phys-to-virt:			movw	<reg>, #offset<31:24>
+	@				lsl	<reg>, #24
+	@				sub	<VA>, <PA>, <reg>
+	@
+	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:24>
+	@				lsl	<reg>, #24
+	@				add	<PA>, <VA>, <reg>
+	@
+	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
+	@				lsl	<reg>, #24
+	@				adds	<PAlo>, <VA>, <reg>
+	@				mov	<PAhi>, #offset<39:32>
+	@				adc	<PAhi>, <PAhi>, #0
+	@
+	@ In the non-LPAE case, all patchable instructions are MOVW
+	@ instructions, where we need to patch in the offset into the
+	@ second halfword of the opcode (the 16-bit immediate is encoded
+	@ as imm4:i:imm3:imm8)
+	@
+	@       15       11 10  9           4 3    0  15  14  12 11 8 7    0
+	@      +-----------+---+-------------+------++---+------+----+------+
+	@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
+	@      +-----------+---+-------------+------++---+------+----+------+
+	@
+	@ In the LPAE case, we also need to patch in the high word of the
+	@ offset into the immediate field of the MOV instruction, or patch it
+	@ to a MVN instruction if the offset is negative. In this case, we
+	@ need to inspect the first halfword of the opcode, to check whether
+	@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
+	@ needed. The encoding of the immediate is rather complex for values
+	@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
+	@ order bits, which can be patched into imm8 directly (and i:imm3
+	@ cleared)
+	@
+	@      15       11 10  9        5         0  15  14  12 11 8 7    0
+	@     +-----------+---+---------------------++---+------+----+------+
+	@ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+	@ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
+	@     +-----------+---+---------------------++---+------+----+------+
+	@
 	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
-	lsls	r6, #24
-	beq	.Lnext
-	clz	r7, r6
-	lsr	r6, #24
-	lsl	r6, r7
-	bic	r6, #0x0080
-	lsrs	r7, #1
-	orrcs	r6, #0x0080
-	orr	r6, r6, r7, lsl #12
-	orr	r6, #0x4000
 	b	.Lnext
 .Lloop:	add	r7, r4
-	adds	r4, #4
-	ldrh	ip, [r7, #2]
-ARM_BE8(rev16	ip, ip)
-	tst	ip, #0x4000
-	and	ip, #0x8f00
-	orrne	ip, r6			@ mask in offset bits 31-24
-	orreq	ip, r0			@ mask in offset bits 7-0
-ARM_BE8(rev16	ip, ip)
-	strh	ip, [r7, #2]
-	bne	.Lnext
+	adds	r4, #4			@ clears Z flag
+#ifdef CONFIG_ARM_LPAE
 	ldrh	ip, [r7]
 ARM_BE8(rev16	ip, ip)
-	bic	ip, #0x20
-	orr	ip, ip, r0, lsr #16
+	tst	ip, #0x200		@ MOVW has bit 9 set, MVN has it clear
+	bne	0f			@ skip to MOVW handling (Z flag is clear)
+	bic	ip, #0x20		@ clear bit 5 (MVN -> MOV)
+	orr	ip, ip, r0, lsr #16	@ MOV -> MVN if offset < 0
 ARM_BE8(rev16	ip, ip)
 	strh	ip, [r7]
+	@ Z flag is set
+0:
+#endif
+	ldrh	ip, [r7, #2]
+ARM_BE8(rev16	ip, ip)
+	and	ip, #0xf00		@ clear everything except Rd field
+	orreq	ip, r0			@ Z flag set -> MOV/MVN -> patch in high bits
+	orrne	ip, r6			@ Z flag clear -> MOVW -> patch in low bits
+ARM_BE8(rev16	ip, ip)
+	strh	ip, [r7, #2]
 #else
 #ifdef CONFIG_CPU_ENDIAN_BE8
 @ in BE8, we load data in BE, but instructions still in LE
-#define PV_BIT22	0x00004000
+#define PV_BIT24	0x00000001
 #define PV_IMM8_MASK	0xff000000
-#define PV_ROT_MASK	0x000f0000
 #else
-#define PV_BIT22	0x00400000
+#define PV_BIT24	0x01000000
 #define PV_IMM8_MASK	0x000000ff
-#define PV_ROT_MASK	0xf00
 #endif
 
+	@
+	@ The ARM versions of the patchable sequences are
+	@
+	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
+	@
+	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
+	@
+	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
+	@				adds	<PAlo>, <VA>, <reg>, lsl #24
+	@				mov	<PAhi>, #offset<39:32>
+	@				adc	<PAhi>, <PAhi>, #0
+	@
+	@ In the non-LPAE case, all patchable instructions are ADD or SUB
+	@ instructions, where we need to patch in the offset into the
+	@ immediate field of the opcode, which is emitted with the correct
+	@ rotation value. (The effective value of the immediate is imm12<7:0>
+	@ rotated right by [2 * imm12<11:8>] bits)
+	@
+	@      31   28 27      23 22  20 19  16 15  12 11    0
+	@      +------+-----------------+------+------+-------+
+	@  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
+	@  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
+	@  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
+	@  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
+	@      +------+-----------------+------+------+-------+
+	@
+	@ In the LPAE case, we use a MOVW instruction to carry the low offset
+	@ word, and patch in the high word of the offset into the immediate
+	@ field of the subsequent MOV instruction, or patch it to a MVN
+	@ instruction if the offset is negative. We can distinguish MOVW
+	@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
+	@ distinguished from MOV/MVN (all using the encodings above) using
+	@ bit 24.
+	@
+	@      31   28 27      23 22  20 19  16 15  12 11    0
+	@      +------+-----------------+------+------+-------+
+	@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
+	@      +------+-----------------+------+------+-------+
+	@
 	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
 	b	.Lnext
 .Lloop:	ldr	ip, [r7, r4]
+#ifdef CONFIG_ARM_LPAE
+	tst	ip, #PV_BIT24		@ ADD/SUB have bit 24 clear
+	beq	1f
+ARM_BE8(rev	ip, ip)
+	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
+	bic	ip, ip, #0x400000	@ clear bit 22
+	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
+	orreq	ip, ip, r6		@ MOVW -> mask in offset bits 31-24
+	orrne	ip, ip, r0		@ MOV  -> mask in offset bits 7-0 (or bit 22)
+ARM_BE8(rev	ip, ip)
+	b	2f
+1:
+#endif
 	bic	ip, ip, #PV_IMM8_MASK
-	tst	ip, #PV_ROT_MASK		@ check the rotation field
-	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
-	biceq	ip, ip, #PV_BIT22		@ clear bit 22
-	orreq	ip, ip, r0 ARM_BE8(, ror #8)	@ mask in offset bits 7-0 (or bit 22)
+	orr	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
+2:
 	str	ip, [r7, r4]
 	add	r4, r4, #4
 #endif
-- 
cgit v1.2.3


From 9443076e4330a14ae2c6114307668b98a8293b77 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 18 Sep 2020 11:55:42 +0300
Subject: ARM: p2v: reduce p2v alignment requirement to 2 MiB

The ARM kernel's linear map starts at PAGE_OFFSET, which maps to a
physical address (PHYS_OFFSET) that is platform specific, and is
discovered at boot. Since we don't want to slow down translations
between physical and virtual addresses by keeping the offset in a
variable in memory, we implement this by patching the code performing
the translation, and putting the offset between PAGE_OFFSET and the
start of physical RAM directly into the instruction opcodes.

As we only patch up to 8 bits of offset, yielding 4 GiB >> 8 == 16 MiB
of granularity, we have to round up PHYS_OFFSET to the next multiple if
the start of physical RAM is not a multiple of 16 MiB. This wastes some
physical RAM, since the memory that was skipped will now live below
PAGE_OFFSET, making it inaccessible to the kernel.

We can improve this by changing the patchable sequences and the patching
logic to carry more bits of offset: 11 bits gives us 4 GiB >> 11 == 2 MiB
of granularity, and so we will never waste more than that amount by
rounding up the physical start of DRAM to the next multiple of 2 MiB.
(Note that 2 MiB granularity guarantees that the linear mapping can be
created efficiently, whereas less than 2 MiB may result in the linear
mapping needing another level of page tables)

This helps Zhen Lei's scenario, where the start of DRAM is known to be
occupied. It also helps EFI boot, which relies on the firmware's page
allocator to allocate space for the decompressed kernel as low as
possible. And if the KASLR patches ever land for 32-bit, it will give
us 3 more bits of randomization of the placement of the kernel inside
the linear region.

For the ARM code path, it simply comes down to using two add/sub
instructions instead of one for the carryless version, and patching
each of them with the correct immediate depending on the rotation
field. For the LPAE calculation, which has to deal with a carry, it
patches the MOVW instruction with up to 12 bits of offset (but we only
need 11 bits anyway)

For the Thumb2 code path, patching more than 11 bits of displacement
would be somewhat cumbersome, but the 11 bits we need fit nicely into
the second word of the u16[2] opcode, so we simply update the immediate
assignment and the left shift to create an addend of the right magnitude.

Suggested-by: Zhen Lei <thunder.leizhen@huawei.com>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/Kconfig              |  2 +-
 arch/arm/include/asm/memory.h | 13 ++++++++-----
 arch/arm/kernel/phys2virt.S   | 40 ++++++++++++++++++++++++++--------------
 3 files changed, 35 insertions(+), 20 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index fe2f17eb2b50..d8d62b8e72e4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -243,7 +243,7 @@ config ARM_PATCH_PHYS_VIRT
 	  kernel in system memory.
 
 	  This can only be used with non-XIP MMU kernels where the base
-	  of physical memory is at a 16MB boundary.
+	  of physical memory is at a 2 MiB boundary.
 
 	  Only disable this option if you know that you do not require
 	  this feature (eg, building a kernel for a single machine) and
diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index ccf55cef6ab9..2611be35f26b 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -173,6 +173,7 @@ extern unsigned long vectors_base;
  * so that all we need to do is modify the 8-bit constant field.
  */
 #define __PV_BITS_31_24	0x81000000
+#define __PV_BITS_23_16	0x810000
 #define __PV_BITS_7_0	0x81
 
 extern unsigned long __pv_phys_pfn_offset;
@@ -187,16 +188,18 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"1:	" instr "	%0, %1, %2\n"		\
+	"2:	" instr "	%0, %0, %3\n"		\
 	"	.pushsection .pv_table,\"a\"\n"		\
-	"	.long	1b - .\n"			\
+	"	.long	1b - ., 2b - .\n"		\
 	"	.popsection\n"				\
 	: "=r" (to)					\
-	: "r" (from), "I" (__PV_BITS_31_24))
+	: "r" (from), "I" (__PV_BITS_31_24),		\
+	  "I"(__PV_BITS_23_16))
 
 #define __pv_add_carry_stub(x, y)			\
 	__asm__("@ __pv_add_carry_stub\n"		\
 	"0:	movw	%R0, #0\n"			\
-	"	adds	%Q0, %1, %R0, lsl #24\n"	\
+	"	adds	%Q0, %1, %R0, lsl #20\n"	\
 	"1:	mov	%R0, %2\n"			\
 	"	adc	%R0, %R0, #0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
@@ -210,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define __pv_stub(from,to,instr)			\
 	__asm__("@ __pv_stub\n"				\
 	"0:	movw	%0, #0\n"			\
-	"	lsl	%0, #24\n"			\
+	"	lsl	%0, #21\n"			\
 	"	" instr " %0, %1, %0\n"			\
 	"	.pushsection .pv_table,\"a\"\n"		\
 	"	.long	0b - .\n"			\
@@ -221,7 +224,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
 #define __pv_add_carry_stub(x, y)			\
 	__asm__("@ __pv_add_carry_stub\n"		\
 	"0:	movw	%R0, #0\n"			\
-	"	lsls	%R0, #24\n"			\
+	"	lsls	%R0, #21\n"			\
 	"	adds	%Q0, %1, %R0\n"			\
 	"1:	mvn	%R0, #0\n"			\
 	"	adc	%R0, %R0, #0\n"			\
diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S
index a4e364689663..fb53db78fe78 100644
--- a/arch/arm/kernel/phys2virt.S
+++ b/arch/arm/kernel/phys2virt.S
@@ -21,7 +21,7 @@
 /*
  * __fixup_pv_table - patch the stub instructions with the delta between
  *                    PHYS_OFFSET and PAGE_OFFSET, which is assumed to be
- *                    16MiB aligned.
+ *                    2 MiB aligned.
  *
  * Called from head.S, which expects the following registers to be preserved:
  *   r1 = machine no, r2 = atags or dtb,
@@ -38,8 +38,8 @@ ENTRY(__fixup_pv_table)
 	strcc	ip, [r0, #HIGH_OFFSET]	@ save to __pv_offset high bits
 	str	r3, [r0, #LOW_OFFSET]	@ save to __pv_offset low bits
 
-	mov	r0, r3, lsr #24		@ constant for add/sub instructions
-	teq	r3, r0, lsl #24 	@ must be 16MiB aligned
+	mov	r0, r3, lsr #21		@ constant for add/sub instructions
+	teq	r3, r0, lsl #21 	@ must be 2 MiB aligned
 	bne	0f
 
 	adr_l	r4, __pv_table_begin
@@ -55,22 +55,21 @@ __fixup_a_pv_table:
 	adr_l	r6, __pv_offset
 	ldr	r0, [r6, #HIGH_OFFSET]	@ pv_offset high word
 	ldr	r6, [r6, #LOW_OFFSET]	@ pv_offset low word
-	mov	r6, r6, lsr #24
 	cmn	r0, #1
 #ifdef CONFIG_THUMB2_KERNEL
 	@
 	@ The Thumb-2 versions of the patchable sequences are
 	@
-	@ phys-to-virt:			movw	<reg>, #offset<31:24>
-	@				lsl	<reg>, #24
+	@ phys-to-virt:			movw	<reg>, #offset<31:21>
+	@				lsl	<reg>, #21
 	@				sub	<VA>, <PA>, <reg>
 	@
-	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:24>
-	@				lsl	<reg>, #24
+	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:21>
+	@				lsl	<reg>, #21
 	@				add	<PA>, <VA>, <reg>
 	@
-	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
-	@				lsl	<reg>, #24
+	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:21>
+	@				lsl	<reg>, #21
 	@				adds	<PAlo>, <VA>, <reg>
 	@				mov	<PAhi>, #offset<39:32>
 	@				adc	<PAhi>, <PAhi>, #0
@@ -102,6 +101,9 @@ __fixup_a_pv_table:
 	@     +-----------+---+---------------------++---+------+----+------+
 	@
 	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
+	lsrs	r3, r6, #29		@ isolate top 3 bits of displacement
+	ubfx	r6, r6, #21, #8		@ put bits 28:21 into the MOVW imm8 field
+	bfi	r6, r3, #12, #3		@ put bits 31:29 into the MOVW imm3 field
 	b	.Lnext
 .Lloop:	add	r7, r4
 	adds	r4, #4			@ clears Z flag
@@ -129,20 +131,24 @@ ARM_BE8(rev16	ip, ip)
 @ in BE8, we load data in BE, but instructions still in LE
 #define PV_BIT24	0x00000001
 #define PV_IMM8_MASK	0xff000000
+#define PV_IMMR_MSB	0x00080000
 #else
 #define PV_BIT24	0x01000000
 #define PV_IMM8_MASK	0x000000ff
+#define PV_IMMR_MSB	0x00000800
 #endif
 
 	@
 	@ The ARM versions of the patchable sequences are
 	@
 	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
+	@				sub	<VA>, <PA>, #offset<23:16>, lsl #16
 	@
 	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
+	@				add	<PA>, <VA>, #offset<23:16>, lsl #16
 	@
-	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
-	@				adds	<PAlo>, <VA>, <reg>, lsl #24
+	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:20>
+	@				adds	<PAlo>, <VA>, <reg>, lsl #20
 	@				mov	<PAhi>, #offset<39:32>
 	@				adc	<PAhi>, <PAhi>, #0
 	@
@@ -174,6 +180,9 @@ ARM_BE8(rev16	ip, ip)
 	@      +------+-----------------+------+------+-------+
 	@
 	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
+	mov	r3, r6, lsr #16		@ put offset bits 31-16 into r3
+	mov	r6, r6, lsr #24		@ put offset bits 31-24 into r6
+	and	r3, r3, #0xf0		@ only keep offset bits 23-20 in r3
 	b	.Lnext
 .Lloop:	ldr	ip, [r7, r4]
 #ifdef CONFIG_ARM_LPAE
@@ -183,14 +192,17 @@ ARM_BE8(rev	ip, ip)
 	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
 	bic	ip, ip, #0x400000	@ clear bit 22
 	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
-	orreq	ip, ip, r6		@ MOVW -> mask in offset bits 31-24
+	orreq	ip, ip, r6, lsl #4	@ MOVW -> mask in offset bits 31-24
+	orreq	ip, ip, r3, lsr #4	@ MOVW -> mask in offset bits 23-20
 	orrne	ip, ip, r0		@ MOV  -> mask in offset bits 7-0 (or bit 22)
 ARM_BE8(rev	ip, ip)
 	b	2f
 1:
 #endif
+	tst	ip, #PV_IMMR_MSB		@ rotation value >= 16 ?
 	bic	ip, ip, #PV_IMM8_MASK
-	orr	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
+	orreq	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
+	orrne	ip, ip, r3 ARM_BE8(, lsl #24)	@ mask in offset bits 23-20
 2:
 	str	ip, [r7, r4]
 	add	r4, r4, #4
-- 
cgit v1.2.3


From 67e3f828bd4bf5e4eb4214dc4eb227d8f1c8a877 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 12:28:01 +0300
Subject: ARM: efistub: replace adrl pseudo-op with adr_l macro invocation

The ARM 'adrl' pseudo instruction is a bit problematic, as it does not
exist in Thumb mode, and it is not implemented by Clang either. Since
the Thumb variant has a slightly bigger range, it is sometimes necessary
to emit the 'adrl' variant in ARM mode where Thumb mode can use adr just
fine. However, that still leaves the Clang issue, which does not appear
to be supporting this any time soon.

So let's switch to the adr_l macro, which works for both ARM and Thumb,
and has unlimited range.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/boot/compressed/head.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2e04ec5b5446..5b591dacbaaf 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -1440,8 +1440,7 @@ ENTRY(efi_enter_kernel)
 		mov	r4, r0			@ preserve image base
 		mov	r8, r1			@ preserve DT pointer
 
- ARM(		adrl	r0, call_cache_fn	)
- THUMB(		adr	r0, call_cache_fn	)
+		adr_l	r0, call_cache_fn
 		adr	r1, 0f			@ clean the region of code we
 		bl	cache_clean_flush	@ may run with the MMU off
 
-- 
cgit v1.2.3


From 62c4a2e202b18e1d7176875b7e7af240f340596b Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:06 +0300
Subject: ARM: head-common.S: use PC-relative insn sequence for __proc_info

Replace the open coded PC relative offset calculations with a pair of
adr_l invocations. This removes some open coded arithmetic involving
virtual addresses, avoids literal pools on v7+, and slightly reduces
the footprint of the code.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head-common.S | 22 ++++++----------------
 1 file changed, 6 insertions(+), 16 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 4a3982812a40..9a5ab6c19568 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -170,11 +170,12 @@ ENDPROC(lookup_processor_type)
  *	r9 = cpuid (preserved)
  */
 __lookup_processor_type:
-	adr	r3, __lookup_processor_type_data
-	ldmia	r3, {r4 - r6}
-	sub	r3, r3, r4			@ get offset between virt&phys
-	add	r5, r5, r3			@ convert virt addresses to
-	add	r6, r6, r3			@ physical address space
+	/*
+	 * Look in <asm/procinfo.h> for information about the __proc_info
+	 * structure.
+	 */
+	adr_l	r5, __proc_info_begin
+	adr_l	r6, __proc_info_end
 1:	ldmia	r5, {r3, r4}			@ value, mask
 	and	r4, r4, r9			@ mask wanted bits
 	teq	r3, r4
@@ -186,17 +187,6 @@ __lookup_processor_type:
 2:	ret	lr
 ENDPROC(__lookup_processor_type)
 
-/*
- * Look in <asm/procinfo.h> for information about the __proc_info structure.
- */
-	.align	2
-	.type	__lookup_processor_type_data, %object
-__lookup_processor_type_data:
-	.long	.
-	.long	__proc_info_begin
-	.long	__proc_info_end
-	.size	__lookup_processor_type_data, . - __lookup_processor_type_data
-
 __error_lpae:
 #ifdef CONFIG_DEBUG_LL
 	adr	r0, str_lpae
-- 
cgit v1.2.3


From 172c34c9ff0144c3e1d96a9b54d6fecfe5d17c3c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:16 +0300
Subject: ARM: head-common.S: use PC-relative insn sequence for idmap creation

Replace the open coded PC relative offset calculations involving
__turn_mmu_on and __turn_mmu_on_end with a pair of adr_l invocations.
This removes some open coded arithmetic involving virtual addresses,
avoids literal pools on v7+, and slightly reduces the footprint of the
code.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head.S | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 7e3f36809011..f5a636fee9d0 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -224,11 +224,8 @@ __create_page_tables:
 	 * Create identity mapping to cater for __enable_mmu.
 	 * This identity mapping will be removed by paging_init().
 	 */
-	adr	r0, __turn_mmu_on_loc
-	ldmia	r0, {r3, r5, r6}
-	sub	r0, r0, r3			@ virt->phys offset
-	add	r5, r5, r0			@ phys __turn_mmu_on
-	add	r6, r6, r0			@ phys __turn_mmu_on_end
+	adr_l	r5, __turn_mmu_on		@ _pa(__turn_mmu_on)
+	adr_l	r6, __turn_mmu_on_end		@ _pa(__turn_mmu_on_end)
 	mov	r5, r5, lsr #SECTION_SHIFT
 	mov	r6, r6, lsr #SECTION_SHIFT
 
@@ -351,11 +348,6 @@ __create_page_tables:
 	ret	lr
 ENDPROC(__create_page_tables)
 	.ltorg
-	.align
-__turn_mmu_on_loc:
-	.long	.
-	.long	__turn_mmu_on
-	.long	__turn_mmu_on_end
 
 #if defined(CONFIG_SMP)
 	.text
-- 
cgit v1.2.3


From 91580f0dbf24c6d616091526a900213bc7aa48fe Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:23 +0300
Subject: ARM: head.S: use PC-relative insn sequence for secondary_data

Replace the open coded PC relative offset calculations with adr_l
and ldr_l invocations. This removes some open coded arithmetic
involving virtual addresses, avoids literal pools on v7+, and slightly
reduces the footprint of the code.

Note that it also removes a stale comment about the contents of r6.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head.S | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index f5a636fee9d0..478506b2d51f 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -383,10 +383,8 @@ ENTRY(secondary_startup)
 	/*
 	 * Use the page tables supplied from  __cpu_up.
 	 */
-	adr	r4, __secondary_data
-	ldmia	r4, {r5, r7, r12}		@ address to jump to after
-	sub	lr, r4, r5			@ mmu has been enabled
-	add	r3, r7, lr
+	adr_l	r3, secondary_data
+	mov_l	r12, __secondary_switched
 	ldrd	r4, r5, [r3, #0]		@ get secondary_data.pgdir
 ARM_BE8(eor	r4, r4, r5)			@ Swap r5 and r4 in BE:
 ARM_BE8(eor	r5, r4, r5)			@ it can be done in 3 steps
@@ -401,22 +399,13 @@ ARM_BE8(eor	r4, r4, r5)			@ without using a temp reg.
 ENDPROC(secondary_startup)
 ENDPROC(secondary_startup_arm)
 
-	/*
-	 * r6  = &secondary_data
-	 */
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #12]			@ get secondary_data.stack
+	ldr_l	r7, secondary_data + 12		@ get secondary_data.stack
+	mov	sp, r7
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
 
-	.align
-
-	.type	__secondary_data, %object
-__secondary_data:
-	.long	.
-	.long	secondary_data
-	.long	__secondary_switched
 #endif /* defined(CONFIG_SMP) */
 
 
-- 
cgit v1.2.3


From 450abd38fe6c6313ce9bdd9dce81c1dd604f6fb0 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:48:20 +0300
Subject: ARM: kernel: use relative references for UP/SMP alternatives

Currently, the .alt.smp.init section contains the virtual addresses
of the patch sites. Since patching may occur both before and after
switching into virtual mode, this requires some manual handling of
the address when applying the UP alternative.

Let's simplify this by using relative offsets in the table entries:
this allows us to simply add each entry's address to its contents,
regardless of whether we are running in virtual mode or not.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/include/asm/assembler.h |  4 ++--
 arch/arm/include/asm/processor.h |  2 +-
 arch/arm/kernel/head.S           | 10 +++++-----
 3 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 72627c5fb3b2..6ed30421f697 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -259,7 +259,7 @@
  */
 #define ALT_UP(instr...)					\
 	.pushsection ".alt.smp.init", "a"			;\
-	.long	9998b						;\
+	.long	9998b - .					;\
 9997:	instr							;\
 	.if . - 9997b == 2					;\
 		nop						;\
@@ -270,7 +270,7 @@
 	.popsection
 #define ALT_UP_B(label)					\
 	.pushsection ".alt.smp.init", "a"			;\
-	.long	9998b						;\
+	.long	9998b - .					;\
 	W(b)	. + (label - 9998b)					;\
 	.popsection
 #else
diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h
index b9241051e5cb..9e6b97286307 100644
--- a/arch/arm/include/asm/processor.h
+++ b/arch/arm/include/asm/processor.h
@@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p);
 #define __ALT_SMP_ASM(smp, up)						\
 	"9998:	" smp "\n"						\
 	"	.pushsection \".alt.smp.init\", \"a\"\n"		\
-	"	.long	9998b\n"					\
+	"	.long	9998b - .\n"					\
 	"	" up "\n"						\
 	"	.popsection\n"
 #else
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 478506b2d51f..cdc79fcee43e 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -546,14 +546,15 @@ smp_on_up:
 __do_fixup_smp_on_up:
 	cmp	r4, r5
 	reths	lr
-	ldmia	r4!, {r0, r6}
- ARM(	str	r6, [r0, r3]	)
- THUMB(	add	r0, r0, r3	)
+	ldmia	r4, {r0, r6}
+ ARM(	str	r6, [r0, r4]	)
+ THUMB(	add	r0, r0, r4	)
+	add	r4, r4, #8
 #ifdef __ARMEB__
  THUMB(	mov	r6, r6, ror #16	)	@ Convert word order for big-endian.
 #endif
  THUMB(	strh	r6, [r0], #2	)	@ For Thumb-2, store as two halfwords
- THUMB(	mov	r6, r6, lsr #16	)	@ to be robust against misaligned r3.
+ THUMB(	mov	r6, r6, lsr #16	)	@ to be robust against misaligned r0.
  THUMB(	strh	r6, [r0]	)
 	b	__do_fixup_smp_on_up
 ENDPROC(__do_fixup_smp_on_up)
@@ -562,7 +563,6 @@ ENTRY(fixup_smp)
 	stmfd	sp!, {r4 - r6, lr}
 	mov	r4, r0
 	add	r5, r0, r1
-	mov	r3, #0
 	bl	__do_fixup_smp_on_up
 	ldmfd	sp!, {r4 - r6, pc}
 ENDPROC(fixup_smp)
-- 
cgit v1.2.3


From 59d2f2827dfdccf8911d5e51465136b52ba623c4 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:29 +0300
Subject: ARM: head: use PC-relative insn sequence for __smp_alt

Now that calling __do_fixup_smp_on_up() can be done without passing
the physical-to-virtual offset in r3, we can replace the open coded
PC relative offset calculations with a pair of adr_l invocations. This
removes some open coded arithmetic involving virtual addresses, avoids
literal pools on v7+, and slightly reduces the footprint of the code.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head.S | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index cdc79fcee43e..5e031a0bf9a9 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -520,19 +520,11 @@ ARM_BE8(rev	r0, r0)			@ byteswap if big endian
 	retne	lr
 
 __fixup_smp_on_up:
-	adr	r0, 1f
-	ldmia	r0, {r3 - r5}
-	sub	r3, r0, r3
-	add	r4, r4, r3
-	add	r5, r5, r3
+	adr_l	r4, __smpalt_begin
+	adr_l	r5, __smpalt_end
 	b	__do_fixup_smp_on_up
 ENDPROC(__fixup_smp)
 
-	.align
-1:	.word	.
-	.word	__smpalt_begin
-	.word	__smpalt_end
-
 	.pushsection .data
 	.align	2
 	.globl	smp_on_up
-- 
cgit v1.2.3


From d74d2b225018baa0e04e080ee9e80b21667ba3a2 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:34 +0300
Subject: ARM: sleep.S: use PC-relative insn sequence for
 sleep_save_sp/mpidr_hash

Replace the open coded PC relative offset calculations with adr_l and
ldr_l invocations. This removes some open coded PC relative arithmetic,
avoids literal pools on v7+, and slightly reduces the footprint of the
code. Note that ALT_SMP() expects a single instruction so move the macro
invocation after it.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/sleep.S | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S
index 5dc8b80bb693..43077e11dafd 100644
--- a/arch/arm/kernel/sleep.S
+++ b/arch/arm/kernel/sleep.S
@@ -72,8 +72,9 @@ ENTRY(__cpu_suspend)
 	ldr	r3, =sleep_save_sp
 	stmfd	sp!, {r0, r1}		@ save suspend func arg and pointer
 	ldr	r3, [r3, #SLEEP_SAVE_SP_VIRT]
-	ALT_SMP(ldr r0, =mpidr_hash)
+	ALT_SMP(W(nop))			@ don't use adr_l inside ALT_SMP()
 	ALT_UP_B(1f)
+	adr_l	r0, mpidr_hash
 	/* This ldmia relies on the memory layout of the mpidr_hash struct */
 	ldmia	r0, {r1, r6-r8}	@ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts
 	compute_mpidr_hash	r0, r6, r7, r8, r2, r1
@@ -147,9 +148,8 @@ no_hyp:
 	mov	r1, #0
 	ALT_SMP(mrc p15, 0, r0, c0, c0, 5)
 	ALT_UP_B(1f)
-	adr	r2, mpidr_hash_ptr
-	ldr	r3, [r2]
-	add	r2, r2, r3		@ r2 = struct mpidr_hash phys address
+	adr_l	r2, mpidr_hash		@ r2 = struct mpidr_hash phys address
+
 	/*
 	 * This ldmia relies on the memory layout of the mpidr_hash
 	 * struct mpidr_hash.
@@ -157,10 +157,7 @@ no_hyp:
 	ldmia	r2, { r3-r6 }	@ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts
 	compute_mpidr_hash	r1, r4, r5, r6, r0, r3
 1:
-	adr	r0, _sleep_save_sp
-	ldr	r2, [r0]
-	add	r0, r0, r2
-	ldr	r0, [r0, #SLEEP_SAVE_SP_PHYS]
+	ldr_l	r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
 	ldr	r0, [r0, r1, lsl #2]
 
 	@ load phys pgd, stack, resume fn
@@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm)
 ENDPROC(cpu_resume_no_hyp)
 #endif
 
-	.align 2
-_sleep_save_sp:
-	.long	sleep_save_sp - .
-mpidr_hash_ptr:
-	.long	mpidr_hash - .			@ mpidr_hash struct offset
-
 	.data
 	.align	2
 	.type	sleep_save_sp, #object
-- 
cgit v1.2.3


From 3bcf906b194cebb6817cbb2f07b69e12aa5d7f51 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:46 +0300
Subject: ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET

Replace the open coded arithmetic with a simple adr_l/sub pair. This
removes some open coded arithmetic involving virtual addresses, avoids
literal pools on v7+, and slightly reduces the footprint of the code.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/kernel/head.S | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 5e031a0bf9a9..ae0b08b47f52 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -103,10 +103,8 @@ ENTRY(stext)
 #endif
 
 #ifndef CONFIG_XIP_KERNEL
-	adr	r3, 2f
-	ldmia	r3, {r4, r8}
-	sub	r4, r3, r4			@ (PHYS_OFFSET - PAGE_OFFSET)
-	add	r8, r8, r4			@ PHYS_OFFSET
+	adr_l	r8, _text			@ __pa(_text)
+	sub	r8, r8, #TEXT_OFFSET		@ PHYS_OFFSET
 #else
 	ldr	r8, =PLAT_PHYS_OFFSET		@ always constant in this case
 #endif
@@ -158,10 +156,6 @@ ENTRY(stext)
 1:	b	__enable_mmu
 ENDPROC(stext)
 	.ltorg
-#ifndef CONFIG_XIP_KERNEL
-2:	.long	.
-	.long	PAGE_OFFSET
-#endif
 
 /*
  * Setup the initial page tables.  We only setup the barest
-- 
cgit v1.2.3


From aaac3733171fca948c4fb66b78257620e3885339 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 14 Sep 2020 11:25:52 +0300
Subject: ARM: kvm: replace open coded VA->PA calculations with adr_l call

Replace the open coded calculations of the actual physical address
of the KVM stub vector table with a single adr_l invocation.

Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm/boot/compressed/head.S | 15 ++-------------
 arch/arm/kernel/hyp-stub.S      | 27 ++++++++++++---------------
 2 files changed, 14 insertions(+), 28 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 5b591dacbaaf..9905fb7560df 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -468,15 +468,10 @@ dtb_check_done:
 
 		/*
 		 * Compute the address of the hyp vectors after relocation.
-		 * This requires some arithmetic since we cannot directly
-		 * reference __hyp_stub_vectors in a PC-relative way.
 		 * Call __hyp_set_vectors with the new address so that we
 		 * can HVC again after the copy.
 		 */
-0:		adr	r0, 0b
-		movw	r1, #:lower16:__hyp_stub_vectors - 0b
-		movt	r1, #:upper16:__hyp_stub_vectors - 0b
-		add	r0, r0, r1
+		adr_l	r0, __hyp_stub_vectors
 		sub	r0, r0, r5
 		add	r0, r0, r10
 		bl	__hyp_set_vectors
@@ -627,17 +622,11 @@ not_relocated:	mov	r0, #0
 		cmp	r0, #HYP_MODE		@ if not booted in HYP mode...
 		bne	__enter_kernel		@ boot kernel directly
 
-		adr	r12, .L__hyp_reentry_vectors_offset
-		ldr	r0, [r12]
-		add	r0, r0, r12
-
+		adr_l	r0, __hyp_reentry_vectors
 		bl	__hyp_set_vectors
 		__HVC(0)			@ otherwise bounce to hyp mode
 
 		b	.			@ should never be reached
-
-		.align	2
-.L__hyp_reentry_vectors_offset:	.long	__hyp_reentry_vectors - .
 #else
 		b	__enter_kernel
 #endif
diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 26d8e03b1dd3..103d0bdb2b7e 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode)
 .text
 
 	/*
-	 * Save the primary CPU boot mode. Requires 3 scratch registers.
+	 * Save the primary CPU boot mode. Requires 2 scratch registers.
 	 */
-	.macro	store_primary_cpu_mode	reg1, reg2, reg3
+	.macro	store_primary_cpu_mode	reg1, reg2
 	mrs	\reg1, cpsr
 	and	\reg1, \reg1, #MODE_MASK
-	adr	\reg2, .L__boot_cpu_mode_offset
-	ldr	\reg3, [\reg2]
-	str	\reg1, [\reg2, \reg3]
+	str_l	\reg1, __boot_cpu_mode, \reg2
 	.endm
 
 	/*
 	 * Compare the current mode with the one saved on the primary CPU.
 	 * If they don't match, record that fact. The Z bit indicates
 	 * if there's a match or not.
-	 * Requires 3 additionnal scratch registers.
+	 * Requires 2 additional scratch registers.
 	 */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
-	adr	\reg2, .L__boot_cpu_mode_offset
-	ldr	\reg3, [\reg2]
-	ldr	\reg1, [\reg2, \reg3]
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2
+	adr_l	\reg2, __boot_cpu_mode
+	ldr	\reg1, [\reg2]
 	cmp	\mode, \reg1		@ matches primary CPU boot mode?
 	orrne	\reg1, \reg1, #BOOT_CPU_MODE_MISMATCH
-	strne	\reg1, [\reg2, \reg3]	@ record what happened and give up
+	strne	\reg1, [\reg2]		@ record what happened and give up
 	.endm
 
 #else	/* ZIMAGE */
 
-	.macro	store_primary_cpu_mode	reg1:req, reg2:req, reg3:req
+	.macro	store_primary_cpu_mode	reg1:req, reg2:req
 	.endm
 
 /*
  * The zImage loader only runs on one CPU, so we don't bother with mult-CPU
  * consistency checking:
  */
-	.macro	compare_cpu_mode_with_primary mode, reg1, reg2, reg3
+	.macro	compare_cpu_mode_with_primary mode, reg1, reg2
 	cmp	\mode, \mode
 	.endm
 
@@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode)
  */
 @ Call this from the primary CPU
 ENTRY(__hyp_stub_install)
-	store_primary_cpu_mode	r4, r5, r6
+	store_primary_cpu_mode	r4, r5
 ENDPROC(__hyp_stub_install)
 
 	@ fall through...
@@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary)
 	 * If the secondary has booted with a different mode, give up
 	 * immediately.
 	 */
-	compare_cpu_mode_with_primary	r4, r5, r6, r7
+	compare_cpu_mode_with_primary	r4, r5, r6
 	retne	lr
 
 	/*
-- 
cgit v1.2.3


From 735e8d93dc2b107f7891a9c2b1c4cfbea1fcbbbc Mon Sep 17 00:00:00 2001
From: Fangrui Song <maskray@google.com>
Date: Fri, 6 Nov 2020 21:46:11 +0100
Subject: ARM: 9022/1: Change arch/arm/lib/mem*.S to use WEAK instead of .weak

Commit d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for
KASan") add .weak directives to memcpy/memmove/memset to avoid collision
with KASAN interceptors.

This does not work with LLVM's integrated assembler (the assembly snippet
`.weak memcpy ... .globl memcpy` produces a STB_GLOBAL memcpy while GNU as
produces a STB_WEAK memcpy). LLVM 12 (since https://reviews.llvm.org/D90108)
will error on such an overridden symbol binding.

Use the appropriate WEAK macro instead.

Link: https://github.com/ClangBuiltLinux/linux/issues/1190
--

Fixes: d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan")
Reported-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Fangrui Song <maskray@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/lib/memcpy.S  | 3 +--
 arch/arm/lib/memmove.S | 3 +--
 arch/arm/lib/memset.S  | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S
index ad4625d16e11..e4caf48c089f 100644
--- a/arch/arm/lib/memcpy.S
+++ b/arch/arm/lib/memcpy.S
@@ -58,10 +58,9 @@
 
 /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */
 
-.weak memcpy
 ENTRY(__memcpy)
 ENTRY(mmiocpy)
-ENTRY(memcpy)
+WEAK(memcpy)
 
 #include "copy_template.S"
 
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index fd123ea5a5a4..6fecc12a1f51 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -24,9 +24,8 @@
  * occurring in the opposite direction.
  */
 
-.weak memmove
 ENTRY(__memmove)
-ENTRY(memmove)
+WEAK(memmove)
 	UNWIND(	.fnstart			)
 
 		subs	ip, r0, r1
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 0e7ff0423f50..9817cb258c1a 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -13,10 +13,9 @@
 	.text
 	.align	5
 
-.weak memset
 ENTRY(__memset)
 ENTRY(mmioset)
-ENTRY(memset)
+WEAK(memset)
 UNWIND( .fnstart         )
 	ands	r3, r0, #3		@ 1 unaligned?
 	mov	ip, r0			@ preserve r0 as return value
-- 
cgit v1.2.3


From df8eda0f1f58e2419875046f57c27c4d72378575 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 10 Nov 2020 16:59:30 +0100
Subject: ARM: 9023/1: Spelling s/mmeory/memory/

Fix a misspelling of the word "memory".

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d32f7652a5bf..f9de7658d9b3 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -1136,7 +1136,7 @@ void __init setup_arch(char **cmdline_p)
 	efi_init();
 	/*
 	 * Make sure the calculation for lowmem/highmem is set appropriately
-	 * before reserving/allocating any mmeory
+	 * before reserving/allocating any memory
 	 */
 	adjust_lowmem_bounds();
 	arm_memblock_init(mdesc);
-- 
cgit v1.2.3


From 730b5764ea8526e48bdb85a24ed96d62de435940 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 10 Nov 2020 16:58:41 +0100
Subject: ARM: 9024/1: Drop useless cast of "u64" to "long long"

As "u64" is equivalent to "unsigned long long", there is no need to cast
a "u64" parameter for printing it using the "0x%08llx" format specifier.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index f9de7658d9b3..1a5edf562e85 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -765,7 +765,7 @@ int __init arm_add_memory(u64 start, u64 size)
 #ifndef CONFIG_PHYS_ADDR_T_64BIT
 	if (aligned_start > ULONG_MAX) {
 		pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n",
-			(long long)start);
+			start);
 		return -EINVAL;
 	}
 
-- 
cgit v1.2.3


From 28187dc8ebd938d574edfc6d9e0f9c51c21ff3f4 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Tue, 17 Nov 2020 00:46:39 +0100
Subject: ARM: 9025/1: Kconfig: CPU_BIG_ENDIAN depends on !LD_IS_LLD

LLD does not yet support any big endian architectures. Make this config
non-selectable when using LLD until LLD is fixed.

Link: https://github.com/ClangBuiltLinux/linux/issues/965

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/arm')

diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 65e4482e3849..02692fbe2db5 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -743,6 +743,7 @@ config SWP_EMULATE
 config CPU_BIG_ENDIAN
 	bool "Build big-endian kernel"
 	depends on ARCH_SUPPORTS_BIG_ENDIAN
+	depends on !LD_IS_LLD
 	help
 	  Say Y if you plan on running a kernel in big-endian mode.
 	  Note that your board must be properly built and your board
-- 
cgit v1.2.3


From 331b9d02d77e0e33f273d8328d9f5453efddd926 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Tue, 17 Nov 2020 00:49:25 +0100
Subject: ARM: 9026/1: unwind: remove old check for GCC <= 4.2

Since
commit 0bddd227f3dc ("Documentation: update for gcc 4.9 requirement")
the minimum supported version of GCC is gcc-4.9. It's now safe to remove
this code.

Link: https://github.com/ClangBuiltLinux/linux/issues/427

Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/unwind.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c
index f35eb584a18a..59fdf257bf8b 100644
--- a/arch/arm/kernel/unwind.c
+++ b/arch/arm/kernel/unwind.c
@@ -18,9 +18,6 @@
 #warning Your compiler does not have EABI support.
 #warning    ARM unwind is known to compile only with EABI compilers.
 #warning    Change compiler or disable ARM_UNWIND option.
-#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__)
-#warning Your compiler is too buggy; it is known to not compile ARM unwind support.
-#warning    Change compiler or disable ARM_UNWIND option.
 #endif
 #endif /* __CHECKER__ */
 
-- 
cgit v1.2.3


From 4d576cab16f57e1f87978f6997a725179398341e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 17 Nov 2020 10:23:28 +0100
Subject: ARM: 9028/1: disable KASAN in call stack capturing routines

KASAN uses the routines in stacktrace.c to capture the call stack each
time memory gets allocated or freed. Some of these routines are also
used to log CPU and memory context when exceptions are taken, and so
in some cases, memory accesses may be made that are not strictly in
line with the KASAN constraints, and may therefore trigger false KASAN
positives.

So follow the example set by other architectures, and simply disable
KASAN instrumentation for these routines.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 89e5d864e923..f6431b46866e 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -21,6 +21,9 @@ obj-y		:= elf.o entry-common.o irq.o opcodes.o \
 		   setup.o signal.o sigreturn_codes.o \
 		   stacktrace.o sys_arm.o time.o traps.o
 
+KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_traps.o := n
+
 ifneq ($(CONFIG_ARM_UNWIND),y)
 obj-$(CONFIG_FRAME_POINTER)	+= return_address.o
 endif
-- 
cgit v1.2.3


From 3c9f5708b7aed6a963e2aefccbd1854802de163e Mon Sep 17 00:00:00 2001
From: Jian Cai <jiancai@google.com>
Date: Tue, 17 Nov 2020 23:11:36 +0100
Subject: ARM: 9029/1: Make iwmmxt.S support Clang's integrated assembler

This patch replaces 6 IWMMXT instructions Clang's integrated assembler
does not support in iwmmxt.S using macros, while making sure GNU
assembler still emit the same instructions. This should be easier than
providing full IWMMXT support in Clang.  This is one of the last bits of
kernel code that could be compiled but not assembled with clang. Once
all of it works with IAS, we no longer need to special-case 32-bit Arm
in Kbuild, or turn off CONFIG_IWMMXT when build-testing.

"Intel Wireless MMX Technology - Developer Guide - August, 2002" should
be referenced for the encoding schemes of these extensions.

Link: https://github.com/ClangBuiltLinux/linux/issues/975

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Suggested-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Jian Cai <jiancai@google.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++++++++++++------------------------
 arch/arm/kernel/iwmmxt.h | 47 +++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 44 deletions(-)
 create mode 100644 arch/arm/kernel/iwmmxt.h

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S
index 0dcae787b004..d2b4ac06e4ed 100644
--- a/arch/arm/kernel/iwmmxt.S
+++ b/arch/arm/kernel/iwmmxt.S
@@ -16,6 +16,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/assembler.h>
+#include "iwmmxt.h"
 
 #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B)
 #define PJ4(code...)		code
@@ -113,33 +114,33 @@ concan_save:
 
 concan_dump:
 
-	wstrw	wCSSF, [r1, #MMX_WCSSF]
-	wstrw	wCASF, [r1, #MMX_WCASF]
-	wstrw	wCGR0, [r1, #MMX_WCGR0]
-	wstrw	wCGR1, [r1, #MMX_WCGR1]
-	wstrw	wCGR2, [r1, #MMX_WCGR2]
-	wstrw	wCGR3, [r1, #MMX_WCGR3]
+	wstrw	wCSSF, r1, MMX_WCSSF
+	wstrw	wCASF, r1, MMX_WCASF
+	wstrw	wCGR0, r1, MMX_WCGR0
+	wstrw	wCGR1, r1, MMX_WCGR1
+	wstrw	wCGR2, r1, MMX_WCGR2
+	wstrw	wCGR3, r1, MMX_WCGR3
 
 1:	@ MUP? wRn
 	tst	r2, #0x2
 	beq	2f
 
-	wstrd	wR0,  [r1, #MMX_WR0]
-	wstrd	wR1,  [r1, #MMX_WR1]
-	wstrd	wR2,  [r1, #MMX_WR2]
-	wstrd	wR3,  [r1, #MMX_WR3]
-	wstrd	wR4,  [r1, #MMX_WR4]
-	wstrd	wR5,  [r1, #MMX_WR5]
-	wstrd	wR6,  [r1, #MMX_WR6]
-	wstrd	wR7,  [r1, #MMX_WR7]
-	wstrd	wR8,  [r1, #MMX_WR8]
-	wstrd	wR9,  [r1, #MMX_WR9]
-	wstrd	wR10, [r1, #MMX_WR10]
-	wstrd	wR11, [r1, #MMX_WR11]
-	wstrd	wR12, [r1, #MMX_WR12]
-	wstrd	wR13, [r1, #MMX_WR13]
-	wstrd	wR14, [r1, #MMX_WR14]
-	wstrd	wR15, [r1, #MMX_WR15]
+	wstrd	wR0,  r1, MMX_WR0
+	wstrd	wR1,  r1, MMX_WR1
+	wstrd	wR2,  r1, MMX_WR2
+	wstrd	wR3,  r1, MMX_WR3
+	wstrd	wR4,  r1, MMX_WR4
+	wstrd	wR5,  r1, MMX_WR5
+	wstrd	wR6,  r1, MMX_WR6
+	wstrd	wR7,  r1, MMX_WR7
+	wstrd	wR8,  r1, MMX_WR8
+	wstrd	wR9,  r1, MMX_WR9
+	wstrd	wR10, r1, MMX_WR10
+	wstrd	wR11, r1, MMX_WR11
+	wstrd	wR12, r1, MMX_WR12
+	wstrd	wR13, r1, MMX_WR13
+	wstrd	wR14, r1, MMX_WR14
+	wstrd	wR15, r1, MMX_WR15
 
 2:	teq	r0, #0				@ anything to load?
 	reteq	lr				@ if not, return
@@ -147,30 +148,30 @@ concan_dump:
 concan_load:
 
 	@ Load wRn
-	wldrd	wR0,  [r0, #MMX_WR0]
-	wldrd	wR1,  [r0, #MMX_WR1]
-	wldrd	wR2,  [r0, #MMX_WR2]
-	wldrd	wR3,  [r0, #MMX_WR3]
-	wldrd	wR4,  [r0, #MMX_WR4]
-	wldrd	wR5,  [r0, #MMX_WR5]
-	wldrd	wR6,  [r0, #MMX_WR6]
-	wldrd	wR7,  [r0, #MMX_WR7]
-	wldrd	wR8,  [r0, #MMX_WR8]
-	wldrd	wR9,  [r0, #MMX_WR9]
-	wldrd	wR10, [r0, #MMX_WR10]
-	wldrd	wR11, [r0, #MMX_WR11]
-	wldrd	wR12, [r0, #MMX_WR12]
-	wldrd	wR13, [r0, #MMX_WR13]
-	wldrd	wR14, [r0, #MMX_WR14]
-	wldrd	wR15, [r0, #MMX_WR15]
+	wldrd	wR0,  r0, MMX_WR0
+	wldrd	wR1,  r0, MMX_WR1
+	wldrd	wR2,  r0, MMX_WR2
+	wldrd	wR3,  r0, MMX_WR3
+	wldrd	wR4,  r0, MMX_WR4
+	wldrd	wR5,  r0, MMX_WR5
+	wldrd	wR6,  r0, MMX_WR6
+	wldrd	wR7,  r0, MMX_WR7
+	wldrd	wR8,  r0, MMX_WR8
+	wldrd	wR9,  r0, MMX_WR9
+	wldrd	wR10, r0, MMX_WR10
+	wldrd	wR11, r0, MMX_WR11
+	wldrd	wR12, r0, MMX_WR12
+	wldrd	wR13, r0, MMX_WR13
+	wldrd	wR14, r0, MMX_WR14
+	wldrd	wR15, r0, MMX_WR15
 
 	@ Load wCx
-	wldrw	wCSSF, [r0, #MMX_WCSSF]
-	wldrw	wCASF, [r0, #MMX_WCASF]
-	wldrw	wCGR0, [r0, #MMX_WCGR0]
-	wldrw	wCGR1, [r0, #MMX_WCGR1]
-	wldrw	wCGR2, [r0, #MMX_WCGR2]
-	wldrw	wCGR3, [r0, #MMX_WCGR3]
+	wldrw	wCSSF, r0, MMX_WCSSF
+	wldrw	wCASF, r0, MMX_WCASF
+	wldrw	wCGR0, r0, MMX_WCGR0
+	wldrw	wCGR1, r0, MMX_WCGR1
+	wldrw	wCGR2, r0, MMX_WCGR2
+	wldrw	wCGR3, r0, MMX_WCGR3
 
 	@ clear CUP/MUP (only if r1 != 0)
 	teq	r1, #0
diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h
new file mode 100644
index 000000000000..fb627286f5bb
--- /dev/null
+++ b/arch/arm/kernel/iwmmxt.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __IWMMXT_H__
+#define __IWMMXT_H__
+
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
+.set .LwR\b, \b
+.set .Lr\b, \b
+.endr
+
+.set .LwCSSF, 0x2
+.set .LwCASF, 0x3
+.set .LwCGR0, 0x8
+.set .LwCGR1, 0x9
+.set .LwCGR2, 0xa
+.set .LwCGR3, 0xb
+
+.macro wldrd, reg:req, base:req, offset:req
+.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wldrw, reg:req, base:req, offset:req
+.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wstrd, reg:req, base:req, offset:req
+.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+.macro wstrw, reg:req, base:req, offset:req
+.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2)
+.endm
+
+#ifdef __clang__
+
+#define wCon c1
+
+.macro tmrc, dest:req, control:req
+mrc p1, 0, \dest, \control, c0, 0
+.endm
+
+.macro tmcr, control:req, src:req
+mcr p1, 0, \src, \control, c0, 0
+.endm
+#endif
+
+#endif
-- 
cgit v1.2.3


From f77ac2e378be9dd61eb88728f0840642f045d9d1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 19 Nov 2020 18:09:16 +0100
Subject: ARM: 9030/1: entry: omit FP emulation for UND exceptions taken in
 kernel mode

There are a couple of problems with the exception entry code that deals
with FP exceptions (which are reported as UND exceptions) when building
the kernel in Thumb2 mode:
- the conditional branch to vfp_kmode_exception in vfp_support_entry()
  may be out of range for its target, depending on how the linker decides
  to arrange the sections;
- when the UND exception is taken in kernel mode, the emulation handling
  logic is entered via the 'call_fpe' label, which means we end up using
  the wrong value/mask pairs to match and detect the NEON opcodes.

Since UND exceptions in kernel mode are unlikely to occur on a hot path
(as opposed to the user mode version which is invoked for VFP support
code and lazy restore), we can use the existing undef hook machinery for
any kernel mode instruction emulation that is needed, including calling
the existing vfp_kmode_exception() routine for unexpected cases. So drop
the call to call_fpe, and instead, install an undef hook that will get
called for NEON and VFP instructions that trigger an UND exception in
kernel mode.

While at it, make sure that the PC correction is accurate for the
execution mode where the exception was taken, by checking the PSR
Thumb bit.

Cc: Dmitry Osipenko <digetx@gmail.com>
Cc: Kees Cook <keescook@chromium.org>
Fixes: eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections")
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/entry-armv.S | 25 ++--------------------
 arch/arm/vfp/vfphw.S         |  5 -----
 arch/arm/vfp/vfpmodule.c     | 49 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 49 insertions(+), 30 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index c4220f51fcf3..0ea8529a4872 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -252,31 +252,10 @@ __und_svc:
 #else
 	svc_entry
 #endif
-	@
-	@ call emulation code, which returns using r9 if it has emulated
-	@ the instruction, or the more conventional lr if we are to treat
-	@ this as a real undefined instruction
-	@
-	@  r0 - instruction
-	@
-#ifndef CONFIG_THUMB2_KERNEL
-	ldr	r0, [r4, #-4]
-#else
-	mov	r1, #2
-	ldrh	r0, [r4, #-2]			@ Thumb instruction at LR - 2
-	cmp	r0, #0xe800			@ 32-bit instruction if xx >= 0
-	blo	__und_svc_fault
-	ldrh	r9, [r4]			@ bottom 16 bits
-	add	r4, r4, #2
-	str	r4, [sp, #S_PC]
-	orr	r0, r9, r0, lsl #16
-#endif
-	badr	r9, __und_svc_finish
-	mov	r2, r4
-	bl	call_fpe
 
 	mov	r1, #4				@ PC correction to apply
-__und_svc_fault:
+ THUMB(	tst	r5, #PSR_T_BIT		)	@ exception taken in Thumb mode?
+ THUMB(	movne	r1, #2			)	@ if so, fix up PC correction
 	mov	r0, sp				@ struct pt_regs *regs
 	bl	__und_fault
 
diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S
index 4fcff9f59947..d5837bf05a9a 100644
--- a/arch/arm/vfp/vfphw.S
+++ b/arch/arm/vfp/vfphw.S
@@ -79,11 +79,6 @@ ENTRY(vfp_support_entry)
 	DBGSTR3	"instr %08x pc %08x state %p", r0, r2, r10
 
 	.fpu	vfpv2
-	ldr	r3, [sp, #S_PSR]	@ Neither lazy restore nor FP exceptions
-	and	r3, r3, #MODE_MASK	@ are supported in kernel mode
-	teq	r3, #USR_MODE
-	bne	vfp_kmode_exception	@ Returns through lr
-
 	VFPFMRX	r1, FPEXC		@ Is the VFP enabled?
 	DBGSTR1	"fpexc %08x", r1
 	tst	r1, #FPEXC_EN
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index 8c9e7f9f0277..c3b6451c18bd 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -23,6 +23,7 @@
 #include <asm/cputype.h>
 #include <asm/system_info.h>
 #include <asm/thread_notify.h>
+#include <asm/traps.h>
 #include <asm/vfp.h>
 
 #include "vfpinstr.h"
@@ -642,7 +643,9 @@ static int vfp_starting_cpu(unsigned int unused)
 	return 0;
 }
 
-void vfp_kmode_exception(void)
+#ifdef CONFIG_KERNEL_MODE_NEON
+
+static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
 {
 	/*
 	 * If we reach this point, a floating point exception has been raised
@@ -660,9 +663,51 @@ void vfp_kmode_exception(void)
 		pr_crit("BUG: unsupported FP instruction in kernel mode\n");
 	else
 		pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n");
+	pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC));
+	return 1;
 }
 
-#ifdef CONFIG_KERNEL_MODE_NEON
+static struct undef_hook vfp_kmode_exception_hook[] = {{
+	.instr_mask	= 0xfe000000,
+	.instr_val	= 0xf2000000,
+	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
+	.cpsr_val	= SVC_MODE,
+	.fn		= vfp_kmode_exception,
+}, {
+	.instr_mask	= 0xff100000,
+	.instr_val	= 0xf4000000,
+	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
+	.cpsr_val	= SVC_MODE,
+	.fn		= vfp_kmode_exception,
+}, {
+	.instr_mask	= 0xef000000,
+	.instr_val	= 0xef000000,
+	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
+	.cpsr_val	= SVC_MODE | PSR_T_BIT,
+	.fn		= vfp_kmode_exception,
+}, {
+	.instr_mask	= 0xff100000,
+	.instr_val	= 0xf9000000,
+	.cpsr_mask	= MODE_MASK | PSR_T_BIT,
+	.cpsr_val	= SVC_MODE | PSR_T_BIT,
+	.fn		= vfp_kmode_exception,
+}, {
+	.instr_mask	= 0x0c000e00,
+	.instr_val	= 0x0c000a00,
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= SVC_MODE,
+	.fn		= vfp_kmode_exception,
+}};
+
+static int __init vfp_kmode_exception_hook_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++)
+		register_undef_hook(&vfp_kmode_exception_hook[i]);
+	return 0;
+}
+core_initcall(vfp_kmode_exception_hook_init);
 
 /*
  * Kernel-side NEON support functions
-- 
cgit v1.2.3


From e64ab473dddaffdfc4bd0b385204f472f2cb00d6 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@fluxnic.net>
Date: Mon, 30 Nov 2020 22:42:36 +0100
Subject: ARM: 9034/1: __div64_32(): straighten up inline asm constraints

The ARM version of __div64_32() encapsulates a call to __do_div64 with
non-standard argument passing. In particular, __n is a 64-bit input
argument assigned to r0-r1 and __rem is an output argument sharing half
of that r0-r1 register pair.

With __n being an input argument, the compiler is in its right to
presume that r0-r1 would still hold the value of __n past the inline
assembly statement. Normally, the compiler would have assigned non
overlapping registers to __n and __rem if the value for __n is needed
again.

However, here we enforce our own register assignment and gcc fails to
notice the conflict. In practice this doesn't cause any problem as __n
is considered dead after the asm statement and *n is overwritten.
However this is not always guaranteed and clang rightfully complains.

Let's fix it properly by making __n into an input-output variable. This
makes it clear that those registers representing __n have been modified.
Then we can extract __rem as the high part of __n with plain C code.

This asm constraint "abuse" was likely relied upon back when gcc didn't
handle 64-bit values optimally. Turns out that gcc is now able to
optimize things and produces the same code with this patch applied.

Reported-by: Antony Yu <swpenim@gmail.com>
Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/div64.h | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h
index 898e9c78a7e7..595e538f5bfb 100644
--- a/arch/arm/include/asm/div64.h
+++ b/arch/arm/include/asm/div64.h
@@ -21,29 +21,20 @@
  * assembly implementation with completely non standard calling convention
  * for arguments and results (beware).
  */
-
-#ifdef __ARMEB__
-#define __xh "r0"
-#define __xl "r1"
-#else
-#define __xl "r0"
-#define __xh "r1"
-#endif
-
 static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 {
 	register unsigned int __base      asm("r4") = base;
 	register unsigned long long __n   asm("r0") = *n;
 	register unsigned long long __res asm("r2");
-	register unsigned int __rem       asm(__xh);
-	asm(	__asmeq("%0", __xh)
+	unsigned int __rem;
+	asm(	__asmeq("%0", "r0")
 		__asmeq("%1", "r2")
-		__asmeq("%2", "r0")
-		__asmeq("%3", "r4")
+		__asmeq("%2", "r4")
 		"bl	__do_div64"
-		: "=r" (__rem), "=r" (__res)
-		: "r" (__n), "r" (__base)
+		: "+r" (__n), "=r" (__res)
+		: "r" (__base)
 		: "ip", "lr", "cc");
+	__rem = __n >> 32;
 	*n = __res;
 	return __rem;
 }
-- 
cgit v1.2.3


From 3cce9d44321e460e7c88cdec4e4537a6e9ad7c0d Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Sat, 19 Dec 2020 16:29:58 +0100
Subject: ARM: 9044/1: vfp: use undef hook for VFP support detection

Commit f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND
exceptions taken in kernel mode") failed to take into account that there
is in fact a case where we relied on this code path: during boot, the
VFP detection code issues a read of FPSID, which will trigger an undef
exception on cores that lack VFP support.

So let's reinstate this logic using an undef hook which is registered
only for the duration of the initcall to vpf_init(), and which sets
VFP_arch to a non-zero value - as before - if no VFP support is present.

Fixes: f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND ...")
Reported-by: "kernelci.org bot" <bot@kernelci.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/vfp/entry.S     | 17 -----------------
 arch/arm/vfp/vfpmodule.c | 25 ++++++++++++++++++++-----
 2 files changed, 20 insertions(+), 22 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S
index 0186cf9da890..27b0a1f27fbd 100644
--- a/arch/arm/vfp/entry.S
+++ b/arch/arm/vfp/entry.S
@@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry)
 	.align	2
 .LCvfp:
 	.word	vfp_vector
-
-@ This code is called if the VFP does not exist. It needs to flag the
-@ failure to the VFP initialisation code.
-
-	__INIT
-ENTRY(vfp_testing_entry)
-	dec_preempt_count_ti r10, r4
-	ldr	r0, VFP_arch_address
-	str	r0, [r0]		@ set to non-zero value
-	ret	r9			@ we have handled the fault
-ENDPROC(vfp_testing_entry)
-
-	.align	2
-VFP_arch_address:
-	.word	VFP_arch
-
-	__FINIT
diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c
index c3b6451c18bd..2cb355c1b5b7 100644
--- a/arch/arm/vfp/vfpmodule.c
+++ b/arch/arm/vfp/vfpmodule.c
@@ -32,7 +32,6 @@
 /*
  * Our undef handlers (in entry.S)
  */
-asmlinkage void vfp_testing_entry(void);
 asmlinkage void vfp_support_entry(void);
 asmlinkage void vfp_null_entry(void);
 
@@ -43,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry;
  * Used in startup: set to non-zero if VFP checks fail
  * After startup, holds VFP architecture
  */
-unsigned int VFP_arch;
+static unsigned int __initdata VFP_arch;
 
 /*
  * The pointer to the vfpstate structure of the thread which currently
@@ -437,7 +436,7 @@ static void vfp_enable(void *unused)
  * present on all CPUs within a SMP complex. Needs to be called prior to
  * vfp_init().
  */
-void vfp_disable(void)
+void __init vfp_disable(void)
 {
 	if (VFP_arch) {
 		pr_debug("%s: should be called prior to vfp_init\n", __func__);
@@ -707,7 +706,7 @@ static int __init vfp_kmode_exception_hook_init(void)
 		register_undef_hook(&vfp_kmode_exception_hook[i]);
 	return 0;
 }
-core_initcall(vfp_kmode_exception_hook_init);
+subsys_initcall(vfp_kmode_exception_hook_init);
 
 /*
  * Kernel-side NEON support functions
@@ -753,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end);
 
 #endif /* CONFIG_KERNEL_MODE_NEON */
 
+static int __init vfp_detect(struct pt_regs *regs, unsigned int instr)
+{
+	VFP_arch = UINT_MAX;	/* mark as not present */
+	regs->ARM_pc += 4;
+	return 0;
+}
+
+static struct undef_hook vfp_detect_hook __initdata = {
+	.instr_mask	= 0x0c000e00,
+	.instr_val	= 0x0c000a00,
+	.cpsr_mask	= MODE_MASK,
+	.cpsr_val	= SVC_MODE,
+	.fn		= vfp_detect,
+};
+
 /*
  * VFP support code initialisation.
  */
@@ -773,10 +787,11 @@ static int __init vfp_init(void)
 	 * The handler is already setup to just log calls, so
 	 * we just need to read the VFPSID register.
 	 */
-	vfp_vector = vfp_testing_entry;
+	register_undef_hook(&vfp_detect_hook);
 	barrier();
 	vfpsid = fmrx(FPSID);
 	barrier();
+	unregister_undef_hook(&vfp_detect_hook);
 	vfp_vector = vfp_null_entry;
 
 	pr_info("VFP support v0.3: ");
-- 
cgit v1.2.3


From 6c7a6d22fcef9181239ea7248c6b0c4117b9325e Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Fri, 27 Nov 2020 13:07:22 +0100
Subject: ARM: 9031/1: hyp-stub: remove unused .L__boot_cpu_mode_offset symbol

Commit aaac3733171fca94 ("ARM: kvm: replace open coded VA->PA calculations
with adr_l call") removed all uses of .L__boot_cpu_mode_offset, so there
is no longer a need to define it.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/hyp-stub.S | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S
index 26d8e03b1dd3..327f63ec42c6 100644
--- a/arch/arm/kernel/hyp-stub.S
+++ b/arch/arm/kernel/hyp-stub.S
@@ -228,12 +228,6 @@ ENTRY(__hyp_soft_restart)
 	ret	lr
 ENDPROC(__hyp_soft_restart)
 
-#ifndef ZIMAGE
-.align 2
-.L__boot_cpu_mode_offset:
-	.long	__boot_cpu_mode - .
-#endif
-
 .align 5
 ENTRY(__hyp_stub_vectors)
 __hyp_stub_reset:	W(b)	.
-- 
cgit v1.2.3


From 76460d613d9b4096f3567bd444e3fc275db1b96b Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 30 Nov 2020 10:28:20 +0100
Subject: ARM: 9032/1: arm/mm: Convert PUD level pgtable helper macros into
 functions

Macros used as functions can be problematic from the compiler perspective.
There was a build failure report caused primarily because of non reference
of an argument variable. Hence convert PUD level pgtable helper macros into
functions in order to avoid such problems in the future. In the process, it

fixes the argument variables sequence in set_pud() which probably remained
hidden for being a macro.

https://lore.kernel.org/linux-mm/202011020749.5XQ3Hfzc-lkp@intel.com/
https://lore.kernel.org/linux-mm/5fa49698.Vu2O3r+dU20UoEJ+%25lkp@intel.com/

Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/include/asm/pgtable-2level.h | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h
index 3502c2f746ca..9d4f5eef410b 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -177,11 +177,28 @@
  * the pud: the pud entry is never bad, always exists, and can't be set or
  * cleared.
  */
-#define pud_none(pud)		(0)
-#define pud_bad(pud)		(0)
-#define pud_present(pud)	(1)
-#define pud_clear(pudp)		do { } while (0)
-#define set_pud(pud,pudp)	do { } while (0)
+static inline int pud_none(pud_t pud)
+{
+	return 0;
+}
+
+static inline int pud_bad(pud_t pud)
+{
+	return 0;
+}
+
+static inline int pud_present(pud_t pud)
+{
+	return 1;
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+}
+
+static inline void set_pud(pud_t *pudp, pud_t pud)
+{
+}
 
 static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
 {
-- 
cgit v1.2.3


From 27bde183b0d3b0e8e84c80db1864a5c7bd20b5e7 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 30 Nov 2020 11:24:09 +0100
Subject: ARM: 9033/1: arm/smp: Drop the macro S(x,s)

Mapping between IPI type index and its string is direct without requiring
an additional offset. Hence the existing macro S(x, s) is now redundant
and can just be dropped. This also makes the code clean and simple.

Cc: Marc Zyngier <maz@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/smp.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index 48099c6e1e4a..6ab2b0ad5f40 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -524,14 +524,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 }
 
 static const char *ipi_types[NR_IPI] __tracepoint_string = {
-#define S(x,s)	[x] = s
-	S(IPI_WAKEUP, "CPU wakeup interrupts"),
-	S(IPI_TIMER, "Timer broadcast interrupts"),
-	S(IPI_RESCHEDULE, "Rescheduling interrupts"),
-	S(IPI_CALL_FUNC, "Function call interrupts"),
-	S(IPI_CPU_STOP, "CPU stop interrupts"),
-	S(IPI_IRQ_WORK, "IRQ work interrupts"),
-	S(IPI_COMPLETION, "completion interrupts"),
+	[IPI_WAKEUP]		= "CPU wakeup interrupts",
+	[IPI_TIMER]		= "Timer broadcast interrupts",
+	[IPI_RESCHEDULE]	= "Rescheduling interrupts",
+	[IPI_CALL_FUNC]		= "Function call interrupts",
+	[IPI_CPU_STOP]		= "CPU stop interrupts",
+	[IPI_IRQ_WORK]		= "IRQ work interrupts",
+	[IPI_COMPLETION]	= "completion interrupts",
 };
 
 static void smp_cross_call(const struct cpumask *target, unsigned int ipinr);
-- 
cgit v1.2.3


From 0557ac83fd1a0a7cd6909665bad50006507115a0 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 4 Dec 2020 10:37:47 +0100
Subject: ARM: 9035/1: uncompress: Add be32tocpu macro

DTB stores all values as 32-bit big-endian integers.
Add a macro to convert such values to native CPU endianness, to reduce
duplication.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/head.S | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 2e04ec5b5446..14d1995106c2 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -165,6 +165,16 @@
 		orr	\res, \res, \tmp1, lsl #24
 		.endm
 
+		.macro	be32tocpu, val, tmp
+#ifndef __ARMEB__
+		/* convert to little endian */
+		eor	\tmp, \val, \val, ror #16
+		bic	\tmp, \tmp, #0x00ff0000
+		mov	\val, \val, ror #8
+		eor	\val, \val, \tmp, lsr #8
+#endif
+		.endm
+
 		.section ".start", "ax"
 /*
  * sort out different calling conventions
@@ -345,13 +355,7 @@ restart:	adr	r0, LC1
 
 		/* Get the initial DTB size */
 		ldr	r5, [r6, #4]
-#ifndef __ARMEB__
-		/* convert to little endian */
-		eor	r1, r5, r5, ror #16
-		bic	r1, r1, #0x00ff0000
-		mov	r5, r5, ror #8
-		eor	r5, r5, r1, lsr #8
-#endif
+		be32tocpu r5, r1
 		dbgadtb	r6, r5
 		/* 50% DTB growth should be good enough */
 		add	r5, r5, r5, lsr #1
@@ -403,13 +407,7 @@ restart:	adr	r0, LC1
 
 		/* Get the current DTB size */
 		ldr	r5, [r6, #4]
-#ifndef __ARMEB__
-		/* convert r5 (dtb size) to little endian */
-		eor	r1, r5, r5, ror #16
-		bic	r1, r1, #0x00ff0000
-		mov	r5, r5, ror #8
-		eor	r5, r5, r1, lsr #8
-#endif
+		be32tocpu r5, r1
 
 		/* preserve 64-bit alignment */
 		add	r5, r5, #7
-- 
cgit v1.2.3


From 1ecec38547d415054fdb63a231234f44396b6d06 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 4 Dec 2020 10:37:14 +0100
Subject: ARM: 9036/1: uncompress: Fix dbgadtb size parameter name

The dbgadtb macro is passed the size of the appended DTB, not the end
address.

Fixes: c03e41470e901123 ("ARM: 9010/1: uncompress: Print the location of appended DTB")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 14d1995106c2..4fb699769e6a 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -116,7 +116,7 @@
 		/*
 		 * Debug print of the final appended DTB location
 		 */
-		.macro dbgadtb, begin, end
+		.macro dbgadtb, begin, size
 #ifdef DEBUG
 		kputc   #'D'
 		kputc   #'T'
@@ -129,7 +129,7 @@
 		kputc	#'('
 		kputc	#'0'
 		kputc	#'x'
-		kphex	\end, 8		/* End of appended DTB */
+		kphex	\size, 8	/* Size of appended DTB */
 		kputc	#')'
 		kputc	#'\n'
 #endif
-- 
cgit v1.2.3


From 551b39efc6ffdc7a881122fbac0caa2a27a464d8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 4 Dec 2020 10:39:10 +0100
Subject: ARM: 9037/1: uncompress: Add OF_DT_MAGIC macro

The DTB magic marker is stored as a 32-bit big-endian value, and thus
depends on the CPU's endianness.  Add a macro to define this value in
native endianness, to reduce #ifdef clutter and (future) duplication.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/boot/compressed/head.S | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S
index 4fb699769e6a..8d546cb10921 100644
--- a/arch/arm/boot/compressed/head.S
+++ b/arch/arm/boot/compressed/head.S
@@ -11,6 +11,12 @@
 
 #include "efi-header.S"
 
+#ifdef __ARMEB__
+#define OF_DT_MAGIC 0xd00dfeed
+#else
+#define OF_DT_MAGIC 0xedfe0dd0
+#endif
+
  AR_CLASS(	.arch	armv7-a	)
  M_CLASS(	.arch	armv7-m	)
 
@@ -335,11 +341,7 @@ restart:	adr	r0, LC1
  */
 
 		ldr	lr, [r6, #0]
-#ifndef __ARMEB__
-		ldr	r1, =0xedfe0dd0		@ sig is 0xd00dfeed big endian
-#else
-		ldr	r1, =0xd00dfeed
-#endif
+		ldr	r1, =OF_DT_MAGIC
 		cmp	lr, r1
 		bne	dtb_check_done		@ not found
 
-- 
cgit v1.2.3


From 0cda9bc15dfc459bd178d6ba93389df52dd57957 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 8 Dec 2020 01:34:16 +0100
Subject: ARM: 9038/1: Link with '-z norelro'

When linking a multi_v7_defconfig + CONFIG_KASAN=y kernel with
LD=ld.lld, the following error occurs:

$ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- LLVM=1 zImage
ld.lld: error: section: .exit.data is not contiguous with other relro sections

LLD defaults to '-z relro', which is unneeded for the kernel because
program headers are not used nor is there any position independent code
generation or linking for ARM. Add '-z norelro' to LDFLAGS_vmlinux to
avoid this error.

Link: https://github.com/ClangBuiltLinux/linux/issues/1189

Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm')

diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 4d76eab2b22d..3c0a64cefe52 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -10,7 +10,7 @@
 #
 # Copyright (C) 1995-2001 by Russell King
 
-LDFLAGS_vmlinux	:= --no-undefined -X --pic-veneer
+LDFLAGS_vmlinux	:= --no-undefined -X --pic-veneer -z norelro
 ifeq ($(CONFIG_CPU_ENDIAN_BE8),y)
 LDFLAGS_vmlinux	+= --be8
 KBUILD_LDFLAGS_MODULE	+= --be8
-- 
cgit v1.2.3


From 10fce53c0ef8f6e79115c3d9e0d7ea1338c3fa37 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 17 Nov 2020 08:41:01 +0100
Subject: ARM: 9027/1: head.S: explicitly map DT even if it lives in the first
 physical section

The early ATAGS/DT mapping code uses SECTION_SHIFT to mask low order
bits of R2, and decides that no ATAGS/DTB were provided if the resulting
value is 0x0.

This means that on systems where DRAM starts at 0x0 (such as Raspberry
Pi), no explicit mapping of the DT will be created if R2 points into the
first 1 MB section of memory. This was not a problem before, because the
decompressed kernel is loaded at the base of DRAM and mapped using
sections as well, and so as long as the DT is referenced via a virtual
address that uses the same translation (the linear map, in this case),
things work fine.

However, commit 7a1be318f579 ("9012/1: move device tree mapping out of
linear region") changes this, and now the DT is referenced via a virtual
address that is disjoint from the linear mapping of DRAM, and so we need
the early code to create the DT mapping unconditionally.

So let's create the early DT mapping for any value of R2 != 0x0.

Reported-by: "kernelci.org bot" <bot@kernelci.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
---
 arch/arm/kernel/head.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/arm')

diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 9b18d8c66129..26ffd462a975 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -274,10 +274,10 @@ __create_page_tables:
 	 * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
 	 */
 	mov	r0, r2, lsr #SECTION_SHIFT
-	movs	r0, r0, lsl #SECTION_SHIFT
+	cmp	r2, #0
 	ldrne	r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER)
 	addne	r3, r3, r4
-	orrne	r6, r7, r0
+	orrne	r6, r7, r0, lsl #SECTION_SHIFT
 	strne	r6, [r3], #1 << PMD_ORDER
 	addne	r6, r6, #1 << SECTION_SHIFT
 	strne	r6, [r3]
-- 
cgit v1.2.3