From e9a2f8b599d0bc22a1b13e69527246ac39c697b4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 11 Oct 2020 10:20:16 +0100 Subject: ARM: 9011/1: centralize phys-to-virt conversion of DT/ATAGS address Before moving the DT mapping out of the linear region, let's prepare for this change by removing all the phys-to-virt translations of the __atags_pointer variable, and perform this translation only once at setup time. Tested-by: Linus Walleij Reviewed-by: Linus Walleij Acked-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/prom.h | 4 ++-- arch/arm/kernel/atags.h | 4 ++-- arch/arm/kernel/atags_parse.c | 6 +++--- arch/arm/kernel/devtree.c | 6 +++--- arch/arm/kernel/setup.c | 14 +++++++++----- arch/arm/mm/mmu.c | 4 ++-- 6 files changed, 21 insertions(+), 17 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/prom.h b/arch/arm/include/asm/prom.h index 1e36c40533c1..402e3f34c7ed 100644 --- a/arch/arm/include/asm/prom.h +++ b/arch/arm/include/asm/prom.h @@ -9,12 +9,12 @@ #ifdef CONFIG_OF -extern const struct machine_desc *setup_machine_fdt(unsigned int dt_phys); +extern const struct machine_desc *setup_machine_fdt(void *dt_virt); extern void __init arm_dt_init_cpu_maps(void); #else /* CONFIG_OF */ -static inline const struct machine_desc *setup_machine_fdt(unsigned int dt_phys) +static inline const struct machine_desc *setup_machine_fdt(void *dt_virt) { return NULL; } diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index 067e12edc341..f2819c25b602 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -2,11 +2,11 @@ void convert_to_tag_list(struct tag *tags); #ifdef CONFIG_ATAGS -const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, +const struct machine_desc *setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr); #else static inline const struct machine_desc * __init __noreturn -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *__atags_vaddr, unsigned int machine_nr) { early_print("no ATAGS support: can't continue\n"); while (true); diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c index 6c12d9fe694e..373b61f9a4f0 100644 --- a/arch/arm/kernel/atags_parse.c +++ b/arch/arm/kernel/atags_parse.c @@ -174,7 +174,7 @@ static void __init squash_mem_tags(struct tag *tag) } const struct machine_desc * __init -setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +setup_machine_tags(void *atags_vaddr, unsigned int machine_nr) { struct tag *tags = (struct tag *)&default_tags; const struct machine_desc *mdesc = NULL, *p; @@ -195,8 +195,8 @@ setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) if (!mdesc) return NULL; - if (__atags_pointer) - tags = phys_to_virt(__atags_pointer); + if (atags_vaddr) + tags = atags_vaddr; else if (mdesc->atag_offset) tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c index 7f0745a97e20..28311dd0fee6 100644 --- a/arch/arm/kernel/devtree.c +++ b/arch/arm/kernel/devtree.c @@ -203,12 +203,12 @@ static const void * __init arch_get_next_mach(const char *const **match) /** * setup_machine_fdt - Machine setup when an dtb was passed to the kernel - * @dt_phys: physical address of dt blob + * @dt_virt: virtual address of dt blob * * If a dtb was passed to the kernel in r2, then use it to choose the * correct machine_desc and to setup the system. */ -const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +const struct machine_desc * __init setup_machine_fdt(void *dt_virt) { const struct machine_desc *mdesc, *mdesc_best = NULL; @@ -221,7 +221,7 @@ const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) mdesc_best = &__mach_desc_GENERIC_DT; #endif - if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) + if (!dt_virt || !early_init_dt_verify(dt_virt)) return NULL; mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f65d0ac9f63..306bcd9844be 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -89,6 +89,7 @@ unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; +void *atags_vaddr __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); @@ -1081,19 +1082,22 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { - const struct machine_desc *mdesc; + const struct machine_desc *mdesc = NULL; + + if (__atags_pointer) + atags_vaddr = phys_to_virt(__atags_pointer); setup_processor(); - mdesc = setup_machine_fdt(__atags_pointer); + if (atags_vaddr) + mdesc = setup_machine_fdt(atags_vaddr); if (!mdesc) - mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { early_print("\nError: invalid dtb and unrecognized/unsupported machine ID\n"); early_print(" r1=0x%08x, r2=0x%08x\n", __machine_arch_type, __atags_pointer); if (__atags_pointer) - early_print(" r2[]=%*ph\n", 16, - phys_to_virt(__atags_pointer)); + early_print(" r2[]=%*ph\n", 16, atags_vaddr); dump_machine_table(); } diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index ab69250a86bc..55991fe60054 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1489,7 +1489,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern unsigned long __atags_pointer; +extern void *atags_vaddr; typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); pgtables_remap lpae_pgtables_remap_asm; @@ -1520,7 +1520,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = __va(__atags_pointer); + boot_data = atags_vaddr; barrier(); pr_info("Switching physical address space to 0x%08llx\n", -- cgit v1.2.3 From 7a1be318f5795cb66fa0dc86b3ace427fe68057f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 11 Oct 2020 10:21:37 +0100 Subject: ARM: 9012/1: move device tree mapping out of linear region On ARM, setting up the linear region is tricky, given the constraints around placement and alignment of the memblocks, and how the kernel itself as well as the DT are placed in physical memory. Let's simplify matters a bit, by moving the device tree mapping to the top of the address space, right between the end of the vmalloc region and the start of the the fixmap region, and create a read-only mapping for it that is independent of the size of the linear region, and how it is organized. Since this region was formerly used as a guard region, which will now be populated fully on LPAE builds by this read-only mapping (which will still be able to function as a guard region for stray writes), bump the start of the [underutilized] fixmap region by 512 KB as well, to ensure that there is always a proper guard region here. Doing so still leaves ample room for the fixmap space, even with NR_CPUS set to its maximum value of 32. Tested-by: Linus Walleij Reviewed-by: Linus Walleij Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/fixmap.h | 2 +- arch/arm/include/asm/memory.h | 5 +++++ arch/arm/kernel/head.S | 5 ++--- arch/arm/kernel/setup.c | 11 ++++++++--- arch/arm/mm/init.c | 1 - arch/arm/mm/mmu.c | 20 ++++++++++++++------ arch/arm/mm/pv-fixup-asm.S | 4 ++-- 7 files changed, 32 insertions(+), 16 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index fc56fc3e1931..9575b404019c 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -2,7 +2,7 @@ #ifndef _ASM_FIXMAP_H #define _ASM_FIXMAP_H -#define FIXADDR_START 0xffc00000UL +#define FIXADDR_START 0xffc80000UL #define FIXADDR_END 0xfff00000UL #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef..bb79e52aeb90 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -67,6 +67,10 @@ */ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) +#define FDT_FIXED_BASE UL(0xff800000) +#define FDT_FIXED_SIZE (2 * PMD_SIZE) +#define FDT_VIRT_ADDR(physaddr) ((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE)) + #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* * Allow 16MB-aligned ioremap pages @@ -107,6 +111,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) +#define FDT_VIRT_ADDR(physaddr) ((void *)(physaddr)) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fd..9b18d8c66129 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -275,9 +275,8 @@ __create_page_tables: */ mov r0, r2, lsr #SECTION_SHIFT movs r0, r0, lsl #SECTION_SHIFT - subne r3, r0, r8 - addne r3, r3, #PAGE_OFFSET - addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) + addne r3, r3, r4 orrne r6, r7, r0 strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 306bcd9844be..694aa6b4bd03 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -89,7 +90,6 @@ unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; -void *atags_vaddr __initdata; unsigned int system_rev; EXPORT_SYMBOL(system_rev); @@ -1083,13 +1083,18 @@ void __init hyp_mode_check(void) void __init setup_arch(char **cmdline_p) { const struct machine_desc *mdesc = NULL; + void *atags_vaddr = NULL; if (__atags_pointer) - atags_vaddr = phys_to_virt(__atags_pointer); + atags_vaddr = FDT_VIRT_ADDR(__atags_pointer); setup_processor(); - if (atags_vaddr) + if (atags_vaddr) { mdesc = setup_machine_fdt(atags_vaddr); + if (mdesc) + memblock_reserve(__atags_pointer, + fdt_totalsize(atags_vaddr)); + } if (!mdesc) mdesc = setup_machine_tags(atags_vaddr, __machine_arch_type); if (!mdesc) { diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index d57112a276f5..a391804c7ce3 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -223,7 +223,6 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) if (mdesc->reserve) mdesc->reserve(); - early_init_fdt_reserve_self(); early_init_fdt_scan_reserved_mem(); /* reserve memory for DMA contiguous allocations */ diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 55991fe60054..fa259825310c 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -39,6 +39,8 @@ #include "mm.h" #include "tcm.h" +extern unsigned long __atags_pointer; + /* * empty_zero_page is a special page that is used for * zero-initialized data and COW. @@ -946,7 +948,7 @@ static void __init create_mapping(struct map_desc *md) return; } - if ((md->type == MT_DEVICE || md->type == MT_ROM) && + if (md->type == MT_DEVICE && md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", @@ -1333,6 +1335,15 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); + if (__atags_pointer) { + /* create a read-only mapping of the device tree */ + map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK); + map.virtual = FDT_FIXED_BASE; + map.length = FDT_FIXED_SIZE; + map.type = MT_ROM; + create_mapping(&map); + } + /* * Map the kernel if it is XIP. * It is always first in the modulearea. @@ -1489,8 +1500,7 @@ static void __init map_lowmem(void) } #ifdef CONFIG_ARM_PV_FIXUP -extern void *atags_vaddr; -typedef void pgtables_remap(long long offset, unsigned long pgd, void *bdata); +typedef void pgtables_remap(long long offset, unsigned long pgd); pgtables_remap lpae_pgtables_remap_asm; /* @@ -1503,7 +1513,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) unsigned long pa_pgd; unsigned int cr, ttbcr; long long offset; - void *boot_data; if (!mdesc->pv_fixup) return; @@ -1520,7 +1529,6 @@ static void __init early_paging_init(const struct machine_desc *mdesc) */ lpae_pgtables_remap = (pgtables_remap *)(unsigned long)__pa(lpae_pgtables_remap_asm); pa_pgd = __pa(swapper_pg_dir); - boot_data = atags_vaddr; barrier(); pr_info("Switching physical address space to 0x%08llx\n", @@ -1556,7 +1564,7 @@ static void __init early_paging_init(const struct machine_desc *mdesc) * needs to be assembly. It's fairly simple, as we're using the * temporary tables setup by the initial assembly code. */ - lpae_pgtables_remap(offset, pa_pgd, boot_data); + lpae_pgtables_remap(offset, pa_pgd); /* Re-enable the caches and cacheable TLB walks */ asm volatile("mcr p15, 0, %0, c2, c0, 2" : : "r" (ttbcr)); diff --git a/arch/arm/mm/pv-fixup-asm.S b/arch/arm/mm/pv-fixup-asm.S index 8eade0416739..5c5e1952000a 100644 --- a/arch/arm/mm/pv-fixup-asm.S +++ b/arch/arm/mm/pv-fixup-asm.S @@ -39,8 +39,8 @@ ENTRY(lpae_pgtables_remap_asm) /* Update level 2 entries for the boot data */ add r7, r2, #0x1000 - add r7, r7, r3, lsr #SECTION_SHIFT - L2_ORDER - bic r7, r7, #(1 << L2_ORDER) - 1 + movw r3, #FDT_FIXED_BASE >> (SECTION_SHIFT - L2_ORDER) + add r7, r7, r3 ldrd r4, r5, [r7] adds r4, r4, r0 adc r5, r5, r1 -- cgit v1.2.3 From d5d44e7e3507b0ad868f68e0c5bca6a57afa1b8b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:50:09 +0100 Subject: ARM: 9013/2: Disable KASan instrumentation for some code Disable instrumentation for arch/arm/boot/compressed/* since that code is executed before the kernel has even set up its mappings and definately out of scope for KASan. Disable instrumentation of arch/arm/vdso/* because that code is not linked with the kernel image, so the KASan management code would fail to link. Disable instrumentation of arch/arm/mm/physaddr.c. See commit ec6d06efb0ba ("arm64: Add support for CONFIG_DEBUG_VIRTUAL") for more details. Disable kasan check in the function unwind_pop_register because it does not matter that kasan checks failed when unwind_pop_register() reads the stack memory of a task. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Florian Fainelli Reported-by: Marc Zyngier Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/Makefile | 1 + arch/arm/kernel/unwind.c | 6 +++++- arch/arm/mm/Makefile | 2 ++ arch/arm/vdso/Makefile | 2 ++ 4 files changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 47f001ca5499..a815b1ae990d 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -24,6 +24,7 @@ OBJS += hyp-stub.o endif GCOV_PROFILE := n +KASAN_SANITIZE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index d2bd0df2318d..f35eb584a18a 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -236,7 +236,11 @@ static int unwind_pop_register(struct unwind_ctrl_block *ctrl, if (*vsp >= (unsigned long *)ctrl->sp_high) return -URC_FAILURE; - ctrl->vrs[reg] = *(*vsp)++; + /* Use READ_ONCE_NOCHECK here to avoid this memory access + * from being tracked by KASAN. + */ + ctrl->vrs[reg] = READ_ONCE_NOCHECK(*(*vsp)); + (*vsp)++; return URC_OK; } diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 7cb1699fbfc4..99699c32d8a5 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -7,6 +7,7 @@ obj-y := extable.o fault.o init.o iomap.o obj-y += dma-mapping$(MMUEXT).o obj-$(CONFIG_MMU) += fault-armv.o flush.o idmap.o ioremap.o \ mmap.o pgd.o mmu.o pageattr.o +KASAN_SANITIZE_mmu.o := n ifneq ($(CONFIG_MMU),y) obj-y += nommu.o @@ -16,6 +17,7 @@ endif obj-$(CONFIG_ARM_PTDUMP_CORE) += dump.o obj-$(CONFIG_ARM_PTDUMP_DEBUGFS) += ptdump_debugfs.o obj-$(CONFIG_MODULES) += proc-syms.o +KASAN_SANITIZE_physaddr.o := n obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_ALIGNMENT_TRAP) += alignment.o diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 150ce6e6a5d3..b558bee0e1f6 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -42,6 +42,8 @@ GCOV_PROFILE := n # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n +KASAN_SANITIZE := n + # Force dependency $(obj)/vdso.o : $(obj)/vdso.so -- cgit v1.2.3 From d6d51a96c7d63b7450860a3037f2d62388286a52 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:52:08 +0100 Subject: ARM: 9014/2: Replace string mem* functions for KASan Functions like memset()/memmove()/memcpy() do a lot of memory accesses. If a bad pointer is passed to one of these functions it is important to catch this. Compiler instrumentation cannot do this since these functions are written in assembly. KASan replaces these memory functions with instrumented variants. The original functions are declared as weak symbols so that the strong definitions in mm/kasan/kasan.c can replace them. The original functions have aliases with a '__' prefix in their name, so we can call the non-instrumented variant if needed. We must use __memcpy()/__memset() in place of memcpy()/memset() when we copy .data to RAM and when we clear .bss, because kasan_early_init cannot be called before the initialization of .data and .bss. For the kernel compression and EFI libstub's custom string libraries we need a special quirk: even if these are built without KASan enabled, they rely on the global headers for their custom string libraries, which means that e.g. memcpy() will be defined to __memcpy() and we get link failures. Since these implementations are written i C rather than assembly we use e.g. __alias(memcpy) to redirected any users back to the local implementation. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Russell King - ARM Linux Signed-off-by: Ahmad Fatoum Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/string.c | 19 +++++++++++++++++++ arch/arm/include/asm/string.h | 26 ++++++++++++++++++++++++++ arch/arm/kernel/head-common.S | 4 ++-- arch/arm/lib/memcpy.S | 3 +++ arch/arm/lib/memmove.S | 5 ++++- arch/arm/lib/memset.S | 3 +++ 6 files changed, 57 insertions(+), 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/string.c b/arch/arm/boot/compressed/string.c index ade5079bebbf..8c0fa276d994 100644 --- a/arch/arm/boot/compressed/string.c +++ b/arch/arm/boot/compressed/string.c @@ -7,6 +7,25 @@ #include +/* + * The decompressor is built without KASan but uses the same redirects as the + * rest of the kernel when CONFIG_KASAN is enabled, defining e.g. memcpy() + * to __memcpy() but since we are not linking with the main kernel string + * library in the decompressor, that will lead to link failures. + * + * Undefine KASan's versions, define the wrapped functions and alias them to + * the right names so that when e.g. __memcpy() appear in the code, it will + * still be linked to this local version of memcpy(). + */ +#ifdef CONFIG_KASAN +#undef memcpy +#undef memmove +#undef memset +void *__memcpy(void *__dest, __const void *__src, size_t __n) __alias(memcpy); +void *__memmove(void *__dest, __const void *__src, size_t count) __alias(memmove); +void *__memset(void *s, int c, size_t count) __alias(memset); +#endif + void *memcpy(void *__dest, __const void *__src, size_t __n) { int i = 0; diff --git a/arch/arm/include/asm/string.h b/arch/arm/include/asm/string.h index 111a1d8a41dd..6c607c68f3ad 100644 --- a/arch/arm/include/asm/string.h +++ b/arch/arm/include/asm/string.h @@ -5,6 +5,9 @@ /* * We don't do inline string functions, since the * optimised inline asm versions are not small. + * + * The __underscore versions of some functions are for KASan to be able + * to replace them with instrumented versions. */ #define __HAVE_ARCH_STRRCHR @@ -15,15 +18,18 @@ extern char * strchr(const char * s, int c); #define __HAVE_ARCH_MEMCPY extern void * memcpy(void *, const void *, __kernel_size_t); +extern void *__memcpy(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMMOVE extern void * memmove(void *, const void *, __kernel_size_t); +extern void *__memmove(void *dest, const void *src, __kernel_size_t n); #define __HAVE_ARCH_MEMCHR extern void * memchr(const void *, int, __kernel_size_t); #define __HAVE_ARCH_MEMSET extern void * memset(void *, int, __kernel_size_t); +extern void *__memset(void *s, int c, __kernel_size_t n); #define __HAVE_ARCH_MEMSET32 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); @@ -39,4 +45,24 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) return __memset64(p, v, n * 8, v >> 32); } +/* + * For files that are not instrumented (e.g. mm/slub.c) we + * must use non-instrumented versions of the mem* + * functions named __memcpy() etc. All such kernel code has + * been tagged with KASAN_SANITIZE_file.o = n, which means + * that the address sanitization argument isn't passed to the + * compiler, and __SANITIZE_ADDRESS__ is not set. As a result + * these defines kick in. + */ +#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) +#define memcpy(dst, src, len) __memcpy(dst, src, len) +#define memmove(dst, src, len) __memmove(dst, src, len) +#define memset(s, c, n) __memset(s, c, n) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + +#endif + #endif diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a40..6840c7c60a85 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -95,7 +95,7 @@ __mmap_switched: THUMB( ldmia r4!, {r0, r1, r2, r3} ) THUMB( mov sp, r3 ) sub r2, r2, r1 - bl memcpy @ copy .data to RAM + bl __memcpy @ copy .data to RAM #endif ARM( ldmia r4!, {r0, r1, sp} ) @@ -103,7 +103,7 @@ __mmap_switched: THUMB( mov sp, r3 ) sub r2, r1, r0 mov r1, #0 - bl memset @ clear .bss + bl __memset @ clear .bss ldmia r4, {r0, r1, r2, r3} str r9, [r0] @ Save processor ID diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 09a333153dc6..ad4625d16e11 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -58,6 +58,8 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +.weak memcpy +ENTRY(__memcpy) ENTRY(mmiocpy) ENTRY(memcpy) @@ -65,3 +67,4 @@ ENTRY(memcpy) ENDPROC(memcpy) ENDPROC(mmiocpy) +ENDPROC(__memcpy) diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index b50e5770fb44..fd123ea5a5a4 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -24,12 +24,14 @@ * occurring in the opposite direction. */ +.weak memmove +ENTRY(__memmove) ENTRY(memmove) UNWIND( .fnstart ) subs ip, r0, r1 cmphi r2, ip - bls memcpy + bls __memcpy stmfd sp!, {r0, r4, lr} UNWIND( .fnend ) @@ -222,3 +224,4 @@ ENTRY(memmove) 18: backward_copy_shift push=24 pull=8 ENDPROC(memmove) +ENDPROC(__memmove) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 6ca4535c47fb..0e7ff0423f50 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -13,6 +13,8 @@ .text .align 5 +.weak memset +ENTRY(__memset) ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) @@ -132,6 +134,7 @@ UNWIND( .fnstart ) UNWIND( .fnend ) ENDPROC(memset) ENDPROC(mmioset) +ENDPROC(__memset) ENTRY(__memset32) UNWIND( .fnstart ) -- cgit v1.2.3 From c12366ba441da2f6f2b915410aca2b5b39c16514 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:53:46 +0100 Subject: ARM: 9015/2: Define the virtual space of KASan's shadow region Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB addressable by a 32bit architecture) out of the virtual address space to use as shadow memory for KASan as follows: +----+ 0xffffffff | | | | |-> Static kernel image (vmlinux) BSS and page table | |/ +----+ PAGE_OFFSET | | | | |-> Loadable kernel modules virtual address space area | |/ +----+ MODULES_VADDR = KASAN_SHADOW_END | | | | |-> The shadow area of kernel virtual address. | |/ +----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the | | shadow address of MODULES_VADDR | | | | | | | | |-> The user space area in lowmem. The kernel address | | | sanitizer do not use this space, nor does it map it. | | | | | | | | | | | | | |/ ------ 0 0 .. TASK_SIZE is the memory that can be used by shared userspace/kernelspace. It us used for userspace processes and for passing parameters and memory buffers in system calls etc. We do not need to shadow this area. KASAN_SHADOW_START: This value begins with the MODULE_VADDR's shadow address. It is the start of kernel virtual space. Since we have modules to load, we need to cover also that area with shadow memory so we can find memory bugs in modules. KASAN_SHADOW_END This value is the 0x100000000's shadow address: the mapping that would be after the end of the kernel memory at 0xffffffff. It is the end of kernel address sanitizer shadow area. It is also the start of the module area. KASAN_SHADOW_OFFSET: This value is used to map an address to the corresponding shadow address by the following formula: shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; As you would expect, >> 3 is equal to dividing by 8, meaning each byte in the shadow memory covers 8 bytes of kernel memory, so one bit shadow memory per byte of kernel memory is used. The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending on the VMSPLIT layout of the system: the kernel and userspace can split up lowmem in different ways according to needs, so we calculate the shadow offset depending on this. When kasan is enabled, the definition of TASK_SIZE is not an 8-bit rotated constant, so we need to modify the TASK_SIZE access code in the *.s file. The kernel and modules may use different amounts of memory, according to the VMSPLIT configuration, which in turn determines the PAGE_OFFSET. We use the following KASAN_SHADOW_OFFSETs depending on how the virtual memory is split up: - 0x1f000000 if we have 1G userspace / 3G kernelspace split: - The kernel address space is 3G (0xc0000000) - PAGE_OFFSET is then set to 0x40000000 so the kernel static image (vmlinux) uses addresses 0x40000000 .. 0xffffffff - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0x3f000000 so the modules use addresses 0x3f000000 .. 0x3fffffff - So the addresses 0x3f000000 .. 0xffffffff need to be covered with shadow memory. That is 0xc1000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x18200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0x26e00000, to KASAN_SHADOW_END at 0x3effffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0x3f000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0x26e00000 = (0x3f000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0x26e00000 - (0x3f000000 >> 3) KASAN_SHADOW_OFFSET = 0x26e00000 - 0x07e00000 KASAN_SHADOW_OFFSET = 0x1f000000 - 0x5f000000 if we have 2G userspace / 2G kernelspace split: - The kernel space is 2G (0x80000000) - PAGE_OFFSET is set to 0x80000000 so the kernel static image uses 0x80000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0x7f000000 so the modules use addresses 0x7f000000 .. 0x7fffffff - So the addresses 0x7f000000 .. 0xffffffff need to be covered with shadow memory. That is 0x81000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x10200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0x6ee00000, to KASAN_SHADOW_END at 0x7effffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0x7f000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0x6ee00000 = (0x7f000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0x6ee00000 - (0x7f000000 >> 3) KASAN_SHADOW_OFFSET = 0x6ee00000 - 0x0fe00000 KASAN_SHADOW_OFFSET = 0x5f000000 - 0x9f000000 if we have 3G userspace / 1G kernelspace split, and this is the default split for ARM: - The kernel address space is 1GB (0x40000000) - PAGE_OFFSET is set to 0xc0000000 so the kernel static image uses 0xc0000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0xbf000000 so the modules use addresses 0xbf000000 .. 0xbfffffff - So the addresses 0xbf000000 .. 0xffffffff need to be covered with shadow memory. That is 0x41000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x08200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0xb6e00000, to KASAN_SHADOW_END at 0xbfffffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0xbf000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0xb6e00000 = (0xbf000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0xb6e00000 - (0xbf000000 >> 3) KASAN_SHADOW_OFFSET = 0xb6e00000 - 0x17e00000 KASAN_SHADOW_OFFSET = 0x9f000000 - 0x8f000000 if we have 3G userspace / 1G kernelspace with full 1 GB low memory (VMSPLIT_3G_OPT): - The kernel address space is 1GB (0x40000000) - PAGE_OFFSET is set to 0xb0000000 so the kernel static image uses 0xb0000000 .. 0xffffffff. - On top of that we have the MODULES_VADDR which under the worst case (using ARM instructions) is PAGE_OFFSET - 16M (0x01000000) = 0xaf000000 so the modules use addresses 0xaf000000 .. 0xaffffff - So the addresses 0xaf000000 .. 0xffffffff need to be covered with shadow memory. That is 0x51000000 bytes of memory. - 1/8 of that is needed for its shadow memory, so 0x0a200000 bytes of shadow memory is needed. We "steal" that from the remaining lowmem. - The KASAN_SHADOW_START becomes 0xa4e00000, to KASAN_SHADOW_END at 0xaeffffff. - Now we can calculate the KASAN_SHADOW_OFFSET for any kernel address as 0xaf000000 needs to map to the first byte of shadow memory and 0xffffffff needs to map to the last byte of shadow memory. Since: SHADOW_ADDR = (address >> 3) + KASAN_SHADOW_OFFSET 0xa4e00000 = (0xaf000000 >> 3) + KASAN_SHADOW_OFFSET KASAN_SHADOW_OFFSET = 0xa4e00000 - (0xaf000000 >> 3) KASAN_SHADOW_OFFSET = 0xa4e00000 - 0x15e00000 KASAN_SHADOW_OFFSET = 0x8f000000 - The default value of 0xffffffff for KASAN_SHADOW_OFFSET is an error value. We should always match one of the above shadow offsets. When we do this, TASK_SIZE will sometimes get a bit odd values that will not fit into immediate mov assembly instructions. To account for this, we need to rewrite some assembly using TASK_SIZE like this: - mov r1, #TASK_SIZE + ldr r1, =TASK_SIZE or - cmp r4, #TASK_SIZE + ldr r0, =TASK_SIZE + cmp r4, r0 this is done to avoid the immediate #TASK_SIZE that need to fit into a limited number of bits. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Ard Biesheuvel Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/Kconfig | 9 +++++ arch/arm/include/asm/kasan_def.h | 81 ++++++++++++++++++++++++++++++++++++++ arch/arm/include/asm/memory.h | 5 +++ arch/arm/include/asm/uaccess-asm.h | 2 +- arch/arm/kernel/entry-armv.S | 3 +- arch/arm/kernel/entry-common.S | 9 +++-- arch/arm/mm/mmu.c | 18 +++++++++ 7 files changed, 122 insertions(+), 5 deletions(-) create mode 100644 arch/arm/include/asm/kasan_def.h (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..194032568b7a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1323,6 +1323,15 @@ config PAGE_OFFSET default 0xB0000000 if VMSPLIT_3G_OPT default 0xC0000000 +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0x1f000000 if PAGE_OFFSET=0x40000000 + default 0x5f000000 if PAGE_OFFSET=0x80000000 + default 0x9f000000 if PAGE_OFFSET=0xC0000000 + default 0x8f000000 if PAGE_OFFSET=0xB0000000 + default 0xffffffff + config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/arch/arm/include/asm/kasan_def.h b/arch/arm/include/asm/kasan_def.h new file mode 100644 index 000000000000..5739605aa7cf --- /dev/null +++ b/arch/arm/include/asm/kasan_def.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan_def.h + * + * Copyright (c) 2018 Huawei Technologies Co., Ltd. + * + * Author: Abbott Liu + */ + +#ifndef __ASM_KASAN_DEF_H +#define __ASM_KASAN_DEF_H + +#ifdef CONFIG_KASAN + +/* + * Define KASAN_SHADOW_OFFSET,KASAN_SHADOW_START and KASAN_SHADOW_END for + * the Arm kernel address sanitizer. We are "stealing" lowmem (the 4GB + * addressable by a 32bit architecture) out of the virtual address + * space to use as shadow memory for KASan as follows: + * + * +----+ 0xffffffff + * | | \ + * | | |-> Static kernel image (vmlinux) BSS and page table + * | |/ + * +----+ PAGE_OFFSET + * | | \ + * | | |-> Loadable kernel modules virtual address space area + * | |/ + * +----+ MODULES_VADDR = KASAN_SHADOW_END + * | | \ + * | | |-> The shadow area of kernel virtual address. + * | |/ + * +----+-> TASK_SIZE (start of kernel space) = KASAN_SHADOW_START the + * | |\ shadow address of MODULES_VADDR + * | | | + * | | | + * | | |-> The user space area in lowmem. The kernel address + * | | | sanitizer do not use this space, nor does it map it. + * | | | + * | | | + * | | | + * | | | + * | |/ + * ------ 0 + * + * 1) KASAN_SHADOW_START + * This value begins with the MODULE_VADDR's shadow address. It is the + * start of kernel virtual space. Since we have modules to load, we need + * to cover also that area with shadow memory so we can find memory + * bugs in modules. + * + * 2) KASAN_SHADOW_END + * This value is the 0x100000000's shadow address: the mapping that would + * be after the end of the kernel memory at 0xffffffff. It is the end of + * kernel address sanitizer shadow area. It is also the start of the + * module area. + * + * 3) KASAN_SHADOW_OFFSET: + * This value is used to map an address to the corresponding shadow + * address by the following formula: + * + * shadow_addr = (address >> 3) + KASAN_SHADOW_OFFSET; + * + * As you would expect, >> 3 is equal to dividing by 8, meaning each + * byte in the shadow memory covers 8 bytes of kernel memory, so one + * bit shadow memory per byte of kernel memory is used. + * + * The KASAN_SHADOW_OFFSET is provided in a Kconfig option depending + * on the VMSPLIT layout of the system: the kernel and userspace can + * split up lowmem in different ways according to needs, so we calculate + * the shadow offset depending on this. + */ + +#define KASAN_SHADOW_SCALE_SHIFT 3 +#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL) +#define KASAN_SHADOW_END ((UL(1) << (32 - KASAN_SHADOW_SCALE_SHIFT)) \ + + KASAN_SHADOW_OFFSET) +#define KASAN_SHADOW_START ((KASAN_SHADOW_END >> 3) + KASAN_SHADOW_OFFSET) + +#endif +#endif diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index bb79e52aeb90..598dbdca2017 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -18,6 +18,7 @@ #ifdef CONFIG_NEED_MACH_MEMORY_H #include #endif +#include /* PAGE_OFFSET - the virtual address of the start of the kernel image */ #define PAGE_OFFSET UL(CONFIG_PAGE_OFFSET) @@ -28,7 +29,11 @@ * TASK_SIZE - the maximum size of a user space task. * TASK_UNMAPPED_BASE - the lower boundary of the mmap VM area */ +#ifndef CONFIG_KASAN #define TASK_SIZE (UL(CONFIG_PAGE_OFFSET) - UL(SZ_16M)) +#else +#define TASK_SIZE (KASAN_SHADOW_START) +#endif #define TASK_UNMAPPED_BASE ALIGN(TASK_SIZE / 3, SZ_16M) /* diff --git a/arch/arm/include/asm/uaccess-asm.h b/arch/arm/include/asm/uaccess-asm.h index 907571fd05c6..e6eb7a2aaf1e 100644 --- a/arch/arm/include/asm/uaccess-asm.h +++ b/arch/arm/include/asm/uaccess-asm.h @@ -85,7 +85,7 @@ */ .macro uaccess_entry, tsk, tmp0, tmp1, tmp2, disable ldr \tmp1, [\tsk, #TI_ADDR_LIMIT] - mov \tmp2, #TASK_SIZE + ldr \tmp2, =TASK_SIZE str \tmp2, [\tsk, #TI_ADDR_LIMIT] DACR( mrc p15, 0, \tmp0, c3, c0, 0) DACR( str \tmp0, [sp, #SVC_DACR]) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 55a47df04773..c4220f51fcf3 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -427,7 +427,8 @@ ENDPROC(__fiq_abt) @ if it was interrupted in a critical region. Here we @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. - cmp r4, #TASK_SIZE + ldr r0, =TASK_SIZE + cmp r4, r0 blhs kuser_cmpxchg64_fixup #endif #endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 271cb8a1eba1..fee279e28a72 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -50,7 +50,8 @@ __ret_fast_syscall: UNWIND(.cantunwind ) disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK @@ -87,7 +88,8 @@ __ret_fast_syscall: #endif disable_irq_notrace @ disable interrupts ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] @ re-check for syscall tracing tst r1, #_TIF_SYSCALL_WORK | _TIF_WORK_MASK @@ -128,7 +130,8 @@ ret_slow_syscall: disable_irq_notrace @ disable interrupts ENTRY(ret_to_user_from_irq) ldr r2, [tsk, #TI_ADDR_LIMIT] - cmp r2, #TASK_SIZE + ldr r1, =TASK_SIZE + cmp r2, r1 blne addr_limit_check_failed ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index fa259825310c..c06ebfbc48c4 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -1255,8 +1256,25 @@ static inline void prepare_page_table(void) /* * Clear out all the mappings below the kernel image. */ +#ifdef CONFIG_KASAN + /* + * KASan's shadow memory inserts itself between the TASK_SIZE + * and MODULES_VADDR. Do not clear the KASan shadow memory mappings. + */ + for (addr = 0; addr < KASAN_SHADOW_START; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); + /* + * Skip over the KASan shadow area. KASAN_SHADOW_END is sometimes + * equal to MODULES_VADDR and then we exit the pmd clearing. If we + * are using a thumb-compiled kernel, there there will be 8MB more + * to clear as KASan always offset to 16 MB below MODULES_VADDR. + */ + for (addr = KASAN_SHADOW_END; addr < MODULES_VADDR; addr += PMD_SIZE) + pmd_clear(pmd_off_k(addr)); +#else for (addr = 0; addr < MODULES_VADDR; addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); +#endif #ifdef CONFIG_XIP_KERNEL /* The XIP kernel is mapped in the module area -- skip over it */ -- cgit v1.2.3 From 5615f69bc2097452ecc954f5264d784e158d6801 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:55:16 +0100 Subject: ARM: 9016/2: Initialize the mapping of KASan shadow memory This patch initializes KASan shadow region's page table and memory. There are two stage for KASan initializing: 1. At early boot stage the whole shadow region is mapped to just one physical page (kasan_zero_page). It is finished by the function kasan_early_init which is called by __mmap_switched(arch/arm/kernel/ head-common.S) 2. After the calling of paging_init, we use kasan_zero_page as zero shadow for some memory that KASan does not need to track, and we allocate a new shadow space for the other memory that KASan need to track. These issues are finished by the function kasan_init which is call by setup_arch. When using KASan we also need to increase the THREAD_SIZE_ORDER from 1 to 2 as the extra calls for shadow memory uses quite a bit of stack. As we need to make a temporary copy of the PGD when setting up shadow memory we create a helpful PGD_SIZE definition for both LPAE and non-LPAE setups. The KASan core code unconditionally calls pud_populate() so this needs to be changed from BUG() to do {} while (0) when building with KASan enabled. After the initial development by Andre Ryabinin several modifications have been made to this code: Abbott Liu - Add support ARM LPAE: If LPAE is enabled, KASan shadow region's mapping table need be copied in the pgd_alloc() function. - Change kasan_pte_populate,kasan_pmd_populate,kasan_pud_populate, kasan_pgd_populate from .meminit.text section to .init.text section. Reported by Florian Fainelli Linus Walleij : - Drop the custom mainpulation of TTBR0 and just use cpu_switch_mm() to switch the pgd table. - Adopt to handle 4th level page tabel folding. - Rewrite the entire page directory and page entry initialization sequence to be recursive based on ARM64:s kasan_init.c. Ard Biesheuvel : - Necessary underlying fixes. - Crucial bug fixes to the memory set-up code. Co-developed-by: Andrey Ryabinin Co-developed-by: Abbott Liu Co-developed-by: Ard Biesheuvel Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Cc: Mike Rapoport Acked-by: Mike Rapoport Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Reported-by: Russell King - ARM Linux Reported-by: Florian Fainelli Signed-off-by: Andrey Ryabinin Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Ard Biesheuvel Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/include/asm/kasan.h | 33 +++++ arch/arm/include/asm/pgalloc.h | 8 +- arch/arm/include/asm/thread_info.h | 8 + arch/arm/kernel/head-common.S | 3 + arch/arm/kernel/setup.c | 2 + arch/arm/mm/Makefile | 3 + arch/arm/mm/kasan_init.c | 291 +++++++++++++++++++++++++++++++++++++ arch/arm/mm/pgd.c | 16 +- 8 files changed, 362 insertions(+), 2 deletions(-) create mode 100644 arch/arm/include/asm/kasan.h create mode 100644 arch/arm/mm/kasan_init.c (limited to 'arch/arm') diff --git a/arch/arm/include/asm/kasan.h b/arch/arm/include/asm/kasan.h new file mode 100644 index 000000000000..303c35df3135 --- /dev/null +++ b/arch/arm/include/asm/kasan.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/arm/include/asm/kasan.h + * + * Copyright (c) 2015 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * + */ + +#ifndef __ASM_KASAN_H +#define __ASM_KASAN_H + +#ifdef CONFIG_KASAN + +#include + +#define KASAN_SHADOW_SCALE_SHIFT 3 + +/* + * The compiler uses a shadow offset assuming that addresses start + * from 0. Kernel addresses don't start from 0, so shadow + * for kernel really starts from 'compiler's shadow offset' + + * ('kernel address space start' >> KASAN_SHADOW_SCALE_SHIFT) + */ + +asmlinkage void kasan_early_init(void); +extern void kasan_init(void); + +#else +static inline void kasan_init(void) { } +#endif + +#endif diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index 15f4674715f8..fdee1f04f4f3 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -21,6 +21,7 @@ #define _PAGE_KERNEL_TABLE (PMD_TYPE_TABLE | PMD_BIT4 | PMD_DOMAIN(DOMAIN_KERNEL)) #ifdef CONFIG_ARM_LPAE +#define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) { @@ -28,14 +29,19 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) } #else /* !CONFIG_ARM_LPAE */ +#define PGD_SIZE (PAGE_SIZE << 2) /* * Since we have only two-level page tables, these are trivial */ #define pmd_alloc_one(mm,addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(mm, pmd) do { } while (0) +#ifdef CONFIG_KASAN +/* The KASan core unconditionally calls pud_populate() on all architectures */ +#define pud_populate(mm,pmd,pte) do { } while (0) +#else #define pud_populate(mm,pmd,pte) BUG() - +#endif #endif /* CONFIG_ARM_LPAE */ extern pgd_t *pgd_alloc(struct mm_struct *mm); diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index 536b6b979f63..56fae7861fd3 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -13,7 +13,15 @@ #include #include +#ifdef CONFIG_KASAN +/* + * KASan uses a lot of extra stack space so the thread size order needs to + * be increased. + */ +#define THREAD_SIZE_ORDER 2 +#else #define THREAD_SIZE_ORDER 1 +#endif #define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER) #define THREAD_START_SP (THREAD_SIZE - 8) diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6840c7c60a85..89c80154b9ef 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -111,6 +111,9 @@ __mmap_switched: str r8, [r2] @ Save atags pointer cmp r3, #0 strne r10, [r3] @ Save control register values +#ifdef CONFIG_KASAN + bl kasan_early_init +#endif mov lr, #0 b start_kernel ENDPROC(__mmap_switched) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 694aa6b4bd03..f4bd8b654255 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -59,6 +59,7 @@ #include #include #include +#include #include "atags.h" @@ -1145,6 +1146,7 @@ void __init setup_arch(char **cmdline_p) early_ioremap_reset(); paging_init(mdesc); + kasan_init(); request_standard_resources(mdesc); if (mdesc->restart) diff --git a/arch/arm/mm/Makefile b/arch/arm/mm/Makefile index 99699c32d8a5..4536159bc8fa 100644 --- a/arch/arm/mm/Makefile +++ b/arch/arm/mm/Makefile @@ -113,3 +113,6 @@ obj-$(CONFIG_CACHE_L2X0_PMU) += cache-l2x0-pmu.o obj-$(CONFIG_CACHE_XSC3L2) += cache-xsc3l2.o obj-$(CONFIG_CACHE_TAUROS2) += cache-tauros2.o obj-$(CONFIG_CACHE_UNIPHIER) += cache-uniphier.o + +KASAN_SANITIZE_kasan_init.o := n +obj-$(CONFIG_KASAN) += kasan_init.o diff --git a/arch/arm/mm/kasan_init.c b/arch/arm/mm/kasan_init.c new file mode 100644 index 000000000000..9c348042a724 --- /dev/null +++ b/arch/arm/mm/kasan_init.c @@ -0,0 +1,291 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file contains kasan initialization code for ARM. + * + * Copyright (c) 2018 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin + * Author: Linus Walleij + */ + +#define pr_fmt(fmt) "kasan: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mm.h" + +static pgd_t tmp_pgd_table[PTRS_PER_PGD] __initdata __aligned(PGD_SIZE); + +pmd_t tmp_pmd_table[PTRS_PER_PMD] __page_aligned_bss; + +static __init void *kasan_alloc_block(size_t size) +{ + return memblock_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_KASAN, NUMA_NO_NODE); +} + +static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pte_t *ptep = pte_offset_kernel(pmdp, addr); + + do { + pte_t entry; + void *p; + + next = addr + PAGE_SIZE; + + if (!early) { + if (!pte_none(READ_ONCE(*ptep))) + continue; + + p = kasan_alloc_block(PAGE_SIZE); + if (!p) { + panic("%s failed to allocate shadow page for address 0x%lx\n", + __func__, addr); + return; + } + memset(p, KASAN_SHADOW_INIT, PAGE_SIZE); + entry = pfn_pte(virt_to_pfn(p), + __pgprot(pgprot_val(PAGE_KERNEL))); + } else if (pte_none(READ_ONCE(*ptep))) { + /* + * The early shadow memory is mapping all KASan + * operations to one and the same page in memory, + * "kasan_early_shadow_page" so that the instrumentation + * will work on a scratch area until we can set up the + * proper KASan shadow memory. + */ + entry = pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(_L_PTE_DEFAULT | L_PTE_DIRTY | L_PTE_XN)); + } else { + /* + * Early shadow mappings are PMD_SIZE aligned, so if the + * first entry is already set, they must all be set. + */ + return; + } + + set_pte_at(&init_mm, addr, ptep, entry); + } while (ptep++, addr = next, addr != end); +} + +/* + * The pmd (page middle directory) is only used on LPAE + */ +static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, + unsigned long end, bool early) +{ + unsigned long next; + pmd_t *pmdp = pmd_offset(pudp, addr); + + do { + if (pmd_none(*pmdp)) { + /* + * We attempt to allocate a shadow block for the PMDs + * used by the PTEs for this address if it isn't already + * allocated. + */ + void *p = early ? kasan_early_shadow_pte : + kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pmd_populate_kernel(&init_mm, pmdp, p); + flush_pmd_entry(pmdp); + } + + next = pmd_addr_end(addr, end); + kasan_pte_populate(pmdp, addr, next, early); + } while (pmdp++, addr = next, addr != end); +} + +static void __init kasan_pgd_populate(unsigned long addr, unsigned long end, + bool early) +{ + unsigned long next; + pgd_t *pgdp; + p4d_t *p4dp; + pud_t *pudp; + + pgdp = pgd_offset_k(addr); + + do { + /* + * Allocate and populate the shadow block of p4d folded into + * pud folded into pmd if it doesn't already exist + */ + if (!early && pgd_none(*pgdp)) { + void *p = kasan_alloc_block(PAGE_SIZE); + + if (!p) { + panic("%s failed to allocate shadow block for address 0x%lx\n", + __func__, addr); + return; + } + pgd_populate(&init_mm, pgdp, p); + } + + next = pgd_addr_end(addr, end); + /* + * We just immediately jump over the p4d and pud page + * directories since we believe ARM32 will never gain four + * nor five level page tables. + */ + p4dp = p4d_offset(pgdp, addr); + pudp = pud_offset(p4dp, addr); + + kasan_pmd_populate(pudp, addr, next, early); + } while (pgdp++, addr = next, addr != end); +} + +extern struct proc_info_list *lookup_processor_type(unsigned int); + +void __init kasan_early_init(void) +{ + struct proc_info_list *list; + + /* + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S + */ + list = lookup_processor_type(read_cpuid_id()); + if (list) { +#ifdef MULTI_CPU + processor = *list->proc; +#endif + } + + BUILD_BUG_ON((KASAN_SHADOW_END - (1UL << 29)) != KASAN_SHADOW_OFFSET); + /* + * We walk the page table and set all of the shadow memory to point + * to the scratch page. + */ + kasan_pgd_populate(KASAN_SHADOW_START, KASAN_SHADOW_END, true); +} + +static void __init clear_pgds(unsigned long start, + unsigned long end) +{ + for (; start && start < end; start += PMD_SIZE) + pmd_clear(pmd_off_k(start)); +} + +static int __init create_mapping(void *start, void *end) +{ + void *shadow_start, *shadow_end; + + shadow_start = kasan_mem_to_shadow(start); + shadow_end = kasan_mem_to_shadow(end); + + pr_info("Mapping kernel virtual memory block: %px-%px at shadow: %px-%px\n", + start, end, shadow_start, shadow_end); + + kasan_pgd_populate((unsigned long)shadow_start & PAGE_MASK, + PAGE_ALIGN((unsigned long)shadow_end), false); + return 0; +} + +void __init kasan_init(void) +{ + phys_addr_t pa_start, pa_end; + u64 i; + + /* + * We are going to perform proper setup of shadow memory. + * + * At first we should unmap early shadow (clear_pgds() call bellow). + * However, instrumented code can't execute without shadow memory. + * + * To keep the early shadow memory MMU tables around while setting up + * the proper shadow memory, we copy swapper_pg_dir (the initial page + * table) to tmp_pgd_table and use that to keep the early shadow memory + * mapped until the full shadow setup is finished. Then we swap back + * to the proper swapper_pg_dir. + */ + + memcpy(tmp_pgd_table, swapper_pg_dir, sizeof(tmp_pgd_table)); +#ifdef CONFIG_ARM_LPAE + /* We need to be in the same PGD or this won't work */ + BUILD_BUG_ON(pgd_index(KASAN_SHADOW_START) != + pgd_index(KASAN_SHADOW_END)); + memcpy(tmp_pmd_table, + pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_START)), + sizeof(tmp_pmd_table)); + set_pgd(&tmp_pgd_table[pgd_index(KASAN_SHADOW_START)], + __pgd(__pa(tmp_pmd_table) | PMD_TYPE_TABLE | L_PGD_SWAPPER)); +#endif + cpu_switch_mm(tmp_pgd_table, &init_mm); + local_flush_tlb_all(); + + clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); + + kasan_populate_early_shadow(kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)-1UL) + 1); + + for_each_mem_range(i, &pa_start, &pa_end) { + void *start = __va(pa_start); + void *end = __va(pa_end); + + /* Do not attempt to shadow highmem */ + if (pa_start >= arm_lowmem_limit) { + pr_info("Skip highmem block at %pa-%pa\n", &pa_start, &pa_end); + continue; + } + if (pa_end > arm_lowmem_limit) { + pr_info("Truncating shadow for memory block at %pa-%pa to lowmem region at %pa\n", + &pa_start, &pa_end, &arm_lowmem_limit); + end = __va(arm_lowmem_limit); + } + if (start >= end) { + pr_info("Skipping invalid memory block %pa-%pa (virtual %p-%p)\n", + &pa_start, &pa_end, start, end); + continue; + } + + create_mapping(start, end); + } + + /* + * 1. The module global variables are in MODULES_VADDR ~ MODULES_END, + * so we need to map this area. + * 2. PKMAP_BASE ~ PKMAP_BASE+PMD_SIZE's shadow and MODULES_VADDR + * ~ MODULES_END's shadow is in the same PMD_SIZE, so we can't + * use kasan_populate_zero_shadow. + */ + create_mapping((void *)MODULES_VADDR, (void *)(PKMAP_BASE + PMD_SIZE)); + + /* + * KAsan may reuse the contents of kasan_early_shadow_pte directly, so + * we should make sure that it maps the zero page read-only. + */ + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte_at(&init_mm, KASAN_SHADOW_START + i*PAGE_SIZE, + &kasan_early_shadow_pte[i], + pfn_pte(virt_to_pfn(kasan_early_shadow_page), + __pgprot(pgprot_val(PAGE_KERNEL) + | L_PTE_RDONLY))); + + cpu_switch_mm(swapper_pg_dir, &init_mm); + local_flush_tlb_all(); + + memset(kasan_early_shadow_page, 0, PAGE_SIZE); + pr_info("Kernel address sanitizer initialized\n"); + init_task.kasan_depth = 0; +} diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index c5e1b27046a8..f8e9bc58a84f 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -66,7 +66,21 @@ pgd_t *pgd_alloc(struct mm_struct *mm) new_pmd = pmd_alloc(mm, new_pud, 0); if (!new_pmd) goto no_pmd; -#endif +#ifdef CONFIG_KASAN + /* + * Copy PMD table for KASAN shadow mappings. + */ + init_pgd = pgd_offset_k(TASK_SIZE); + init_p4d = p4d_offset(init_pgd, TASK_SIZE); + init_pud = pud_offset(init_p4d, TASK_SIZE); + init_pmd = pmd_offset(init_pud, TASK_SIZE); + new_pmd = pmd_offset(new_pud, TASK_SIZE); + memcpy(new_pmd, init_pmd, + (pmd_index(MODULES_VADDR) - pmd_index(TASK_SIZE)) + * sizeof(pmd_t)); + clean_dcache_area(new_pmd, PTRS_PER_PMD * sizeof(pmd_t)); +#endif /* CONFIG_KASAN */ +#endif /* CONFIG_LPAE */ if (!vectors_high()) { /* -- cgit v1.2.3 From 421015713b306e47af95d4d61cdfbd96d462e4cb Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 25 Oct 2020 23:56:18 +0100 Subject: ARM: 9017/2: Enable KASan for ARM This patch enables the kernel address sanitizer for ARM. XIP_KERNEL has not been tested and is therefore not allowed for now. Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Cc: kasan-dev@googlegroups.com Acked-by: Dmitry Vyukov Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel # QEMU/KVM/mach-virt/LPAE/8G Tested-by: Florian Fainelli # Brahma SoCs Tested-by: Ahmad Fatoum # i.MX6Q Signed-off-by: Abbott Liu Signed-off-by: Florian Fainelli Signed-off-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 194032568b7a..2c5eb5c2b310 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -67,6 +67,7 @@ config ARM select HAVE_ARCH_BITREVERSE if (CPU_32v7M || CPU_32v7) && !CPU_32v6 select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL && !CPU_ENDIAN_BE32 && MMU select HAVE_ARCH_KGDB if !CPU_ENDIAN_BE32 && MMU + select HAVE_ARCH_KASAN if MMU && !XIP_KERNEL select HAVE_ARCH_MMAP_RND_BITS if MMU select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT -- cgit v1.2.3 From fc2933c133744305236793025b00c2f7d258b687 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 28 Oct 2020 14:20:55 +0100 Subject: ARM: 9020/1: mm: use correct section size macro to describe the FDT virtual address Commit 149a3ffe62b9dbc3 ("9012/1: move device tree mapping out of linear region") created a permanent, read-only section mapping of the device tree blob provided by the firmware, and added a set of macros to get the base and size of the virtually mapped FDT based on the physical address. However, while the mapping code uses the SECTION_SIZE macro correctly, the macros use PMD_SIZE instead, which means something entirely different on ARM when using short descriptors, and is therefore not the right quantity to use here. So replace PMD_SIZE with SECTION_SIZE. While at it, change the names of the macro and its parameter to clarify that it returns the virtual address of the start of the FDT, based on the physical address in memory. Tested-by: Joel Stanley Tested-by: Marek Szyprowski Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 6 +++--- arch/arm/kernel/setup.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 598dbdca2017..38a163f50130 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -73,8 +73,8 @@ #define XIP_VIRT_ADDR(physaddr) (MODULES_VADDR + ((physaddr) & 0x000fffff)) #define FDT_FIXED_BASE UL(0xff800000) -#define FDT_FIXED_SIZE (2 * PMD_SIZE) -#define FDT_VIRT_ADDR(physaddr) ((void *)(FDT_FIXED_BASE | (physaddr) % PMD_SIZE)) +#define FDT_FIXED_SIZE (2 * SECTION_SIZE) +#define FDT_VIRT_BASE(physbase) ((void *)(FDT_FIXED_BASE | (physbase) % SECTION_SIZE)) #if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE) /* @@ -116,7 +116,7 @@ extern unsigned long vectors_base; #define MODULES_VADDR PAGE_OFFSET #define XIP_VIRT_ADDR(physaddr) (physaddr) -#define FDT_VIRT_ADDR(physaddr) ((void *)(physaddr)) +#define FDT_VIRT_BASE(physbase) ((void *)(physbase)) #endif /* !CONFIG_MMU */ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f4bd8b654255..d32f7652a5bf 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1087,7 +1087,7 @@ void __init setup_arch(char **cmdline_p) void *atags_vaddr = NULL; if (__atags_pointer) - atags_vaddr = FDT_VIRT_ADDR(__atags_pointer); + atags_vaddr = FDT_VIRT_BASE(__atags_pointer); setup_processor(); if (atags_vaddr) { -- cgit v1.2.3 From 4e79f0211b473f8e1eab8211a9fd50cc41a3a061 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 21 Sep 2020 00:10:16 +0200 Subject: ARM: p2v: fix handling of LPAE translation in BE mode When running in BE mode on LPAE hardware with a PA-to-VA translation that exceeds 4 GB, we patch bits 39:32 of the offset into the wrong byte of the opcode. So fix that, by rotating the offset in r0 to the right by 8 bits, which will put the 8-bit immediate in bits 31:24. Note that this will also move bit #22 in its correct place when applying the rotation to the constant #0x400000. Fixes: d9a790df8e984 ("ARM: 7883/1: fix mov to mvn conversion in case of 64 bit phys_addr_t and BE") Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f8904227e7fd..98c1e68bdfcb 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -671,12 +671,8 @@ ARM_BE8(rev16 ip, ip) ldrcc r7, [r4], #4 @ use branch for delay slot bcc 1b bx lr -#else -#ifdef CONFIG_CPU_ENDIAN_BE8 - moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction -#endif b 2f 1: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @@ -685,7 +681,7 @@ ARM_BE8(rev16 ip, ip) tst ip, #0x000f0000 @ check the rotation field orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 #else bic ip, ip, #0x000000ff tst ip, #0xf00 @ check the rotation field -- cgit v1.2.3 From 0b1674638a5c69cbace63278625c199100955490 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:23:39 +0300 Subject: ARM: assembler: introduce adr_l, ldr_l and str_l macros Like arm64, ARM supports position independent code sequences that produce symbol references with a greater reach than the ordinary adr/ldr instructions. Since on ARM, the adrl pseudo-instruction is only supported in ARM mode (and not at all when using Clang), having a adr_l macro like we do on arm64 is useful, and increases symmetry as well. Currently, we use open coded instruction sequences involving literals and arithmetic operations. Instead, we can use movw/movt pairs on v7 CPUs, circumventing the D-cache entirely. E.g., on v7+ CPUs, we can emit a PC-relative reference as follows: movw , #:lower16: - (1f + 8) movt , #:upper16: - (1f + 8) 1: add , , pc For older CPUs, we can emit the literal into a subsection, allowing it to be emitted out of line while retaining the ability to perform arithmetic on label offsets. E.g., on pre-v7 CPUs, we can emit a PC-relative reference as follows: ldr , 2f 1: add , , pc .subsection 1 2: .long - (1b + 8) .previous This is allowed by the assembler because, unlike ordinary sections, subsections are combined into a single section in the object file, and so the label references are not true cross-section references that are visible as relocations. (Subsections have been available in binutils since 2004 at least, so they should not cause any issues with older toolchains.) So use the above to implement the macros mov_l, adr_l, ldr_l and str_l, all of which will use movw/movt pairs on v7 and later CPUs, and use PC-relative literals otherwise. Reviewed-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/assembler.h | 84 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index feac2c8b86f2..72627c5fb3b2 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -494,4 +494,88 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #define _ASM_NOKPROBE(entry) #endif + .macro __adldst_l, op, reg, sym, tmp, c + .if __LINUX_ARM_ARCH__ < 7 + ldr\c \tmp, .La\@ + .subsection 1 + .align 2 +.La\@: .long \sym - .Lpc\@ + .previous + .else + .ifnb \c + THUMB( ittt \c ) + .endif + movw\c \tmp, #:lower16:\sym - .Lpc\@ + movt\c \tmp, #:upper16:\sym - .Lpc\@ + .endif + +#ifndef CONFIG_THUMB2_KERNEL + .set .Lpc\@, . + 8 // PC bias + .ifc \op, add + add\c \reg, \tmp, pc + .else + \op\c \reg, [pc, \tmp] + .endif +#else +.Lb\@: add\c \tmp, \tmp, pc + /* + * In Thumb-2 builds, the PC bias depends on whether we are currently + * emitting into a .arm or a .thumb section. The size of the add opcode + * above will be 2 bytes when emitting in Thumb mode and 4 bytes when + * emitting in ARM mode, so let's use this to account for the bias. + */ + .set .Lpc\@, . + (. - .Lb\@) + + .ifnc \op, add + \op\c \reg, [\tmp] + .endif +#endif + .endm + + /* + * mov_l - move a constant value or [relocated] address into a register + */ + .macro mov_l, dst:req, imm:req + .if __LINUX_ARM_ARCH__ < 7 + ldr \dst, =\imm + .else + movw \dst, #:lower16:\imm + movt \dst, #:upper16:\imm + .endif + .endm + + /* + * adr_l - adr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro adr_l, dst:req, sym:req, cond + __adldst_l add, \dst, \sym, \dst, \cond + .endm + + /* + * ldr_l - ldr pseudo-op with unlimited range + * + * @dst: destination register + * @sym: name of the symbol + * @cond: conditional opcode suffix + */ + .macro ldr_l, dst:req, sym:req, cond + __adldst_l ldr, \dst, \sym, \dst, \cond + .endm + + /* + * str_l - str pseudo-op with unlimited range + * + * @src: source register + * @sym: name of the symbol + * @tmp: mandatory scratch register + * @cond: conditional opcode suffix + */ + .macro str_l, src:req, sym:req, tmp:req, cond + __adldst_l str, \src, \sym, \tmp, \cond + .endm + #endif /* __ASM_ASSEMBLER_H__ */ -- cgit v1.2.3 From 22f2d23098f7d34fc5142531cffb241d14611684 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:24:37 +0300 Subject: ARM: module: add support for place relative relocations When using the new adr_l/ldr_l/str_l macros to refer to external symbols from modules, the linker may emit place relative ELF relocations that need to be fixed up by the module loader. So add support for these. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/elf.h | 5 +++++ arch/arm/kernel/module.c | 20 ++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/elf.h b/arch/arm/include/asm/elf.h index b078d992414b..0ac62a54b73c 100644 --- a/arch/arm/include/asm/elf.h +++ b/arch/arm/include/asm/elf.h @@ -51,6 +51,7 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_NONE 0 #define R_ARM_PC24 1 #define R_ARM_ABS32 2 +#define R_ARM_REL32 3 #define R_ARM_CALL 28 #define R_ARM_JUMP24 29 #define R_ARM_TARGET1 38 @@ -58,11 +59,15 @@ typedef struct user_fp elf_fpregset_t; #define R_ARM_PREL31 42 #define R_ARM_MOVW_ABS_NC 43 #define R_ARM_MOVT_ABS 44 +#define R_ARM_MOVW_PREL_NC 45 +#define R_ARM_MOVT_PREL 46 #define R_ARM_THM_CALL 10 #define R_ARM_THM_JUMP24 30 #define R_ARM_THM_MOVW_ABS_NC 47 #define R_ARM_THM_MOVT_ABS 48 +#define R_ARM_THM_MOVW_PREL_NC 49 +#define R_ARM_THM_MOVT_PREL 50 /* * These are used to set parameters in the core dumps. diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index e15444b25ca0..beac45e89ba6 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -185,14 +185,24 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc |= offset & 0x7fffffff; break; + case R_ARM_REL32: + *(u32 *)loc += sym->st_value - loc; + break; + case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: + case R_ARM_MOVW_PREL_NC: + case R_ARM_MOVT_PREL: offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_PREL) offset >>= 16; tmp &= 0xfff0f000; @@ -283,6 +293,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: + case R_ARM_THM_MOVW_PREL_NC: + case R_ARM_THM_MOVT_PREL: upper = __mem_to_opcode_thumb16(*(u16 *)loc); lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); @@ -302,7 +314,11 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset = (offset ^ 0x8000) - 0x8000; offset += sym->st_value; - if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVW_PREL_NC) + offset -= loc; + if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS || + ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_PREL) offset >>= 16; upper = (u16)((upper & 0xfbf0) | -- cgit v1.2.3 From eae78e1a97201a81a851342ad9659b60f61a3951 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 17:51:55 +0200 Subject: ARM: p2v: move patching code to separate assembler source file Move the phys2virt patching code into a separate .S file before doing some work on it. Suggested-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/head.S | 138 ---------------------------------------- arch/arm/kernel/phys2virt.S | 151 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 138 deletions(-) create mode 100644 arch/arm/kernel/phys2virt.S (limited to 'arch/arm') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89e5d864e923..9e465efcc8b6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_ARM_PATCH_PHYS_VIRT) += phys2virt.o # This is executed very early using a temporary stack when no memory allocator # nor global data is available. Everything has to be allocated on the stack. diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 98c1e68bdfcb..7e3f36809011 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -586,142 +586,4 @@ ENTRY(fixup_smp) ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -#ifdef __ARMEB__ -#define LOW_OFFSET 0x4 -#define HIGH_OFFSET 0x0 -#else -#define LOW_OFFSET 0x0 -#define HIGH_OFFSET 0x4 -#endif - -#ifdef CONFIG_ARM_PATCH_PHYS_VIRT - -/* __fixup_pv_table - patch the stub instructions with the delta between - * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and - * can be expressed by an immediate shifter operand. The stub instruction - * has a form of '(add|sub) rd, rn, #imm'. - */ - __HEAD -__fixup_pv_table: - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address - mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned -THUMB( it ne @ cross section branch ) - bne __error - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits - b __fixup_a_pv_table -ENDPROC(__fixup_pv_table) - - .align -1: .long . - .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - - .text -__fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 - ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word - ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 - cmn r0, #1 -#ifdef CONFIG_THUMB2_KERNEL - moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq 2f - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 - b 2f -1: add r7, r3 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne 2f - ldrh ip, [r7] -ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 -ARM_BE8(rev16 ip, ip) - strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr -#else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] -#ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 -#else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 -#endif - str ip, [r7, r3] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - ret lr -#endif -ENDPROC(__fixup_a_pv_table) - - .align -3: .long __pv_offset - -ENTRY(fixup_pv_table) - stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset - mov r4, r0 @ r0 = table start - add r5, r0, r1 @ r1 = table size - bl __fixup_a_pv_table - ldmfd sp!, {r4 - r7, pc} -ENDPROC(fixup_pv_table) - - .data - .align 2 - .globl __pv_phys_pfn_offset - .type __pv_phys_pfn_offset, %object -__pv_phys_pfn_offset: - .word 0 - .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset - - .globl __pv_offset - .type __pv_offset, %object -__pv_offset: - .quad 0 - .size __pv_offset, . -__pv_offset -#endif - #include "head-common.S" diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S new file mode 100644 index 000000000000..7c17fbfeeedd --- /dev/null +++ b/arch/arm/kernel/phys2virt.S @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 1994-2002 Russell King + * Copyright (c) 2003 ARM Limited + * All Rights Reserved + */ + +#include +#include +#include +#include + +#ifdef __ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + +/* + * __fixup_pv_table - patch the stub instructions with the delta between + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be + * 16MiB aligned. + * + * Called from head.S, which expects the following registers to be preserved: + * r1 = machine no, r2 = atags or dtb, + * r8 = phys_offset, r9 = cpuid, r10 = procinfo + */ + __HEAD +ENTRY(__fixup_pv_table) + adr r0, 1f + ldmia r0, {r3-r7} + mvn ip, #0 + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET + add r4, r4, r3 @ adjust table start address + add r5, r5, r3 @ adjust table end address + add r6, r6, r3 @ adjust __pv_phys_pfn_offset address + add r7, r7, r3 @ adjust __pv_offset address + mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN + str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits + mov r6, r3, lsr #24 @ constant for add/sub instructions + teq r3, r6, lsl #24 @ must be 16MiB aligned + bne 0f + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits + b __fixup_a_pv_table +0: mov r0, r0 @ deadloop on error + b 0b +ENDPROC(__fixup_pv_table) + + .align +1: .long . + .long __pv_table_begin + .long __pv_table_end +2: .long __pv_phys_pfn_offset + .long __pv_offset + + .text +__fixup_a_pv_table: + adr r0, 3f + ldr r6, [r0] + add r6, r6, r3 + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + mov r6, r6, lsr #24 + cmn r0, #1 +#ifdef CONFIG_THUMB2_KERNEL + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsls r6, #24 + beq 2f + clz r7, r6 + lsr r6, #24 + lsl r6, r7 + bic r6, #0x0080 + lsrs r7, #1 + orrcs r6, #0x0080 + orr r6, r6, r7, lsl #12 + orr r6, #0x4000 + b 2f +1: add r7, r3 + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + tst ip, #0x4000 + and ip, #0x8f00 + orrne ip, r6 @ mask in offset bits 31-24 + orreq ip, r0 @ mask in offset bits 7-0 +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] + bne 2f + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + bic ip, #0x20 + orr ip, ip, r0, lsr #16 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + bx lr +#else + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + b 2f +1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + tst ip, #0x000f0000 @ check the rotation field + orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 + biceq ip, ip, #0x00004000 @ clear bit 22 + orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 +#else + bic ip, ip, #0x000000ff + tst ip, #0xf00 @ check the rotation field + orrne ip, ip, r6 @ mask in offset bits 31-24 + biceq ip, ip, #0x400000 @ clear bit 22 + orreq ip, ip, r0 @ mask in offset bits 7-0 +#endif + str ip, [r7, r3] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + ret lr +#endif +ENDPROC(__fixup_a_pv_table) + + .align +3: .long __pv_offset + +ENTRY(fixup_pv_table) + stmfd sp!, {r4 - r7, lr} + mov r3, #0 @ no offset + mov r4, r0 @ r0 = table start + add r5, r0, r1 @ r1 = table size + bl __fixup_a_pv_table + ldmfd sp!, {r4 - r7, pc} +ENDPROC(fixup_pv_table) + + .data + .align 2 + .globl __pv_phys_pfn_offset + .type __pv_phys_pfn_offset, %object +__pv_phys_pfn_offset: + .word 0 + .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset + + .globl __pv_offset + .type __pv_offset, %object +__pv_offset: + .quad 0 + .size __pv_offset, . -__pv_offset -- cgit v1.2.3 From 4b16421c3e955f440eb45546db6ce33d47f29c78 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Sep 2020 21:29:36 +0300 Subject: ARM: p2v: factor out shared loop processing The ARM and Thumb2 versions of the p2v patching loop have some overlap at the end of the loop, so factor that out. As numeric labels are not required to be unique, and may therefore be ambiguous, use named local labels for the start and end of the loop instead. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 7c17fbfeeedd..8fb1f7bcc720 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -68,7 +68,7 @@ __fixup_a_pv_table: #ifdef CONFIG_THUMB2_KERNEL moveq r0, #0x200000 @ set bit 21, mov to mvn instruction lsls r6, #24 - beq 2f + beq .Lnext clz r7, r6 lsr r6, #24 lsl r6, r7 @@ -77,8 +77,8 @@ __fixup_a_pv_table: orrcs r6, #0x0080 orr r6, r6, r7, lsl #12 orr r6, #0x4000 - b 2f -1: add r7, r3 + b .Lnext +.Lloop: add r7, r3 ldrh ip, [r7, #2] ARM_BE8(rev16 ip, ip) tst ip, #0x4000 @@ -87,21 +87,17 @@ ARM_BE8(rev16 ip, ip) orreq ip, r0 @ mask in offset bits 7-0 ARM_BE8(rev16 ip, ip) strh ip, [r7, #2] - bne 2f + bne .Lnext ldrh ip, [r7] ARM_BE8(rev16 ip, ip) bic ip, #0x20 orr ip, ip, r0, lsr #16 ARM_BE8(rev16 ip, ip) strh ip, [r7] -2: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b - bx lr #else moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b 2f -1: ldr ip, [r7, r3] + b .Lnext +.Lloop: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 @ in BE8, we load data in BE, but instructions still in LE bic ip, ip, #0xff000000 @@ -117,11 +113,13 @@ ARM_BE8(rev16 ip, ip) orreq ip, ip, r0 @ mask in offset bits 7-0 #endif str ip, [r7, r3] -2: cmp r4, r5 +#endif + +.Lnext: + cmp r4, r5 ldrcc r7, [r4], #4 @ use branch for delay slot - bcc 1b + bcc .Lloop ret lr -#endif ENDPROC(__fixup_a_pv_table) .align -- cgit v1.2.3 From 7a94849e81b5c10e71f0a555300313c2789d9b0d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 17 Sep 2020 21:36:46 +0300 Subject: ARM: p2v: factor out BE8 handling The big and little endian versions of the ARM p2v patching routine only differ in the values of the constants, so factor those out into macros so that we only have one version of the logic sequence to maintain. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 8fb1f7bcc720..5031e5a2e78b 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -95,23 +95,25 @@ ARM_BE8(rev16 ip, ip) ARM_BE8(rev16 ip, ip) strh ip, [r7] #else - moveq r0, #0x400000 @ set bit 22, mov to mvn instruction - b .Lnext -.Lloop: ldr ip, [r7, r3] #ifdef CONFIG_CPU_ENDIAN_BE8 - @ in BE8, we load data in BE, but instructions still in LE - bic ip, ip, #0xff000000 - tst ip, #0x000f0000 @ check the rotation field - orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 - biceq ip, ip, #0x00004000 @ clear bit 22 - orreq ip, ip, r0, ror #8 @ mask in offset bits 7-0 +@ in BE8, we load data in BE, but instructions still in LE +#define PV_BIT22 0x00004000 +#define PV_IMM8_MASK 0xff000000 +#define PV_ROT_MASK 0x000f0000 #else - bic ip, ip, #0x000000ff - tst ip, #0xf00 @ check the rotation field - orrne ip, ip, r6 @ mask in offset bits 31-24 - biceq ip, ip, #0x400000 @ clear bit 22 - orreq ip, ip, r0 @ mask in offset bits 7-0 +#define PV_BIT22 0x00400000 +#define PV_IMM8_MASK 0x000000ff +#define PV_ROT_MASK 0xf00 #endif + + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + b .Lnext +.Lloop: ldr ip, [r7, r3] + bic ip, ip, #PV_IMM8_MASK + tst ip, #PV_ROT_MASK @ check the rotation field + orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + biceq ip, ip, #PV_BIT22 @ clear bit 22 + orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) str ip, [r7, r3] #endif -- cgit v1.2.3 From 0869f3b9da38889faef2ccafcf675c713d4a3aa8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Sep 2020 10:00:24 +0300 Subject: ARM: p2v: drop redundant 'type' argument from __pv_stub We always pass the same value for 'type' so pull it into the __pv_stub macro itself. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 99035b5891ef..eb3c8e6e960a 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -183,14 +183,14 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) -#define __pv_stub(from,to,instr,type) \ +#define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ " .pushsection .pv_table,\"a\"\n" \ " .long 1b\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (type)) + : "r" (from), "I" (__PV_BITS_31_24)) #define __pv_stub_mov_hi(t) \ __asm__ volatile("@ __pv_stub_mov\n" \ @@ -217,7 +217,7 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) phys_addr_t t; if (sizeof(phys_addr_t) == 4) { - __pv_stub(x, t, "add", __PV_BITS_31_24); + __pv_stub(x, t, "add"); } else { __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); @@ -235,7 +235,7 @@ static inline unsigned long __phys_to_virt(phys_addr_t x) * assembler expression receives 32 bit argument * in place where 'r' 32 bit operand is expected. */ - __pv_stub((unsigned long) x, t, "sub", __PV_BITS_31_24); + __pv_stub((unsigned long) x, t, "sub"); return t; } -- cgit v1.2.3 From 2730e8eaa4f2baccc03296e0c5ee109c0673fe5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 18:23:35 +0200 Subject: ARM: p2v: use relative references in patch site arrays Free up a register in the p2v patching code by switching to relative references, which don't require keeping the phys-to-virt displacement live in a register. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 6 +++--- arch/arm/kernel/phys2virt.S | 18 +++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index eb3c8e6e960a..4121662dea5a 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -187,7 +187,7 @@ extern const void *__pv_table_begin, *__pv_table_end; __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "=r" (to) \ : "r" (from), "I" (__PV_BITS_31_24)) @@ -196,7 +196,7 @@ extern const void *__pv_table_begin, *__pv_table_end; __asm__ volatile("@ __pv_stub_mov\n" \ "1: mov %R0, %1\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "=r" (t) \ : "I" (__PV_BITS_7_0)) @@ -206,7 +206,7 @@ extern const void *__pv_table_begin, *__pv_table_end; "1: adds %Q0, %1, %2\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b\n" \ + " .long 1b - .\n" \ " .popsection\n" \ : "+r" (y) \ : "r" (x), "I" (__PV_BITS_31_24) \ diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 5031e5a2e78b..8e4be15e1559 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -58,9 +58,7 @@ ENDPROC(__fixup_pv_table) .text __fixup_a_pv_table: - adr r0, 3f - ldr r6, [r0] - add r6, r6, r3 + adr_l r6, __pv_offset ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word mov r6, r6, lsr #24 @@ -78,7 +76,8 @@ __fixup_a_pv_table: orr r6, r6, r7, lsl #12 orr r6, #0x4000 b .Lnext -.Lloop: add r7, r3 +.Lloop: add r7, r4 + adds r4, #4 ldrh ip, [r7, #2] ARM_BE8(rev16 ip, ip) tst ip, #0x4000 @@ -108,28 +107,25 @@ ARM_BE8(rev16 ip, ip) moveq r0, #0x400000 @ set bit 22, mov to mvn instruction b .Lnext -.Lloop: ldr ip, [r7, r3] +.Lloop: ldr ip, [r7, r4] bic ip, ip, #PV_IMM8_MASK tst ip, #PV_ROT_MASK @ check the rotation field orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 biceq ip, ip, #PV_BIT22 @ clear bit 22 orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) - str ip, [r7, r3] + str ip, [r7, r4] + add r4, r4, #4 #endif .Lnext: cmp r4, r5 - ldrcc r7, [r4], #4 @ use branch for delay slot + ldrcc r7, [r4] @ use branch for delay slot bcc .Lloop ret lr ENDPROC(__fixup_a_pv_table) - .align -3: .long __pv_offset - ENTRY(fixup_pv_table) stmfd sp!, {r4 - r7, lr} - mov r3, #0 @ no offset mov r4, r0 @ r0 = table start add r5, r0, r1 @ r1 = table size bl __fixup_a_pv_table -- cgit v1.2.3 From 0e3db6c9d7f6fd0ee263325027e8d3fdac5a4c9e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 19:19:32 +0200 Subject: ARM: p2v: simplify __fixup_pv_table() Declutter the code in __fixup_pv_table() by using the new adr_l/str_l macros to take PC relative references to external symbols, and by using the value of PHYS_OFFSET passed in r8 to calculate the p2v offset. Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/phys2virt.S | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index 8e4be15e1559..be8fb0d89877 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -29,33 +29,27 @@ */ __HEAD ENTRY(__fixup_pv_table) - adr r0, 1f - ldmia r0, {r3-r7} - mvn ip, #0 - subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET - add r4, r4, r3 @ adjust table start address - add r5, r5, r3 @ adjust table end address - add r6, r6, r3 @ adjust __pv_phys_pfn_offset address - add r7, r7, r3 @ adjust __pv_offset address mov r0, r8, lsr #PAGE_SHIFT @ convert to PFN - str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset - strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits - mov r6, r3, lsr #24 @ constant for add/sub instructions - teq r3, r6, lsl #24 @ must be 16MiB aligned + str_l r0, __pv_phys_pfn_offset, r3 + + adr_l r0, __pv_offset + subs r3, r8, #PAGE_OFFSET @ PHYS_OFFSET - PAGE_OFFSET + mvn ip, #0 + strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits + str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits + + mov r0, r3, lsr #24 @ constant for add/sub instructions + teq r3, r0, lsl #24 @ must be 16MiB aligned bne 0f - str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits + + adr_l r4, __pv_table_begin + adr_l r5, __pv_table_end b __fixup_a_pv_table + 0: mov r0, r0 @ deadloop on error b 0b ENDPROC(__fixup_pv_table) - .align -1: .long . - .long __pv_table_begin - .long __pv_table_end -2: .long __pv_phys_pfn_offset - .long __pv_offset - .text __fixup_a_pv_table: adr_l r6, __pv_offset -- cgit v1.2.3 From e8e00f5afb087912fb3edb225ee373aa6499bb79 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sun, 20 Sep 2020 23:02:25 +0200 Subject: ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE In preparation for reducing the phys-to-virt minimum relative alignment from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW instructions that can more easily be manipulated to carry a 12-bit immediate. Note that the non-LPAE ARM sequence is not updated: MOVW may not be supported on non-LPAE platforms, and the sequence itself can be updated more easily to apply the 12 bits of displacement. For Thumb2, which has many more versions of opcodes, switch to a sequence that can be patched by the same patching code for both versions. Note that the Thumb2 opcodes for MOVW and MVN are unambiguous, and have no rotation bits in their immediate fields, so there is no need to use placeholder constants in the asm blocks. While at it, drop the 'volatile' qualifiers from the asm blocks: the code does not have any side effects that are invisible to the compiler, so it is free to omit these sequences if the outputs are not used. Suggested-by: Russell King Acked-by: Nicolas Pitre Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/memory.h | 44 +++++++++---- arch/arm/kernel/phys2virt.S | 147 +++++++++++++++++++++++++++++++++--------- 2 files changed, 148 insertions(+), 43 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 4121662dea5a..ccf55cef6ab9 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define PHYS_OFFSET ((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT) #define PHYS_PFN_OFFSET (__pv_phys_pfn_offset) +#ifndef CONFIG_THUMB2_KERNEL #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ @@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end; : "=r" (to) \ : "r" (from), "I" (__PV_BITS_31_24)) -#define __pv_stub_mov_hi(t) \ - __asm__ volatile("@ __pv_stub_mov\n" \ - "1: mov %R0, %1\n" \ +#define __pv_add_carry_stub(x, y) \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " adds %Q0, %1, %R0, lsl #24\n" \ + "1: mov %R0, %2\n" \ + " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "=r" (t) \ - : "I" (__PV_BITS_7_0)) + : "=&r" (y) \ + : "r" (x), "I" (__PV_BITS_7_0) \ + : "cc") + +#else +#define __pv_stub(from,to,instr) \ + __asm__("@ __pv_stub\n" \ + "0: movw %0, #0\n" \ + " lsl %0, #24\n" \ + " " instr " %0, %1, %0\n" \ + " .pushsection .pv_table,\"a\"\n" \ + " .long 0b - .\n" \ + " .popsection\n" \ + : "=&r" (to) \ + : "r" (from)) #define __pv_add_carry_stub(x, y) \ - __asm__ volatile("@ __pv_add_carry_stub\n" \ - "1: adds %Q0, %1, %2\n" \ + __asm__("@ __pv_add_carry_stub\n" \ + "0: movw %R0, #0\n" \ + " lsls %R0, #24\n" \ + " adds %Q0, %1, %R0\n" \ + "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 0b - ., 1b - .\n" \ " .popsection\n" \ - : "+r" (y) \ - : "r" (x), "I" (__PV_BITS_31_24) \ + : "=&r" (y) \ + : "r" (x) \ : "cc") +#endif static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) { @@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x) if (sizeof(phys_addr_t) == 4) { __pv_stub(x, t, "add"); } else { - __pv_stub_mov_hi(t); __pv_add_carry_stub(x, t); } return t; diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index be8fb0d89877..a4e364689663 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 1994-2002 Russell King - * Copyright (c) 2003 ARM Limited + * Copyright (c) 2003, 2020 ARM Limited * All Rights Reserved */ @@ -58,55 +58,140 @@ __fixup_a_pv_table: mov r6, r6, lsr #24 cmn r0, #1 #ifdef CONFIG_THUMB2_KERNEL + @ + @ The Thumb-2 versions of the patchable sequences are + @ + @ phys-to-virt: movw , #offset<31:24> + @ lsl , #24 + @ sub , , + @ + @ virt-to-phys (non-LPAE): movw , #offset<31:24> + @ lsl , #24 + @ add , , + @ + @ virt-to-phys (LPAE): movw , #offset<31:24> + @ lsl , #24 + @ adds , , + @ mov , #offset<39:32> + @ adc , , #0 + @ + @ In the non-LPAE case, all patchable instructions are MOVW + @ instructions, where we need to patch in the offset into the + @ second halfword of the opcode (the 16-bit immediate is encoded + @ as imm4:i:imm3:imm8) + @ + @ 15 11 10 9 4 3 0 15 14 12 11 8 7 0 + @ +-----------+---+-------------+------++---+------+----+------+ + @ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+-------------+------++---+------+----+------+ + @ + @ In the LPAE case, we also need to patch in the high word of the + @ offset into the immediate field of the MOV instruction, or patch it + @ to a MVN instruction if the offset is negative. In this case, we + @ need to inspect the first halfword of the opcode, to check whether + @ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if + @ needed. The encoding of the immediate is rather complex for values + @ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower + @ order bits, which can be patched into imm8 directly (and i:imm3 + @ cleared) + @ + @ 15 11 10 9 5 0 15 14 12 11 8 7 0 + @ +-----------+---+---------------------++---+------+----+------+ + @ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 | + @ +-----------+---+---------------------++---+------+----+------+ + @ moveq r0, #0x200000 @ set bit 21, mov to mvn instruction - lsls r6, #24 - beq .Lnext - clz r7, r6 - lsr r6, #24 - lsl r6, r7 - bic r6, #0x0080 - lsrs r7, #1 - orrcs r6, #0x0080 - orr r6, r6, r7, lsl #12 - orr r6, #0x4000 b .Lnext .Lloop: add r7, r4 - adds r4, #4 - ldrh ip, [r7, #2] -ARM_BE8(rev16 ip, ip) - tst ip, #0x4000 - and ip, #0x8f00 - orrne ip, r6 @ mask in offset bits 31-24 - orreq ip, r0 @ mask in offset bits 7-0 -ARM_BE8(rev16 ip, ip) - strh ip, [r7, #2] - bne .Lnext + adds r4, #4 @ clears Z flag +#ifdef CONFIG_ARM_LPAE ldrh ip, [r7] ARM_BE8(rev16 ip, ip) - bic ip, #0x20 - orr ip, ip, r0, lsr #16 + tst ip, #0x200 @ MOVW has bit 9 set, MVN has it clear + bne 0f @ skip to MOVW handling (Z flag is clear) + bic ip, #0x20 @ clear bit 5 (MVN -> MOV) + orr ip, ip, r0, lsr #16 @ MOV -> MVN if offset < 0 ARM_BE8(rev16 ip, ip) strh ip, [r7] + @ Z flag is set +0: +#endif + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + and ip, #0xf00 @ clear everything except Rd field + orreq ip, r0 @ Z flag set -> MOV/MVN -> patch in high bits + orrne ip, r6 @ Z flag clear -> MOVW -> patch in low bits +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] #else #ifdef CONFIG_CPU_ENDIAN_BE8 @ in BE8, we load data in BE, but instructions still in LE -#define PV_BIT22 0x00004000 +#define PV_BIT24 0x00000001 #define PV_IMM8_MASK 0xff000000 -#define PV_ROT_MASK 0x000f0000 #else -#define PV_BIT22 0x00400000 +#define PV_BIT24 0x01000000 #define PV_IMM8_MASK 0x000000ff -#define PV_ROT_MASK 0xf00 #endif + @ + @ The ARM versions of the patchable sequences are + @ + @ phys-to-virt: sub , , #offset<31:24>, lsl #24 + @ + @ virt-to-phys (non-LPAE): add , , #offset<31:24>, lsl #24 + @ + @ virt-to-phys (LPAE): movw , #offset<31:24> + @ adds , , , lsl #24 + @ mov , #offset<39:32> + @ adc , , #0 + @ + @ In the non-LPAE case, all patchable instructions are ADD or SUB + @ instructions, where we need to patch in the offset into the + @ immediate field of the opcode, which is emitted with the correct + @ rotation value. (The effective value of the immediate is imm12<7:0> + @ rotated right by [2 * imm12<11:8>] bits) + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ ADD | cond | 0 0 1 0 1 0 0 0 | Rn | Rd | imm12 | + @ SUB | cond | 0 0 1 0 0 1 0 0 | Rn | Rd | imm12 | + @ MOV | cond | 0 0 1 1 1 0 1 0 | Rn | Rd | imm12 | + @ MVN | cond | 0 0 1 1 1 1 1 0 | Rn | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ + @ In the LPAE case, we use a MOVW instruction to carry the low offset + @ word, and patch in the high word of the offset into the immediate + @ field of the subsequent MOV instruction, or patch it to a MVN + @ instruction if the offset is negative. We can distinguish MOVW + @ instructions based on bits 23:22 of the opcode, and ADD/SUB can be + @ distinguished from MOV/MVN (all using the encodings above) using + @ bit 24. + @ + @ 31 28 27 23 22 20 19 16 15 12 11 0 + @ +------+-----------------+------+------+-------+ + @ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 | Rd | imm12 | + @ +------+-----------------+------+------+-------+ + @ moveq r0, #0x400000 @ set bit 22, mov to mvn instruction b .Lnext .Lloop: ldr ip, [r7, r4] +#ifdef CONFIG_ARM_LPAE + tst ip, #PV_BIT24 @ ADD/SUB have bit 24 clear + beq 1f +ARM_BE8(rev ip, ip) + tst ip, #0xc00000 @ MOVW has bits 23:22 clear + bic ip, ip, #0x400000 @ clear bit 22 + bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction + orreq ip, ip, r6 @ MOVW -> mask in offset bits 31-24 + orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) +ARM_BE8(rev ip, ip) + b 2f +1: +#endif bic ip, ip, #PV_IMM8_MASK - tst ip, #PV_ROT_MASK @ check the rotation field - orrne ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 - biceq ip, ip, #PV_BIT22 @ clear bit 22 - orreq ip, ip, r0 ARM_BE8(, ror #8) @ mask in offset bits 7-0 (or bit 22) + orr ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 +2: str ip, [r7, r4] add r4, r4, #4 #endif -- cgit v1.2.3 From 9443076e4330a14ae2c6114307668b98a8293b77 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 18 Sep 2020 11:55:42 +0300 Subject: ARM: p2v: reduce p2v alignment requirement to 2 MiB The ARM kernel's linear map starts at PAGE_OFFSET, which maps to a physical address (PHYS_OFFSET) that is platform specific, and is discovered at boot. Since we don't want to slow down translations between physical and virtual addresses by keeping the offset in a variable in memory, we implement this by patching the code performing the translation, and putting the offset between PAGE_OFFSET and the start of physical RAM directly into the instruction opcodes. As we only patch up to 8 bits of offset, yielding 4 GiB >> 8 == 16 MiB of granularity, we have to round up PHYS_OFFSET to the next multiple if the start of physical RAM is not a multiple of 16 MiB. This wastes some physical RAM, since the memory that was skipped will now live below PAGE_OFFSET, making it inaccessible to the kernel. We can improve this by changing the patchable sequences and the patching logic to carry more bits of offset: 11 bits gives us 4 GiB >> 11 == 2 MiB of granularity, and so we will never waste more than that amount by rounding up the physical start of DRAM to the next multiple of 2 MiB. (Note that 2 MiB granularity guarantees that the linear mapping can be created efficiently, whereas less than 2 MiB may result in the linear mapping needing another level of page tables) This helps Zhen Lei's scenario, where the start of DRAM is known to be occupied. It also helps EFI boot, which relies on the firmware's page allocator to allocate space for the decompressed kernel as low as possible. And if the KASLR patches ever land for 32-bit, it will give us 3 more bits of randomization of the placement of the kernel inside the linear region. For the ARM code path, it simply comes down to using two add/sub instructions instead of one for the carryless version, and patching each of them with the correct immediate depending on the rotation field. For the LPAE calculation, which has to deal with a carry, it patches the MOVW instruction with up to 12 bits of offset (but we only need 11 bits anyway) For the Thumb2 code path, patching more than 11 bits of displacement would be somewhat cumbersome, but the 11 bits we need fit nicely into the second word of the u16[2] opcode, so we simply update the immediate assignment and the left shift to create an addend of the right magnitude. Suggested-by: Zhen Lei Acked-by: Nicolas Pitre Acked-by: Linus Walleij Signed-off-by: Ard Biesheuvel --- arch/arm/Kconfig | 2 +- arch/arm/include/asm/memory.h | 13 ++++++++----- arch/arm/kernel/phys2virt.S | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 20 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index fe2f17eb2b50..d8d62b8e72e4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -243,7 +243,7 @@ config ARM_PATCH_PHYS_VIRT kernel in system memory. This can only be used with non-XIP MMU kernels where the base - of physical memory is at a 16MB boundary. + of physical memory is at a 2 MiB boundary. Only disable this option if you know that you do not require this feature (eg, building a kernel for a single machine) and diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index ccf55cef6ab9..2611be35f26b 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -173,6 +173,7 @@ extern unsigned long vectors_base; * so that all we need to do is modify the 8-bit constant field. */ #define __PV_BITS_31_24 0x81000000 +#define __PV_BITS_23_16 0x810000 #define __PV_BITS_7_0 0x81 extern unsigned long __pv_phys_pfn_offset; @@ -187,16 +188,18 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "1: " instr " %0, %1, %2\n" \ + "2: " instr " %0, %0, %3\n" \ " .pushsection .pv_table,\"a\"\n" \ - " .long 1b - .\n" \ + " .long 1b - ., 2b - .\n" \ " .popsection\n" \ : "=r" (to) \ - : "r" (from), "I" (__PV_BITS_31_24)) + : "r" (from), "I" (__PV_BITS_31_24), \ + "I"(__PV_BITS_23_16)) #define __pv_add_carry_stub(x, y) \ __asm__("@ __pv_add_carry_stub\n" \ "0: movw %R0, #0\n" \ - " adds %Q0, %1, %R0, lsl #24\n" \ + " adds %Q0, %1, %R0, lsl #20\n" \ "1: mov %R0, %2\n" \ " adc %R0, %R0, #0\n" \ " .pushsection .pv_table,\"a\"\n" \ @@ -210,7 +213,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_stub(from,to,instr) \ __asm__("@ __pv_stub\n" \ "0: movw %0, #0\n" \ - " lsl %0, #24\n" \ + " lsl %0, #21\n" \ " " instr " %0, %1, %0\n" \ " .pushsection .pv_table,\"a\"\n" \ " .long 0b - .\n" \ @@ -221,7 +224,7 @@ extern const void *__pv_table_begin, *__pv_table_end; #define __pv_add_carry_stub(x, y) \ __asm__("@ __pv_add_carry_stub\n" \ "0: movw %R0, #0\n" \ - " lsls %R0, #24\n" \ + " lsls %R0, #21\n" \ " adds %Q0, %1, %R0\n" \ "1: mvn %R0, #0\n" \ " adc %R0, %R0, #0\n" \ diff --git a/arch/arm/kernel/phys2virt.S b/arch/arm/kernel/phys2virt.S index a4e364689663..fb53db78fe78 100644 --- a/arch/arm/kernel/phys2virt.S +++ b/arch/arm/kernel/phys2virt.S @@ -21,7 +21,7 @@ /* * __fixup_pv_table - patch the stub instructions with the delta between * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be - * 16MiB aligned. + * 2 MiB aligned. * * Called from head.S, which expects the following registers to be preserved: * r1 = machine no, r2 = atags or dtb, @@ -38,8 +38,8 @@ ENTRY(__fixup_pv_table) strcc ip, [r0, #HIGH_OFFSET] @ save to __pv_offset high bits str r3, [r0, #LOW_OFFSET] @ save to __pv_offset low bits - mov r0, r3, lsr #24 @ constant for add/sub instructions - teq r3, r0, lsl #24 @ must be 16MiB aligned + mov r0, r3, lsr #21 @ constant for add/sub instructions + teq r3, r0, lsl #21 @ must be 2 MiB aligned bne 0f adr_l r4, __pv_table_begin @@ -55,22 +55,21 @@ __fixup_a_pv_table: adr_l r6, __pv_offset ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word - mov r6, r6, lsr #24 cmn r0, #1 #ifdef CONFIG_THUMB2_KERNEL @ @ The Thumb-2 versions of the patchable sequences are @ - @ phys-to-virt: movw , #offset<31:24> - @ lsl , #24 + @ phys-to-virt: movw , #offset<31:21> + @ lsl , #21 @ sub , , @ - @ virt-to-phys (non-LPAE): movw , #offset<31:24> - @ lsl , #24 + @ virt-to-phys (non-LPAE): movw , #offset<31:21> + @ lsl , #21 @ add , , @ - @ virt-to-phys (LPAE): movw , #offset<31:24> - @ lsl , #24 + @ virt-to-phys (LPAE): movw , #offset<31:21> + @ lsl , #21 @ adds , , @ mov , #offset<39:32> @ adc , , #0 @@ -102,6 +101,9 @@ __fixup_a_pv_table: @ +-----------+---+---------------------++---+------+----+------+ @ moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsrs r3, r6, #29 @ isolate top 3 bits of displacement + ubfx r6, r6, #21, #8 @ put bits 28:21 into the MOVW imm8 field + bfi r6, r3, #12, #3 @ put bits 31:29 into the MOVW imm3 field b .Lnext .Lloop: add r7, r4 adds r4, #4 @ clears Z flag @@ -129,20 +131,24 @@ ARM_BE8(rev16 ip, ip) @ in BE8, we load data in BE, but instructions still in LE #define PV_BIT24 0x00000001 #define PV_IMM8_MASK 0xff000000 +#define PV_IMMR_MSB 0x00080000 #else #define PV_BIT24 0x01000000 #define PV_IMM8_MASK 0x000000ff +#define PV_IMMR_MSB 0x00000800 #endif @ @ The ARM versions of the patchable sequences are @ @ phys-to-virt: sub , , #offset<31:24>, lsl #24 + @ sub , , #offset<23:16>, lsl #16 @ @ virt-to-phys (non-LPAE): add , , #offset<31:24>, lsl #24 + @ add , , #offset<23:16>, lsl #16 @ - @ virt-to-phys (LPAE): movw , #offset<31:24> - @ adds , , , lsl #24 + @ virt-to-phys (LPAE): movw , #offset<31:20> + @ adds , , , lsl #20 @ mov , #offset<39:32> @ adc , , #0 @ @@ -174,6 +180,9 @@ ARM_BE8(rev16 ip, ip) @ +------+-----------------+------+------+-------+ @ moveq r0, #0x400000 @ set bit 22, mov to mvn instruction + mov r3, r6, lsr #16 @ put offset bits 31-16 into r3 + mov r6, r6, lsr #24 @ put offset bits 31-24 into r6 + and r3, r3, #0xf0 @ only keep offset bits 23-20 in r3 b .Lnext .Lloop: ldr ip, [r7, r4] #ifdef CONFIG_ARM_LPAE @@ -183,14 +192,17 @@ ARM_BE8(rev ip, ip) tst ip, #0xc00000 @ MOVW has bits 23:22 clear bic ip, ip, #0x400000 @ clear bit 22 bfc ip, #0, #12 @ clear imm12 field of MOV[W] instruction - orreq ip, ip, r6 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r6, lsl #4 @ MOVW -> mask in offset bits 31-24 + orreq ip, ip, r3, lsr #4 @ MOVW -> mask in offset bits 23-20 orrne ip, ip, r0 @ MOV -> mask in offset bits 7-0 (or bit 22) ARM_BE8(rev ip, ip) b 2f 1: #endif + tst ip, #PV_IMMR_MSB @ rotation value >= 16 ? bic ip, ip, #PV_IMM8_MASK - orr ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orreq ip, ip, r6 ARM_BE8(, lsl #24) @ mask in offset bits 31-24 + orrne ip, ip, r3 ARM_BE8(, lsl #24) @ mask in offset bits 23-20 2: str ip, [r7, r4] add r4, r4, #4 -- cgit v1.2.3 From 67e3f828bd4bf5e4eb4214dc4eb227d8f1c8a877 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 12:28:01 +0300 Subject: ARM: efistub: replace adrl pseudo-op with adr_l macro invocation The ARM 'adrl' pseudo instruction is a bit problematic, as it does not exist in Thumb mode, and it is not implemented by Clang either. Since the Thumb variant has a slightly bigger range, it is sometimes necessary to emit the 'adrl' variant in ARM mode where Thumb mode can use adr just fine. However, that still leaves the Clang issue, which does not appear to be supporting this any time soon. So let's switch to the adr_l macro, which works for both ARM and Thumb, and has unlimited range. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..5b591dacbaaf 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1440,8 +1440,7 @@ ENTRY(efi_enter_kernel) mov r4, r0 @ preserve image base mov r8, r1 @ preserve DT pointer - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) + adr_l r0, call_cache_fn adr r1, 0f @ clean the region of code we bl cache_clean_flush @ may run with the MMU off -- cgit v1.2.3 From 62c4a2e202b18e1d7176875b7e7af240f340596b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:06 +0300 Subject: ARM: head-common.S: use PC-relative insn sequence for __proc_info Replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head-common.S | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 4a3982812a40..9a5ab6c19568 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -170,11 +170,12 @@ ENDPROC(lookup_processor_type) * r9 = cpuid (preserved) */ __lookup_processor_type: - adr r3, __lookup_processor_type_data - ldmia r3, {r4 - r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space + /* + * Look in for information about the __proc_info + * structure. + */ + adr_l r5, __proc_info_begin + adr_l r6, __proc_info_end 1: ldmia r5, {r3, r4} @ value, mask and r4, r4, r9 @ mask wanted bits teq r3, r4 @@ -186,17 +187,6 @@ __lookup_processor_type: 2: ret lr ENDPROC(__lookup_processor_type) -/* - * Look in for information about the __proc_info structure. - */ - .align 2 - .type __lookup_processor_type_data, %object -__lookup_processor_type_data: - .long . - .long __proc_info_begin - .long __proc_info_end - .size __lookup_processor_type_data, . - __lookup_processor_type_data - __error_lpae: #ifdef CONFIG_DEBUG_LL adr r0, str_lpae -- cgit v1.2.3 From 172c34c9ff0144c3e1d96a9b54d6fecfe5d17c3c Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:16 +0300 Subject: ARM: head-common.S: use PC-relative insn sequence for idmap creation Replace the open coded PC relative offset calculations involving __turn_mmu_on and __turn_mmu_on_end with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 7e3f36809011..f5a636fee9d0 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -224,11 +224,8 @@ __create_page_tables: * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __turn_mmu_on_loc - ldmia r0, {r3, r5, r6} - sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __turn_mmu_on - add r6, r6, r0 @ phys __turn_mmu_on_end + adr_l r5, __turn_mmu_on @ _pa(__turn_mmu_on) + adr_l r6, __turn_mmu_on_end @ _pa(__turn_mmu_on_end) mov r5, r5, lsr #SECTION_SHIFT mov r6, r6, lsr #SECTION_SHIFT @@ -351,11 +348,6 @@ __create_page_tables: ret lr ENDPROC(__create_page_tables) .ltorg - .align -__turn_mmu_on_loc: - .long . - .long __turn_mmu_on - .long __turn_mmu_on_end #if defined(CONFIG_SMP) .text -- cgit v1.2.3 From 91580f0dbf24c6d616091526a900213bc7aa48fe Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:23 +0300 Subject: ARM: head.S: use PC-relative insn sequence for secondary_data Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that it also removes a stale comment about the contents of r6. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index f5a636fee9d0..478506b2d51f 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -383,10 +383,8 @@ ENTRY(secondary_startup) /* * Use the page tables supplied from __cpu_up. */ - adr r4, __secondary_data - ldmia r4, {r5, r7, r12} @ address to jump to after - sub lr, r4, r5 @ mmu has been enabled - add r3, r7, lr + adr_l r3, secondary_data + mov_l r12, __secondary_switched ldrd r4, r5, [r3, #0] @ get secondary_data.pgdir ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps @@ -401,22 +399,13 @@ ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ENDPROC(secondary_startup) ENDPROC(secondary_startup_arm) - /* - * r6 = &secondary_data - */ ENTRY(__secondary_switched) - ldr sp, [r7, #12] @ get secondary_data.stack + ldr_l r7, secondary_data + 12 @ get secondary_data.stack + mov sp, r7 mov fp, #0 b secondary_start_kernel ENDPROC(__secondary_switched) - .align - - .type __secondary_data, %object -__secondary_data: - .long . - .long secondary_data - .long __secondary_switched #endif /* defined(CONFIG_SMP) */ -- cgit v1.2.3 From 450abd38fe6c6313ce9bdd9dce81c1dd604f6fb0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:48:20 +0300 Subject: ARM: kernel: use relative references for UP/SMP alternatives Currently, the .alt.smp.init section contains the virtual addresses of the patch sites. Since patching may occur both before and after switching into virtual mode, this requires some manual handling of the address when applying the UP alternative. Let's simplify this by using relative offsets in the table entries: this allows us to simply add each entry's address to its contents, regardless of whether we are running in virtual mode or not. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/include/asm/assembler.h | 4 ++-- arch/arm/include/asm/processor.h | 2 +- arch/arm/kernel/head.S | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 72627c5fb3b2..6ed30421f697 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -259,7 +259,7 @@ */ #define ALT_UP(instr...) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ 9997: instr ;\ .if . - 9997b == 2 ;\ nop ;\ @@ -270,7 +270,7 @@ .popsection #define ALT_UP_B(label) \ .pushsection ".alt.smp.init", "a" ;\ - .long 9998b ;\ + .long 9998b - . ;\ W(b) . + (label - 9998b) ;\ .popsection #else diff --git a/arch/arm/include/asm/processor.h b/arch/arm/include/asm/processor.h index b9241051e5cb..9e6b97286307 100644 --- a/arch/arm/include/asm/processor.h +++ b/arch/arm/include/asm/processor.h @@ -96,7 +96,7 @@ unsigned long get_wchan(struct task_struct *p); #define __ALT_SMP_ASM(smp, up) \ "9998: " smp "\n" \ " .pushsection \".alt.smp.init\", \"a\"\n" \ - " .long 9998b\n" \ + " .long 9998b - .\n" \ " " up "\n" \ " .popsection\n" #else diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 478506b2d51f..cdc79fcee43e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -546,14 +546,15 @@ smp_on_up: __do_fixup_smp_on_up: cmp r4, r5 reths lr - ldmia r4!, {r0, r6} - ARM( str r6, [r0, r3] ) - THUMB( add r0, r0, r3 ) + ldmia r4, {r0, r6} + ARM( str r6, [r0, r4] ) + THUMB( add r0, r0, r4 ) + add r4, r4, #8 #ifdef __ARMEB__ THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. #endif THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords - THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r0. THUMB( strh r6, [r0] ) b __do_fixup_smp_on_up ENDPROC(__do_fixup_smp_on_up) @@ -562,7 +563,6 @@ ENTRY(fixup_smp) stmfd sp!, {r4 - r6, lr} mov r4, r0 add r5, r0, r1 - mov r3, #0 bl __do_fixup_smp_on_up ldmfd sp!, {r4 - r6, pc} ENDPROC(fixup_smp) -- cgit v1.2.3 From 59d2f2827dfdccf8911d5e51465136b52ba623c4 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:29 +0300 Subject: ARM: head: use PC-relative insn sequence for __smp_alt Now that calling __do_fixup_smp_on_up() can be done without passing the physical-to-virtual offset in r3, we can replace the open coded PC relative offset calculations with a pair of adr_l invocations. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index cdc79fcee43e..5e031a0bf9a9 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -520,19 +520,11 @@ ARM_BE8(rev r0, r0) @ byteswap if big endian retne lr __fixup_smp_on_up: - adr r0, 1f - ldmia r0, {r3 - r5} - sub r3, r0, r3 - add r4, r4, r3 - add r5, r5, r3 + adr_l r4, __smpalt_begin + adr_l r5, __smpalt_end b __do_fixup_smp_on_up ENDPROC(__fixup_smp) - .align -1: .word . - .word __smpalt_begin - .word __smpalt_end - .pushsection .data .align 2 .globl smp_on_up -- cgit v1.2.3 From d74d2b225018baa0e04e080ee9e80b21667ba3a2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:34 +0300 Subject: ARM: sleep.S: use PC-relative insn sequence for sleep_save_sp/mpidr_hash Replace the open coded PC relative offset calculations with adr_l and ldr_l invocations. This removes some open coded PC relative arithmetic, avoids literal pools on v7+, and slightly reduces the footprint of the code. Note that ALT_SMP() expects a single instruction so move the macro invocation after it. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/sleep.S | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S index 5dc8b80bb693..43077e11dafd 100644 --- a/arch/arm/kernel/sleep.S +++ b/arch/arm/kernel/sleep.S @@ -72,8 +72,9 @@ ENTRY(__cpu_suspend) ldr r3, =sleep_save_sp stmfd sp!, {r0, r1} @ save suspend func arg and pointer ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] - ALT_SMP(ldr r0, =mpidr_hash) + ALT_SMP(W(nop)) @ don't use adr_l inside ALT_SMP() ALT_UP_B(1f) + adr_l r0, mpidr_hash /* This ldmia relies on the memory layout of the mpidr_hash struct */ ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts compute_mpidr_hash r0, r6, r7, r8, r2, r1 @@ -147,9 +148,8 @@ no_hyp: mov r1, #0 ALT_SMP(mrc p15, 0, r0, c0, c0, 5) ALT_UP_B(1f) - adr r2, mpidr_hash_ptr - ldr r3, [r2] - add r2, r2, r3 @ r2 = struct mpidr_hash phys address + adr_l r2, mpidr_hash @ r2 = struct mpidr_hash phys address + /* * This ldmia relies on the memory layout of the mpidr_hash * struct mpidr_hash. @@ -157,10 +157,7 @@ no_hyp: ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts compute_mpidr_hash r1, r4, r5, r6, r0, r3 1: - adr r0, _sleep_save_sp - ldr r2, [r0] - add r0, r0, r2 - ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr_l r0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr r0, [r0, r1, lsl #2] @ load phys pgd, stack, resume fn @@ -177,12 +174,6 @@ ENDPROC(cpu_resume_arm) ENDPROC(cpu_resume_no_hyp) #endif - .align 2 -_sleep_save_sp: - .long sleep_save_sp - . -mpidr_hash_ptr: - .long mpidr_hash - . @ mpidr_hash struct offset - .data .align 2 .type sleep_save_sp, #object -- cgit v1.2.3 From 3bcf906b194cebb6817cbb2f07b69e12aa5d7f51 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:46 +0300 Subject: ARM: head.S: use PC relative insn sequence to calculate PHYS_OFFSET Replace the open coded arithmetic with a simple adr_l/sub pair. This removes some open coded arithmetic involving virtual addresses, avoids literal pools on v7+, and slightly reduces the footprint of the code. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/kernel/head.S | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 5e031a0bf9a9..ae0b08b47f52 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -103,10 +103,8 @@ ENTRY(stext) #endif #ifndef CONFIG_XIP_KERNEL - adr r3, 2f - ldmia r3, {r4, r8} - sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) - add r8, r8, r4 @ PHYS_OFFSET + adr_l r8, _text @ __pa(_text) + sub r8, r8, #TEXT_OFFSET @ PHYS_OFFSET #else ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case #endif @@ -158,10 +156,6 @@ ENTRY(stext) 1: b __enable_mmu ENDPROC(stext) .ltorg -#ifndef CONFIG_XIP_KERNEL -2: .long . - .long PAGE_OFFSET -#endif /* * Setup the initial page tables. We only setup the barest -- cgit v1.2.3 From aaac3733171fca948c4fb66b78257620e3885339 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 14 Sep 2020 11:25:52 +0300 Subject: ARM: kvm: replace open coded VA->PA calculations with adr_l call Replace the open coded calculations of the actual physical address of the KVM stub vector table with a single adr_l invocation. Reviewed-by: Nicolas Pitre Signed-off-by: Ard Biesheuvel --- arch/arm/boot/compressed/head.S | 15 ++------------- arch/arm/kernel/hyp-stub.S | 27 ++++++++++++--------------- 2 files changed, 14 insertions(+), 28 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 5b591dacbaaf..9905fb7560df 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -468,15 +468,10 @@ dtb_check_done: /* * Compute the address of the hyp vectors after relocation. - * This requires some arithmetic since we cannot directly - * reference __hyp_stub_vectors in a PC-relative way. * Call __hyp_set_vectors with the new address so that we * can HVC again after the copy. */ -0: adr r0, 0b - movw r1, #:lower16:__hyp_stub_vectors - 0b - movt r1, #:upper16:__hyp_stub_vectors - 0b - add r0, r0, r1 + adr_l r0, __hyp_stub_vectors sub r0, r0, r5 add r0, r0, r10 bl __hyp_set_vectors @@ -627,17 +622,11 @@ not_relocated: mov r0, #0 cmp r0, #HYP_MODE @ if not booted in HYP mode... bne __enter_kernel @ boot kernel directly - adr r12, .L__hyp_reentry_vectors_offset - ldr r0, [r12] - add r0, r0, r12 - + adr_l r0, __hyp_reentry_vectors bl __hyp_set_vectors __HVC(0) @ otherwise bounce to hyp mode b . @ should never be reached - - .align 2 -.L__hyp_reentry_vectors_offset: .long __hyp_reentry_vectors - . #else b __enter_kernel #endif diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd3..103d0bdb2b7e 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -24,41 +24,38 @@ ENTRY(__boot_cpu_mode) .text /* - * Save the primary CPU boot mode. Requires 3 scratch registers. + * Save the primary CPU boot mode. Requires 2 scratch registers. */ - .macro store_primary_cpu_mode reg1, reg2, reg3 + .macro store_primary_cpu_mode reg1, reg2 mrs \reg1, cpsr and \reg1, \reg1, #MODE_MASK - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - str \reg1, [\reg2, \reg3] + str_l \reg1, __boot_cpu_mode, \reg2 .endm /* * Compare the current mode with the one saved on the primary CPU. * If they don't match, record that fact. The Z bit indicates * if there's a match or not. - * Requires 3 additionnal scratch registers. + * Requires 2 additional scratch registers. */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 - adr \reg2, .L__boot_cpu_mode_offset - ldr \reg3, [\reg2] - ldr \reg1, [\reg2, \reg3] + .macro compare_cpu_mode_with_primary mode, reg1, reg2 + adr_l \reg2, __boot_cpu_mode + ldr \reg1, [\reg2] cmp \mode, \reg1 @ matches primary CPU boot mode? orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH - strne \reg1, [\reg2, \reg3] @ record what happened and give up + strne \reg1, [\reg2] @ record what happened and give up .endm #else /* ZIMAGE */ - .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .macro store_primary_cpu_mode reg1:req, reg2:req .endm /* * The zImage loader only runs on one CPU, so we don't bother with mult-CPU * consistency checking: */ - .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + .macro compare_cpu_mode_with_primary mode, reg1, reg2 cmp \mode, \mode .endm @@ -73,7 +70,7 @@ ENTRY(__boot_cpu_mode) */ @ Call this from the primary CPU ENTRY(__hyp_stub_install) - store_primary_cpu_mode r4, r5, r6 + store_primary_cpu_mode r4, r5 ENDPROC(__hyp_stub_install) @ fall through... @@ -87,7 +84,7 @@ ENTRY(__hyp_stub_install_secondary) * If the secondary has booted with a different mode, give up * immediately. */ - compare_cpu_mode_with_primary r4, r5, r6, r7 + compare_cpu_mode_with_primary r4, r5, r6 retne lr /* -- cgit v1.2.3 From 735e8d93dc2b107f7891a9c2b1c4cfbea1fcbbbc Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 6 Nov 2020 21:46:11 +0100 Subject: ARM: 9022/1: Change arch/arm/lib/mem*.S to use WEAK instead of .weak Commit d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") add .weak directives to memcpy/memmove/memset to avoid collision with KASAN interceptors. This does not work with LLVM's integrated assembler (the assembly snippet `.weak memcpy ... .globl memcpy` produces a STB_GLOBAL memcpy while GNU as produces a STB_WEAK memcpy). LLVM 12 (since https://reviews.llvm.org/D90108) will error on such an overridden symbol binding. Use the appropriate WEAK macro instead. Link: https://github.com/ClangBuiltLinux/linux/issues/1190 -- Fixes: d6d51a96c7d6 ("ARM: 9014/2: Replace string mem* functions for KASan") Reported-by: Nick Desaulniers Signed-off-by: Fangrui Song Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Russell King --- arch/arm/lib/memcpy.S | 3 +-- arch/arm/lib/memmove.S | 3 +-- arch/arm/lib/memset.S | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index ad4625d16e11..e4caf48c089f 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -58,10 +58,9 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ -.weak memcpy ENTRY(__memcpy) ENTRY(mmiocpy) -ENTRY(memcpy) +WEAK(memcpy) #include "copy_template.S" diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S index fd123ea5a5a4..6fecc12a1f51 100644 --- a/arch/arm/lib/memmove.S +++ b/arch/arm/lib/memmove.S @@ -24,9 +24,8 @@ * occurring in the opposite direction. */ -.weak memmove ENTRY(__memmove) -ENTRY(memmove) +WEAK(memmove) UNWIND( .fnstart ) subs ip, r0, r1 diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 0e7ff0423f50..9817cb258c1a 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -13,10 +13,9 @@ .text .align 5 -.weak memset ENTRY(__memset) ENTRY(mmioset) -ENTRY(memset) +WEAK(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? mov ip, r0 @ preserve r0 as return value -- cgit v1.2.3 From df8eda0f1f58e2419875046f57c27c4d72378575 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:59:30 +0100 Subject: ARM: 9023/1: Spelling s/mmeory/memory/ Fix a misspelling of the word "memory". Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d32f7652a5bf..f9de7658d9b3 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1136,7 +1136,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* * Make sure the calculation for lowmem/highmem is set appropriately - * before reserving/allocating any mmeory + * before reserving/allocating any memory */ adjust_lowmem_bounds(); arm_memblock_init(mdesc); -- cgit v1.2.3 From 730b5764ea8526e48bdb85a24ed96d62de435940 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Nov 2020 16:58:41 +0100 Subject: ARM: 9024/1: Drop useless cast of "u64" to "long long" As "u64" is equivalent to "unsigned long long", there is no need to cast a "u64" parameter for printing it using the "0x%08llx" format specifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Russell King --- arch/arm/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index f9de7658d9b3..1a5edf562e85 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -765,7 +765,7 @@ int __init arm_add_memory(u64 start, u64 size) #ifndef CONFIG_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", - (long long)start); + start); return -EINVAL; } -- cgit v1.2.3 From 28187dc8ebd938d574edfc6d9e0f9c51c21ff3f4 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Nov 2020 00:46:39 +0100 Subject: ARM: 9025/1: Kconfig: CPU_BIG_ENDIAN depends on !LD_IS_LLD LLD does not yet support any big endian architectures. Make this config non-selectable when using LLD until LLD is fixed. Link: https://github.com/ClangBuiltLinux/linux/issues/965 Signed-off-by: Nick Desaulniers Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Reported-by: kbuild test robot Signed-off-by: Russell King --- arch/arm/mm/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm') diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 65e4482e3849..02692fbe2db5 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -743,6 +743,7 @@ config SWP_EMULATE config CPU_BIG_ENDIAN bool "Build big-endian kernel" depends on ARCH_SUPPORTS_BIG_ENDIAN + depends on !LD_IS_LLD help Say Y if you plan on running a kernel in big-endian mode. Note that your board must be properly built and your board -- cgit v1.2.3 From 331b9d02d77e0e33f273d8328d9f5453efddd926 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 17 Nov 2020 00:49:25 +0100 Subject: ARM: 9026/1: unwind: remove old check for GCC <= 4.2 Since commit 0bddd227f3dc ("Documentation: update for gcc 4.9 requirement") the minimum supported version of GCC is gcc-4.9. It's now safe to remove this code. Link: https://github.com/ClangBuiltLinux/linux/issues/427 Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Russell King --- arch/arm/kernel/unwind.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index f35eb584a18a..59fdf257bf8b 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -18,9 +18,6 @@ #warning Your compiler does not have EABI support. #warning ARM unwind is known to compile only with EABI compilers. #warning Change compiler or disable ARM_UNWIND option. -#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__) -#warning Your compiler is too buggy; it is known to not compile ARM unwind support. -#warning Change compiler or disable ARM_UNWIND option. #endif #endif /* __CHECKER__ */ -- cgit v1.2.3 From 4d576cab16f57e1f87978f6997a725179398341e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Nov 2020 10:23:28 +0100 Subject: ARM: 9028/1: disable KASAN in call stack capturing routines KASAN uses the routines in stacktrace.c to capture the call stack each time memory gets allocated or freed. Some of these routines are also used to log CPU and memory context when exceptions are taken, and so in some cases, memory accesses may be made that are not strictly in line with the KASAN constraints, and may therefore trigger false KASAN positives. So follow the example set by other architectures, and simply disable KASAN instrumentation for these routines. Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 89e5d864e923..f6431b46866e 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -21,6 +21,9 @@ obj-y := elf.o entry-common.o irq.o opcodes.o \ setup.o signal.o sigreturn_codes.o \ stacktrace.o sys_arm.o time.o traps.o +KASAN_SANITIZE_stacktrace.o := n +KASAN_SANITIZE_traps.o := n + ifneq ($(CONFIG_ARM_UNWIND),y) obj-$(CONFIG_FRAME_POINTER) += return_address.o endif -- cgit v1.2.3 From 3c9f5708b7aed6a963e2aefccbd1854802de163e Mon Sep 17 00:00:00 2001 From: Jian Cai Date: Tue, 17 Nov 2020 23:11:36 +0100 Subject: ARM: 9029/1: Make iwmmxt.S support Clang's integrated assembler This patch replaces 6 IWMMXT instructions Clang's integrated assembler does not support in iwmmxt.S using macros, while making sure GNU assembler still emit the same instructions. This should be easier than providing full IWMMXT support in Clang. This is one of the last bits of kernel code that could be compiled but not assembled with clang. Once all of it works with IAS, we no longer need to special-case 32-bit Arm in Kbuild, or turn off CONFIG_IWMMXT when build-testing. "Intel Wireless MMX Technology - Developer Guide - August, 2002" should be referenced for the encoding schemes of these extensions. Link: https://github.com/ClangBuiltLinux/linux/issues/975 Suggested-by: Nick Desaulniers Suggested-by: Ard Biesheuvel Acked-by: Ard Biesheuvel Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Jian Cai Signed-off-by: Russell King --- arch/arm/kernel/iwmmxt.S | 89 ++++++++++++++++++++++++------------------------ arch/arm/kernel/iwmmxt.h | 47 +++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 44 deletions(-) create mode 100644 arch/arm/kernel/iwmmxt.h (limited to 'arch/arm') diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index 0dcae787b004..d2b4ac06e4ed 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -16,6 +16,7 @@ #include #include #include +#include "iwmmxt.h" #if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) #define PJ4(code...) code @@ -113,33 +114,33 @@ concan_save: concan_dump: - wstrw wCSSF, [r1, #MMX_WCSSF] - wstrw wCASF, [r1, #MMX_WCASF] - wstrw wCGR0, [r1, #MMX_WCGR0] - wstrw wCGR1, [r1, #MMX_WCGR1] - wstrw wCGR2, [r1, #MMX_WCGR2] - wstrw wCGR3, [r1, #MMX_WCGR3] + wstrw wCSSF, r1, MMX_WCSSF + wstrw wCASF, r1, MMX_WCASF + wstrw wCGR0, r1, MMX_WCGR0 + wstrw wCGR1, r1, MMX_WCGR1 + wstrw wCGR2, r1, MMX_WCGR2 + wstrw wCGR3, r1, MMX_WCGR3 1: @ MUP? wRn tst r2, #0x2 beq 2f - wstrd wR0, [r1, #MMX_WR0] - wstrd wR1, [r1, #MMX_WR1] - wstrd wR2, [r1, #MMX_WR2] - wstrd wR3, [r1, #MMX_WR3] - wstrd wR4, [r1, #MMX_WR4] - wstrd wR5, [r1, #MMX_WR5] - wstrd wR6, [r1, #MMX_WR6] - wstrd wR7, [r1, #MMX_WR7] - wstrd wR8, [r1, #MMX_WR8] - wstrd wR9, [r1, #MMX_WR9] - wstrd wR10, [r1, #MMX_WR10] - wstrd wR11, [r1, #MMX_WR11] - wstrd wR12, [r1, #MMX_WR12] - wstrd wR13, [r1, #MMX_WR13] - wstrd wR14, [r1, #MMX_WR14] - wstrd wR15, [r1, #MMX_WR15] + wstrd wR0, r1, MMX_WR0 + wstrd wR1, r1, MMX_WR1 + wstrd wR2, r1, MMX_WR2 + wstrd wR3, r1, MMX_WR3 + wstrd wR4, r1, MMX_WR4 + wstrd wR5, r1, MMX_WR5 + wstrd wR6, r1, MMX_WR6 + wstrd wR7, r1, MMX_WR7 + wstrd wR8, r1, MMX_WR8 + wstrd wR9, r1, MMX_WR9 + wstrd wR10, r1, MMX_WR10 + wstrd wR11, r1, MMX_WR11 + wstrd wR12, r1, MMX_WR12 + wstrd wR13, r1, MMX_WR13 + wstrd wR14, r1, MMX_WR14 + wstrd wR15, r1, MMX_WR15 2: teq r0, #0 @ anything to load? reteq lr @ if not, return @@ -147,30 +148,30 @@ concan_dump: concan_load: @ Load wRn - wldrd wR0, [r0, #MMX_WR0] - wldrd wR1, [r0, #MMX_WR1] - wldrd wR2, [r0, #MMX_WR2] - wldrd wR3, [r0, #MMX_WR3] - wldrd wR4, [r0, #MMX_WR4] - wldrd wR5, [r0, #MMX_WR5] - wldrd wR6, [r0, #MMX_WR6] - wldrd wR7, [r0, #MMX_WR7] - wldrd wR8, [r0, #MMX_WR8] - wldrd wR9, [r0, #MMX_WR9] - wldrd wR10, [r0, #MMX_WR10] - wldrd wR11, [r0, #MMX_WR11] - wldrd wR12, [r0, #MMX_WR12] - wldrd wR13, [r0, #MMX_WR13] - wldrd wR14, [r0, #MMX_WR14] - wldrd wR15, [r0, #MMX_WR15] + wldrd wR0, r0, MMX_WR0 + wldrd wR1, r0, MMX_WR1 + wldrd wR2, r0, MMX_WR2 + wldrd wR3, r0, MMX_WR3 + wldrd wR4, r0, MMX_WR4 + wldrd wR5, r0, MMX_WR5 + wldrd wR6, r0, MMX_WR6 + wldrd wR7, r0, MMX_WR7 + wldrd wR8, r0, MMX_WR8 + wldrd wR9, r0, MMX_WR9 + wldrd wR10, r0, MMX_WR10 + wldrd wR11, r0, MMX_WR11 + wldrd wR12, r0, MMX_WR12 + wldrd wR13, r0, MMX_WR13 + wldrd wR14, r0, MMX_WR14 + wldrd wR15, r0, MMX_WR15 @ Load wCx - wldrw wCSSF, [r0, #MMX_WCSSF] - wldrw wCASF, [r0, #MMX_WCASF] - wldrw wCGR0, [r0, #MMX_WCGR0] - wldrw wCGR1, [r0, #MMX_WCGR1] - wldrw wCGR2, [r0, #MMX_WCGR2] - wldrw wCGR3, [r0, #MMX_WCGR3] + wldrw wCSSF, r0, MMX_WCSSF + wldrw wCASF, r0, MMX_WCASF + wldrw wCGR0, r0, MMX_WCGR0 + wldrw wCGR1, r0, MMX_WCGR1 + wldrw wCGR2, r0, MMX_WCGR2 + wldrw wCGR3, r0, MMX_WCGR3 @ clear CUP/MUP (only if r1 != 0) teq r1, #0 diff --git a/arch/arm/kernel/iwmmxt.h b/arch/arm/kernel/iwmmxt.h new file mode 100644 index 000000000000..fb627286f5bb --- /dev/null +++ b/arch/arm/kernel/iwmmxt.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __IWMMXT_H__ +#define __IWMMXT_H__ + +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 +.set .LwR\b, \b +.set .Lr\b, \b +.endr + +.set .LwCSSF, 0x2 +.set .LwCASF, 0x3 +.set .LwCGR0, 0x8 +.set .LwCGR1, 0x9 +.set .LwCGR2, 0xa +.set .LwCGR3, 0xb + +.macro wldrd, reg:req, base:req, offset:req +.inst 0xedd00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wldrw, reg:req, base:req, offset:req +.inst 0xfd900100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrd, reg:req, base:req, offset:req +.inst 0xedc00100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +.macro wstrw, reg:req, base:req, offset:req +.inst 0xfd800100 | (.L\reg << 12) | (.L\base << 16) | (\offset >> 2) +.endm + +#ifdef __clang__ + +#define wCon c1 + +.macro tmrc, dest:req, control:req +mrc p1, 0, \dest, \control, c0, 0 +.endm + +.macro tmcr, control:req, src:req +mcr p1, 0, \src, \control, c0, 0 +.endm +#endif + +#endif -- cgit v1.2.3 From f77ac2e378be9dd61eb88728f0840642f045d9d1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 19 Nov 2020 18:09:16 +0100 Subject: ARM: 9030/1: entry: omit FP emulation for UND exceptions taken in kernel mode There are a couple of problems with the exception entry code that deals with FP exceptions (which are reported as UND exceptions) when building the kernel in Thumb2 mode: - the conditional branch to vfp_kmode_exception in vfp_support_entry() may be out of range for its target, depending on how the linker decides to arrange the sections; - when the UND exception is taken in kernel mode, the emulation handling logic is entered via the 'call_fpe' label, which means we end up using the wrong value/mask pairs to match and detect the NEON opcodes. Since UND exceptions in kernel mode are unlikely to occur on a hot path (as opposed to the user mode version which is invoked for VFP support code and lazy restore), we can use the existing undef hook machinery for any kernel mode instruction emulation that is needed, including calling the existing vfp_kmode_exception() routine for unexpected cases. So drop the call to call_fpe, and instead, install an undef hook that will get called for NEON and VFP instructions that trigger an UND exception in kernel mode. While at it, make sure that the PC correction is accurate for the execution mode where the exception was taken, by checking the PSR Thumb bit. Cc: Dmitry Osipenko Cc: Kees Cook Fixes: eff8728fe698 ("vmlinux.lds.h: Add PGO and AutoFDO input sections") Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Reviewed-by: Nick Desaulniers Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 25 ++-------------------- arch/arm/vfp/vfphw.S | 5 ----- arch/arm/vfp/vfpmodule.c | 49 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 49 insertions(+), 30 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c4220f51fcf3..0ea8529a4872 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -252,31 +252,10 @@ __und_svc: #else svc_entry #endif - @ - @ call emulation code, which returns using r9 if it has emulated - @ the instruction, or the more conventional lr if we are to treat - @ this as a real undefined instruction - @ - @ r0 - instruction - @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r4, #-4] -#else - mov r1, #2 - ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 - cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 - blo __und_svc_fault - ldrh r9, [r4] @ bottom 16 bits - add r4, r4, #2 - str r4, [sp, #S_PC] - orr r0, r9, r0, lsl #16 -#endif - badr r9, __und_svc_finish - mov r2, r4 - bl call_fpe mov r1, #4 @ PC correction to apply -__und_svc_fault: + THUMB( tst r5, #PSR_T_BIT ) @ exception taken in Thumb mode? + THUMB( movne r1, #2 ) @ if so, fix up PC correction mov r0, sp @ struct pt_regs *regs bl __und_fault diff --git a/arch/arm/vfp/vfphw.S b/arch/arm/vfp/vfphw.S index 4fcff9f59947..d5837bf05a9a 100644 --- a/arch/arm/vfp/vfphw.S +++ b/arch/arm/vfp/vfphw.S @@ -79,11 +79,6 @@ ENTRY(vfp_support_entry) DBGSTR3 "instr %08x pc %08x state %p", r0, r2, r10 .fpu vfpv2 - ldr r3, [sp, #S_PSR] @ Neither lazy restore nor FP exceptions - and r3, r3, #MODE_MASK @ are supported in kernel mode - teq r3, #USR_MODE - bne vfp_kmode_exception @ Returns through lr - VFPFMRX r1, FPEXC @ Is the VFP enabled? DBGSTR1 "fpexc %08x", r1 tst r1, #FPEXC_EN diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 8c9e7f9f0277..c3b6451c18bd 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include "vfpinstr.h" @@ -642,7 +643,9 @@ static int vfp_starting_cpu(unsigned int unused) return 0; } -void vfp_kmode_exception(void) +#ifdef CONFIG_KERNEL_MODE_NEON + +static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr) { /* * If we reach this point, a floating point exception has been raised @@ -660,9 +663,51 @@ void vfp_kmode_exception(void) pr_crit("BUG: unsupported FP instruction in kernel mode\n"); else pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n"); + pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC)); + return 1; } -#ifdef CONFIG_KERNEL_MODE_NEON +static struct undef_hook vfp_kmode_exception_hook[] = {{ + .instr_mask = 0xfe000000, + .instr_val = 0xf2000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf4000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xef000000, + .instr_val = 0xef000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0xff100000, + .instr_val = 0xf9000000, + .cpsr_mask = MODE_MASK | PSR_T_BIT, + .cpsr_val = SVC_MODE | PSR_T_BIT, + .fn = vfp_kmode_exception, +}, { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_kmode_exception, +}}; + +static int __init vfp_kmode_exception_hook_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(vfp_kmode_exception_hook); i++) + register_undef_hook(&vfp_kmode_exception_hook[i]); + return 0; +} +core_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions -- cgit v1.2.3 From e64ab473dddaffdfc4bd0b385204f472f2cb00d6 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 30 Nov 2020 22:42:36 +0100 Subject: ARM: 9034/1: __div64_32(): straighten up inline asm constraints The ARM version of __div64_32() encapsulates a call to __do_div64 with non-standard argument passing. In particular, __n is a 64-bit input argument assigned to r0-r1 and __rem is an output argument sharing half of that r0-r1 register pair. With __n being an input argument, the compiler is in its right to presume that r0-r1 would still hold the value of __n past the inline assembly statement. Normally, the compiler would have assigned non overlapping registers to __n and __rem if the value for __n is needed again. However, here we enforce our own register assignment and gcc fails to notice the conflict. In practice this doesn't cause any problem as __n is considered dead after the asm statement and *n is overwritten. However this is not always guaranteed and clang rightfully complains. Let's fix it properly by making __n into an input-output variable. This makes it clear that those registers representing __n have been modified. Then we can extract __rem as the high part of __n with plain C code. This asm constraint "abuse" was likely relied upon back when gcc didn't handle 64-bit values optimally. Turns out that gcc is now able to optimize things and produces the same code with this patch applied. Reported-by: Antony Yu Signed-off-by: Nicolas Pitre Reviewed-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/include/asm/div64.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/div64.h b/arch/arm/include/asm/div64.h index 898e9c78a7e7..595e538f5bfb 100644 --- a/arch/arm/include/asm/div64.h +++ b/arch/arm/include/asm/div64.h @@ -21,29 +21,20 @@ * assembly implementation with completely non standard calling convention * for arguments and results (beware). */ - -#ifdef __ARMEB__ -#define __xh "r0" -#define __xl "r1" -#else -#define __xl "r0" -#define __xh "r1" -#endif - static inline uint32_t __div64_32(uint64_t *n, uint32_t base) { register unsigned int __base asm("r4") = base; register unsigned long long __n asm("r0") = *n; register unsigned long long __res asm("r2"); - register unsigned int __rem asm(__xh); - asm( __asmeq("%0", __xh) + unsigned int __rem; + asm( __asmeq("%0", "r0") __asmeq("%1", "r2") - __asmeq("%2", "r0") - __asmeq("%3", "r4") + __asmeq("%2", "r4") "bl __do_div64" - : "=r" (__rem), "=r" (__res) - : "r" (__n), "r" (__base) + : "+r" (__n), "=r" (__res) + : "r" (__base) : "ip", "lr", "cc"); + __rem = __n >> 32; *n = __res; return __rem; } -- cgit v1.2.3 From 3cce9d44321e460e7c88cdec4e4537a6e9ad7c0d Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 19 Dec 2020 16:29:58 +0100 Subject: ARM: 9044/1: vfp: use undef hook for VFP support detection Commit f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND exceptions taken in kernel mode") failed to take into account that there is in fact a case where we relied on this code path: during boot, the VFP detection code issues a read of FPSID, which will trigger an undef exception on cores that lack VFP support. So let's reinstate this logic using an undef hook which is registered only for the duration of the initcall to vpf_init(), and which sets VFP_arch to a non-zero value - as before - if no VFP support is present. Fixes: f77ac2e378be9dd6 ("ARM: 9030/1: entry: omit FP emulation for UND ...") Reported-by: "kernelci.org bot" Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/vfp/entry.S | 17 ----------------- arch/arm/vfp/vfpmodule.c | 25 ++++++++++++++++++++----- 2 files changed, 20 insertions(+), 22 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/vfp/entry.S b/arch/arm/vfp/entry.S index 0186cf9da890..27b0a1f27fbd 100644 --- a/arch/arm/vfp/entry.S +++ b/arch/arm/vfp/entry.S @@ -37,20 +37,3 @@ ENDPROC(vfp_null_entry) .align 2 .LCvfp: .word vfp_vector - -@ This code is called if the VFP does not exist. It needs to flag the -@ failure to the VFP initialisation code. - - __INIT -ENTRY(vfp_testing_entry) - dec_preempt_count_ti r10, r4 - ldr r0, VFP_arch_address - str r0, [r0] @ set to non-zero value - ret r9 @ we have handled the fault -ENDPROC(vfp_testing_entry) - - .align 2 -VFP_arch_address: - .word VFP_arch - - __FINIT diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index c3b6451c18bd..2cb355c1b5b7 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -32,7 +32,6 @@ /* * Our undef handlers (in entry.S) */ -asmlinkage void vfp_testing_entry(void); asmlinkage void vfp_support_entry(void); asmlinkage void vfp_null_entry(void); @@ -43,7 +42,7 @@ asmlinkage void (*vfp_vector)(void) = vfp_null_entry; * Used in startup: set to non-zero if VFP checks fail * After startup, holds VFP architecture */ -unsigned int VFP_arch; +static unsigned int __initdata VFP_arch; /* * The pointer to the vfpstate structure of the thread which currently @@ -437,7 +436,7 @@ static void vfp_enable(void *unused) * present on all CPUs within a SMP complex. Needs to be called prior to * vfp_init(). */ -void vfp_disable(void) +void __init vfp_disable(void) { if (VFP_arch) { pr_debug("%s: should be called prior to vfp_init\n", __func__); @@ -707,7 +706,7 @@ static int __init vfp_kmode_exception_hook_init(void) register_undef_hook(&vfp_kmode_exception_hook[i]); return 0; } -core_initcall(vfp_kmode_exception_hook_init); +subsys_initcall(vfp_kmode_exception_hook_init); /* * Kernel-side NEON support functions @@ -753,6 +752,21 @@ EXPORT_SYMBOL(kernel_neon_end); #endif /* CONFIG_KERNEL_MODE_NEON */ +static int __init vfp_detect(struct pt_regs *regs, unsigned int instr) +{ + VFP_arch = UINT_MAX; /* mark as not present */ + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook vfp_detect_hook __initdata = { + .instr_mask = 0x0c000e00, + .instr_val = 0x0c000a00, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = vfp_detect, +}; + /* * VFP support code initialisation. */ @@ -773,10 +787,11 @@ static int __init vfp_init(void) * The handler is already setup to just log calls, so * we just need to read the VFPSID register. */ - vfp_vector = vfp_testing_entry; + register_undef_hook(&vfp_detect_hook); barrier(); vfpsid = fmrx(FPSID); barrier(); + unregister_undef_hook(&vfp_detect_hook); vfp_vector = vfp_null_entry; pr_info("VFP support v0.3: "); -- cgit v1.2.3 From 6c7a6d22fcef9181239ea7248c6b0c4117b9325e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 27 Nov 2020 13:07:22 +0100 Subject: ARM: 9031/1: hyp-stub: remove unused .L__boot_cpu_mode_offset symbol Commit aaac3733171fca94 ("ARM: kvm: replace open coded VA->PA calculations with adr_l call") removed all uses of .L__boot_cpu_mode_offset, so there is no longer a need to define it. Signed-off-by: Ard Biesheuvel Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/kernel/hyp-stub.S | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S index 26d8e03b1dd3..327f63ec42c6 100644 --- a/arch/arm/kernel/hyp-stub.S +++ b/arch/arm/kernel/hyp-stub.S @@ -228,12 +228,6 @@ ENTRY(__hyp_soft_restart) ret lr ENDPROC(__hyp_soft_restart) -#ifndef ZIMAGE -.align 2 -.L__boot_cpu_mode_offset: - .long __boot_cpu_mode - . -#endif - .align 5 ENTRY(__hyp_stub_vectors) __hyp_stub_reset: W(b) . -- cgit v1.2.3 From 76460d613d9b4096f3567bd444e3fc275db1b96b Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 30 Nov 2020 10:28:20 +0100 Subject: ARM: 9032/1: arm/mm: Convert PUD level pgtable helper macros into functions Macros used as functions can be problematic from the compiler perspective. There was a build failure report caused primarily because of non reference of an argument variable. Hence convert PUD level pgtable helper macros into functions in order to avoid such problems in the future. In the process, it fixes the argument variables sequence in set_pud() which probably remained hidden for being a macro. https://lore.kernel.org/linux-mm/202011020749.5XQ3Hfzc-lkp@intel.com/ https://lore.kernel.org/linux-mm/5fa49698.Vu2O3r+dU20UoEJ+%25lkp@intel.com/ Cc: Andrew Morton Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Linus Walleij Signed-off-by: Anshuman Khandual Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 3502c2f746ca..9d4f5eef410b 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -177,11 +177,28 @@ * the pud: the pud entry is never bad, always exists, and can't be set or * cleared. */ -#define pud_none(pud) (0) -#define pud_bad(pud) (0) -#define pud_present(pud) (1) -#define pud_clear(pudp) do { } while (0) -#define set_pud(pud,pudp) do { } while (0) +static inline int pud_none(pud_t pud) +{ + return 0; +} + +static inline int pud_bad(pud_t pud) +{ + return 0; +} + +static inline int pud_present(pud_t pud) +{ + return 1; +} + +static inline void pud_clear(pud_t *pudp) +{ +} + +static inline void set_pud(pud_t *pudp, pud_t pud) +{ +} static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { -- cgit v1.2.3 From 27bde183b0d3b0e8e84c80db1864a5c7bd20b5e7 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Mon, 30 Nov 2020 11:24:09 +0100 Subject: ARM: 9033/1: arm/smp: Drop the macro S(x,s) Mapping between IPI type index and its string is direct without requiring an additional offset. Hence the existing macro S(x, s) is now redundant and can just be dropped. This also makes the code clean and simple. Cc: Marc Zyngier Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 48099c6e1e4a..6ab2b0ad5f40 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -524,14 +524,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) } static const char *ipi_types[NR_IPI] __tracepoint_string = { -#define S(x,s) [x] = s - S(IPI_WAKEUP, "CPU wakeup interrupts"), - S(IPI_TIMER, "Timer broadcast interrupts"), - S(IPI_RESCHEDULE, "Rescheduling interrupts"), - S(IPI_CALL_FUNC, "Function call interrupts"), - S(IPI_CPU_STOP, "CPU stop interrupts"), - S(IPI_IRQ_WORK, "IRQ work interrupts"), - S(IPI_COMPLETION, "completion interrupts"), + [IPI_WAKEUP] = "CPU wakeup interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", + [IPI_RESCHEDULE] = "Rescheduling interrupts", + [IPI_CALL_FUNC] = "Function call interrupts", + [IPI_CPU_STOP] = "CPU stop interrupts", + [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_COMPLETION] = "completion interrupts", }; static void smp_cross_call(const struct cpumask *target, unsigned int ipinr); -- cgit v1.2.3 From 0557ac83fd1a0a7cd6909665bad50006507115a0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:37:47 +0100 Subject: ARM: 9035/1: uncompress: Add be32tocpu macro DTB stores all values as 32-bit big-endian integers. Add a macro to convert such values to native CPU endianness, to reduce duplication. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 2e04ec5b5446..14d1995106c2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -165,6 +165,16 @@ orr \res, \res, \tmp1, lsl #24 .endm + .macro be32tocpu, val, tmp +#ifndef __ARMEB__ + /* convert to little endian */ + eor \tmp, \val, \val, ror #16 + bic \tmp, \tmp, #0x00ff0000 + mov \val, \val, ror #8 + eor \val, \val, \tmp, lsr #8 +#endif + .endm + .section ".start", "ax" /* * sort out different calling conventions @@ -345,13 +355,7 @@ restart: adr r0, LC1 /* Get the initial DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 dbgadtb r6, r5 /* 50% DTB growth should be good enough */ add r5, r5, r5, lsr #1 @@ -403,13 +407,7 @@ restart: adr r0, LC1 /* Get the current DTB size */ ldr r5, [r6, #4] -#ifndef __ARMEB__ - /* convert r5 (dtb size) to little endian */ - eor r1, r5, r5, ror #16 - bic r1, r1, #0x00ff0000 - mov r5, r5, ror #8 - eor r5, r5, r1, lsr #8 -#endif + be32tocpu r5, r1 /* preserve 64-bit alignment */ add r5, r5, #7 -- cgit v1.2.3 From 1ecec38547d415054fdb63a231234f44396b6d06 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:37:14 +0100 Subject: ARM: 9036/1: uncompress: Fix dbgadtb size parameter name The dbgadtb macro is passed the size of the appended DTB, not the end address. Fixes: c03e41470e901123 ("ARM: 9010/1: uncompress: Print the location of appended DTB") Signed-off-by: Geert Uytterhoeven Reviewed-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 14d1995106c2..4fb699769e6a 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -116,7 +116,7 @@ /* * Debug print of the final appended DTB location */ - .macro dbgadtb, begin, end + .macro dbgadtb, begin, size #ifdef DEBUG kputc #'D' kputc #'T' @@ -129,7 +129,7 @@ kputc #'(' kputc #'0' kputc #'x' - kphex \end, 8 /* End of appended DTB */ + kphex \size, 8 /* Size of appended DTB */ kputc #')' kputc #'\n' #endif -- cgit v1.2.3 From 551b39efc6ffdc7a881122fbac0caa2a27a464d8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 4 Dec 2020 10:39:10 +0100 Subject: ARM: 9037/1: uncompress: Add OF_DT_MAGIC macro The DTB magic marker is stored as a 32-bit big-endian value, and thus depends on the CPU's endianness. Add a macro to define this value in native endianness, to reduce #ifdef clutter and (future) duplication. Signed-off-by: Geert Uytterhoeven Reviewed-by: Ard Biesheuvel Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/boot/compressed/head.S | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 4fb699769e6a..8d546cb10921 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -11,6 +11,12 @@ #include "efi-header.S" +#ifdef __ARMEB__ +#define OF_DT_MAGIC 0xd00dfeed +#else +#define OF_DT_MAGIC 0xedfe0dd0 +#endif + AR_CLASS( .arch armv7-a ) M_CLASS( .arch armv7-m ) @@ -335,11 +341,7 @@ restart: adr r0, LC1 */ ldr lr, [r6, #0] -#ifndef __ARMEB__ - ldr r1, =0xedfe0dd0 @ sig is 0xd00dfeed big endian -#else - ldr r1, =0xd00dfeed -#endif + ldr r1, =OF_DT_MAGIC cmp lr, r1 bne dtb_check_done @ not found -- cgit v1.2.3 From 0cda9bc15dfc459bd178d6ba93389df52dd57957 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Tue, 8 Dec 2020 01:34:16 +0100 Subject: ARM: 9038/1: Link with '-z norelro' When linking a multi_v7_defconfig + CONFIG_KASAN=y kernel with LD=ld.lld, the following error occurs: $ make ARCH=arm CROSS_COMPILE=arm-linux-gnueabi- LLVM=1 zImage ld.lld: error: section: .exit.data is not contiguous with other relro sections LLD defaults to '-z relro', which is unneeded for the kernel because program headers are not used nor is there any position independent code generation or linking for ARM. Add '-z norelro' to LDFLAGS_vmlinux to avoid this error. Link: https://github.com/ClangBuiltLinux/linux/issues/1189 Reviewed-by: Nick Desaulniers Tested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Russell King --- arch/arm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm') diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 4d76eab2b22d..3c0a64cefe52 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -10,7 +10,7 @@ # # Copyright (C) 1995-2001 by Russell King -LDFLAGS_vmlinux := --no-undefined -X --pic-veneer +LDFLAGS_vmlinux := --no-undefined -X --pic-veneer -z norelro ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 KBUILD_LDFLAGS_MODULE += --be8 -- cgit v1.2.3 From 10fce53c0ef8f6e79115c3d9e0d7ea1338c3fa37 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 17 Nov 2020 08:41:01 +0100 Subject: ARM: 9027/1: head.S: explicitly map DT even if it lives in the first physical section The early ATAGS/DT mapping code uses SECTION_SHIFT to mask low order bits of R2, and decides that no ATAGS/DTB were provided if the resulting value is 0x0. This means that on systems where DRAM starts at 0x0 (such as Raspberry Pi), no explicit mapping of the DT will be created if R2 points into the first 1 MB section of memory. This was not a problem before, because the decompressed kernel is loaded at the base of DRAM and mapped using sections as well, and so as long as the DT is referenced via a virtual address that uses the same translation (the linear map, in this case), things work fine. However, commit 7a1be318f579 ("9012/1: move device tree mapping out of linear region") changes this, and now the DT is referenced via a virtual address that is disjoint from the linear mapping of DRAM, and so we need the early code to create the DT mapping unconditionally. So let's create the early DT mapping for any value of R2 != 0x0. Reported-by: "kernelci.org bot" Reviewed-by: Linus Walleij Signed-off-by: Ard Biesheuvel Signed-off-by: Russell King --- arch/arm/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm') diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index 9b18d8c66129..26ffd462a975 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -274,10 +274,10 @@ __create_page_tables: * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ mov r0, r2, lsr #SECTION_SHIFT - movs r0, r0, lsl #SECTION_SHIFT + cmp r2, #0 ldrne r3, =FDT_FIXED_BASE >> (SECTION_SHIFT - PMD_ORDER) addne r3, r3, r4 - orrne r6, r7, r0 + orrne r6, r7, r0, lsl #SECTION_SHIFT strne r6, [r3], #1 << PMD_ORDER addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] -- cgit v1.2.3