From 48f99c8ec0b25756d0283ab058826ae07d14fad7 Mon Sep 17 00:00:00 2001 From: Dong Bo Date: Tue, 25 Apr 2017 14:11:29 +0800 Subject: arm64: Preventing READ_IMPLIES_EXEC propagation Like arch/arm/, we inherit the READ_IMPLIES_EXEC personality flag across fork(). This is undesirable for a number of reasons: * ELF files that don't require executable stack can end up with it anyway * We end up performing unnecessary I-cache maintenance when mapping what should be non-executable pages * Restricting what is executable is generally desirable when defending against overflow attacks This patch clears the personality flag when setting up the personality for newly spawned native tasks. Given that semi-recent AArch64 toolchains emit a non-executable PT_GNU_STACK header, userspace applications already cannot rely on READ_IMPLIES_EXEC, so they shouldn't be adversely affected by this change. Cc: Reported-by: Peter Maydell Signed-off-by: Dong Bo [will: added comment to compat code, rewrote commit message] Signed-off-by: Will Deacon --- arch/arm64/include/asm/elf.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index 5d1700425efe..ac3fb7441510 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -142,6 +142,7 @@ typedef struct user_fpsimd_state elf_fpregset_t; ({ \ clear_bit(TIF_32BIT, &current->mm->context.flags); \ clear_thread_flag(TIF_32BIT); \ + current->personality &= ~READ_IMPLIES_EXEC; \ }) /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ @@ -187,6 +188,11 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG]; ((x)->e_flags & EF_ARM_EABI_MASK)) #define compat_start_thread compat_start_thread +/* + * Unlike the native SET_PERSONALITY macro, the compat version inherits + * READ_IMPLIES_EXEC across a fork() since this is the behaviour on + * arch/arm/. + */ #define COMPAT_SET_PERSONALITY(ex) \ ({ \ set_bit(TIF_32BIT, &current->mm->context.flags); \ -- cgit v1.2.3 From c07ab957d9af8092fc3caf3db5a0fb49098fdc65 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 9 May 2017 09:53:36 +0800 Subject: arm64: Call __show_regs directly Generic code expects show_regs() to also dump the stack, but arm64's show_regs() does not do this. Some arm64 callers of show_regs() *only* want the registers dumped, without the stack. To enable generic code to work as expected, we need to make show_regs() dump the stack. Where we only want the registers dumped, we must use __show_regs(). This patch updates code to use __show_regs() where only registers are desired. A subsequent patch will modify show_regs().
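Taken together with the following patch, the resulting division of labour is the one sketched below: __show_regs() prints the register state only, while show_regs() additionally walks the stack, matching what generic code expects (the sketch mirrors the change made to arch/arm64/kernel/process.c by the next patch):

	/* registers plus backtrace, as generic code expects */
	void show_regs(struct pt_regs *regs)
	{
		__show_regs(regs);		/* registers only */
		dump_backtrace(regs, NULL);	/* NULL: unwind the current task */
	}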
Acked-by: Mark Rutland Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/probes/kprobes.c | 4 ++-- arch/arm64/mm/fault.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c5c45942fb6e..d849d9804011 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -522,9 +522,9 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) pr_err("current sp %lx does not match saved sp %lx\n", orig_sp, stack_addr); pr_err("Saved registers for jprobe %p\n", jp); - show_regs(saved_regs); + __show_regs(saved_regs); pr_err("Current registers\n"); - show_regs(regs); + __show_regs(regs); BUG(); } unpause_graph_tracing(); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..c3f2b1048f83 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -250,7 +250,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); show_pte(tsk->mm, addr); - show_regs(regs); + __show_regs(regs); } tsk->thread.fault_address = addr; -- cgit v1.2.3 From 1149aad10b1e2f2cf1ea023889ac8604ae869c5a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 9 May 2017 09:53:37 +0800 Subject: arm64: Add dump_backtrace() in show_regs Generic code expects show_regs() to dump the stack, but arm64's show_regs() does not. This makes it hard to debug softlockups and other issues that result in show_regs() being called. This patch updates arm64's show_regs() to dump the stack, as common code expects. Acked-by: Mark Rutland Signed-off-by: Kefeng Wang [will: folded in bug_handler fix from mrutland] Signed-off-by: Will Deacon --- arch/arm64/include/asm/stacktrace.h | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/traps.c | 4 +--- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 801a16dbbdf6..5b6eafccc5d8 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -30,5 +30,6 @@ struct stackframe { extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame); extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame, int (*fn)(struct stackframe *, void *), void *data); +extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); #endif /* __ASM_STACKTRACE_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ae2a835898d7..af1ea258c212 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -210,6 +210,7 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { __show_regs(regs); + dump_backtrace(regs, NULL); } static void tls_thread_flush(void) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 0805b44f986a..3ebfb1d00b53 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -140,7 +140,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) } } -static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) +void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) { struct stackframe frame; unsigned long irq_stack_ptr; @@ -728,8 +728,6 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) break; case BUG_TRAP_TYPE_WARN: - /* Ideally, report_bug() should backtrace for us... but no. 
*/ - dump_backtrace(regs, NULL); break; default: -- cgit v1.2.3 From 6efd8499d9bda657fc82621b24d6122a01332c19 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 15 May 2017 13:40:20 +0200 Subject: arm64: mm: explicitly include linux/vmalloc.h arm64's mm/mmu.c uses vm_area_add_early, struct vm_struct and other definitions but relies on implicit inclusion of linux/vmalloc.h, which means that changes in other headers could break the build. Thus, add an explicit include. Acked-by: Ard Biesheuvel Signed-off-by: Tobias Klauser Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..23c2d89a362e 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/vmalloc.h> #include #include -- cgit v1.2.3 From 3fde2999fac549024a56353966844ac7633b74ae Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 18 May 2017 16:10:34 +0100 Subject: arm64: cpufeature: Don't dump useless backtrace on CPU_OUT_OF_SPEC Unfortunately, it turns out that mismatched CPU features in big.LITTLE systems are starting to appear in the wild. Whilst we should continue to taint the kernel with CPU_OUT_OF_SPEC for features that differ in ways that we can't fix up, dumping a useless backtrace out of the cpufeature code is pointless and irritating. This patch removes the backtrace from the taint. Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 817ce3365e20..22f554320581 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -639,8 +639,8 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - WARN_TAINT_ONCE(taint, TAINT_CPU_OUT_OF_SPEC, - "Unsupported CPU feature variation.\n"); + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); } u64 read_sanitised_ftr_reg(u32 id) -- cgit v1.2.3 From 690e95dd4d67be2eb905e1480b692c84e612a71a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Tue, 16 May 2017 15:36:16 +0800 Subject: arm64: check return value of of_flat_dt_get_machine_name It's useless to print the machine name and set up arch-specific system identifiers if of_flat_dt_get_machine_name() returns NULL, especially when booting via ACPI.
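For context, of_flat_dt_get_machine_name() returns NULL when the root node of the flattened device tree carries neither a "model" nor a "compatible" property, which is precisely the ACPI-boot case where only a stub DT is handed to the kernel. A rough sketch of the helper (paraphrased from drivers/of/fdt.c, not part of this patch):

	const char * __init of_flat_dt_get_machine_name(void)
	{
		const char *name;
		unsigned long dt_root = of_get_flat_dt_root();

		/* prefer "model", fall back to "compatible" */
		name = of_get_flat_dt_prop(dt_root, "model", NULL);
		if (!name)
			name = of_get_flat_dt_prop(dt_root, "compatible", NULL);
		return name;
	}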
Reviewed-by: Geert Uytterhoeven Signed-off-by: Kefeng Wang Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 2c822ef94f34..d4b740538ad5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -194,6 +194,9 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) } name = of_flat_dt_get_machine_name(); + if (!name) + return; + pr_info("Machine model: %s\n", name); dump_stack_set_arch_desc("%s (DT)", name); } -- cgit v1.2.3 From 5f16a046f8e144c294ef98cd29d9458b5f8273e5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Apr 2017 11:14:05 +0100 Subject: arm64: futex: Fix undefined behaviour with FUTEX_OP_OPARG_SHIFT usage FUTEX_OP_OPARG_SHIFT instructs the futex code to treat the 12-bit oparg field as a shift value, potentially leading to a left shift value that is negative or with an absolute value that is significantly larger than the size of the type. UBSAN chokes with: ================================================================================ UBSAN: Undefined behaviour in ./arch/arm64/include/asm/futex.h:60:13 shift exponent -1 is negative CPU: 1 PID: 1449 Comm: syz-executor0 Not tainted 4.11.0-rc4-00005-g977eb52-dirty #11 Hardware name: linux,dummy-virt (DT) Call trace: [] dump_backtrace+0x0/0x538 arch/arm64/kernel/traps.c:73 [] show_stack+0x20/0x30 arch/arm64/kernel/traps.c:228 [] __dump_stack lib/dump_stack.c:16 [inline] [] dump_stack+0x120/0x188 lib/dump_stack.c:52 [] ubsan_epilogue+0x18/0x98 lib/ubsan.c:164 [] __ubsan_handle_shift_out_of_bounds+0x250/0x294 lib/ubsan.c:421 [] futex_atomic_op_inuser arch/arm64/include/asm/futex.h:60 [inline] [] futex_wake_op kernel/futex.c:1489 [inline] [] do_futex+0x137c/0x1740 kernel/futex.c:3231 [] SYSC_futex kernel/futex.c:3281 [inline] [] SyS_futex+0x114/0x268 kernel/futex.c:3249 [] el0_svc_naked+0x24/0x28 ================================================================================ syz-executor1 uses obsolete (PF_INET,SOCK_PACKET) sock: process `syz-executor0' is using obsolete setsockopt SO_BSDCOMPAT This patch attempts to fix some of this by: * Making encoded_op an unsigned type, so we can shift it left even if the top bit is set. * Casting to signed prior to shifting right when extracting oparg and cmparg * Considering only the bottom 5 bits of oparg when using it as a left-shift value. Whilst I think this catches all of the issues, I'd much prefer to remove this stuff, as I think it's unused and the bugs are copy-pasted between a bunch of architectures.
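To make the fix concrete, here is a worked decode of the oparg field using the patched expressions (standalone userspace sketch; the sample value is arbitrary):

	#include <stdio.h>

	int main(void)
	{
		unsigned int encoded_op = 0x00fff000;	/* oparg field (bits 12-23) = 0xfff */
		int oparg = (int)(encoded_op << 8) >> 20;	/* arithmetic shift: sign-extends */

		printf("oparg = %d\n", oparg);			/* -1, not 4095 */
		printf("shift used = %d\n", oparg & 0x1f);	/* 31: large, but well-defined */
		return 0;
	}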
Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/include/asm/futex.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 85c4a8981d47..f32b42e8725d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -48,16 +48,16 @@ do { \ } while (0) static inline int -futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) +futex_atomic_op_inuser(unsigned int encoded_op, u32 __user *uaddr) { int op = (encoded_op >> 28) & 7; int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; + int oparg = (int)(encoded_op << 8) >> 20; + int cmparg = (int)(encoded_op << 20) >> 20; int oldval = 0, ret, tmp; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; + oparg = 1U << (oparg & 0x1f); if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) return -EFAULT; -- cgit v1.2.3 From db46a72b9713fd20c405e796d7ef841f6d9bd15f Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 24 May 2017 18:22:19 +0100 Subject: ARM64/PCI: Set root bus NUMA node on ACPI systems PCI core requires the NUMA node for the struct pci_host_bridge.dev to be set by using the pcibus_to_node(struct pci_bus*) API, which on ARM64 systems relies on the struct pci_host_bridge->bus.dev NUMA node. The struct pci_host_bridge.dev NUMA node is then propagated through the PCI device hierarchy as PCI devices (and bridges) are enumerated under it. Therefore, in order to set up the PCI NUMA hierarchy appropriately, the struct pci_host_bridge->bus.dev NUMA node must be set before core code calls pcibus_to_node(struct pci_bus*) on it so that PCI core can retrieve the NUMA node for the struct pci_host_bridge.dev device and can propagate it through the PCI bus tree. On ARM64 ACPI-based systems the struct pci_host_bridge->bus.dev NUMA node can be set up in pcibios_root_bridge_prepare() by parsing the root bridge ACPI device firmware binding. Add code to pcibios_root_bridge_prepare() that, when booting with ACPI, parses the root bridge ACPI device companion NUMA binding and sets the corresponding struct pci_host_bridge->bus.dev NUMA node appropriately.
Cc: Bjorn Helgaas Cc: Catalin Marinas Reviewed-by: Robert Richter Tested-by: Robert Richter Signed-off-by: Lorenzo Pieralisi Signed-off-by: Will Deacon --- arch/arm64/kernel/pci.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4f0e3ebfea4b..108283443336 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -108,7 +108,10 @@ int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) if (!acpi_disabled) { struct pci_config_window *cfg = bridge->bus->sysdata; struct acpi_device *adev = to_acpi_device(cfg->parent); + struct device *bus_dev = &bridge->bus->dev; + ACPI_COMPANION_SET(&bridge->dev, adev); + set_dev_node(bus_dev, acpi_get_node(acpi_device_handle(adev))); } return 0; -- cgit v1.2.3 From fe7296e19221c6dc125a06b52e28ccbdb76d9b58 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 24 May 2017 15:43:18 +0800 Subject: arm64: perf: Extend event config for ARMv8.1 Perf has supported the ARMv8.1 feature of a 16-bit evtCount field [see c210ae8 ("arm64: perf: Extend event mask for ARMv8.1")], so the event config should be extended to 16 bits too. Otherwise, using -e event_name with an event code greater than 0x3ff makes pmu_config_term() return -EINVAL, because pmu_format_max_value() depends on the event config. This patch extends the event config to 16 bits. Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 83a1b1ad189f..b5798ba21189 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -529,7 +529,7 @@ static struct attribute_group armv8_pmuv3_events_attr_group = { .is_visible = armv8pmu_event_attr_is_visible, }; -PMU_FORMAT_ATTR(event, "config:0-9"); +PMU_FORMAT_ATTR(event, "config:0-15"); static struct attribute *armv8_pmuv3_format_attrs[] = { &format_attr_event.attr, -- cgit v1.2.3 From 1151f838cb626005f4d69bf675dacaaa5ea909d6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 19 May 2017 16:42:00 +0100 Subject: arm64: kernel: restrict /dev/mem read() calls to linear region When running lscpu on an AArch64 system that has SMBIOS version 2.0 tables, it will segfault in the following way: Unable to handle kernel paging request at virtual address ffff8000bfff0000 pgd = ffff8000f9615000 [ffff8000bfff0000] *pgd=0000000000000000 Internal error: Oops: 96000007 [#1] PREEMPT SMP Modules linked in: CPU: 0 PID: 1284 Comm: lscpu Not tainted 4.11.0-rc3+ #103 Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015 task: ffff8000fa78e800 task.stack: ffff8000f9780000 PC is at __arch_copy_to_user+0x90/0x220 LR is at read_mem+0xcc/0x140 This is caused by the fact that lscpu issues a read() on /dev/mem at the offset where it expects to find the SMBIOS structure array. However, this region is classified as EFI_RUNTIME_SERVICES_DATA (as per the UEFI spec), and so it is omitted from the linear mapping. So let's restrict /dev/mem read/write access to those areas that are covered by the linear region.
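The two memblock predicates used by the new valid_phys_addr_range() differ in exactly the way that matters here; a minimal illustration (kernel context assumed; the address is the one from the oops above):

	phys_addr_t addr = 0xbfff0000;	/* hypothetically EFI_RUNTIME_SERVICES_DATA */

	/* true: the whole range lies within a memblock.memory region */
	memblock_is_region_memory(addr, SZ_4K);

	/* false: the region carries MEMBLOCK_NOMAP, so it has no linear
	 * mapping and read_mem() must not touch it */
	memblock_is_map_memory(addr);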
Reported-by: Alexander Graf Fixes: 4dffbfc48d65 ("arm64/efi: mark UEFI reserved regions as MEMBLOCK_NOMAP") Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/mm/mmap.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 7b0d55756eb1..adc208c2ae9c 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -18,6 +18,7 @@ #include #include +#include <linux/memblock.h> #include #include #include @@ -103,12 +104,18 @@ void arch_pick_mmap_layout(struct mm_struct *mm) */ int valid_phys_addr_range(phys_addr_t addr, size_t size) { - if (addr < PHYS_OFFSET) - return 0; - if (addr + size > __pa(high_memory - 1) + 1) - return 0; - - return 1; + /* + * Check whether addr is covered by a memory region without the + * MEMBLOCK_NOMAP attribute, and whether that region covers the + * entire range. In theory, this could lead to false negatives + * if the range is covered by distinct but adjacent memory regions + * that only differ in other attributes. However, few of such + * attributes have been defined, and it is debatable whether it + * follows that /dev/mem read() calls should be able to traverse + * such boundaries. + */ + return memblock_is_region_memory(addr, size) && + memblock_is_map_memory(addr); } /* -- cgit v1.2.3 From 8dd0ee651d8aefdc2d8ae0fcc9c68dfc943c9e4c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Jun 2017 11:40:23 +0100 Subject: arm64: cpufeature: Fix CPU_OUT_OF_SPEC taint for uniform systems Commit 3fde2999fac5 ("arm64: cpufeature: Don't dump useless backtrace on CPU_OUT_OF_SPEC") changed the cpufeature detection code to use add_taint instead of WARN_TAINT_ONCE when detecting a heterogeneous system with mismatched feature support. Unfortunately, this resulted in all systems getting the taint, regardless of any feature mismatch. This patch fixes the problem by conditionalising the taint on detecting a feature mismatch. Acked-by: Mark Rutland Reported-by: Heiner Kallweit Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 22f554320581..55d5c72a507d 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -639,8 +639,10 @@ void update_cpu_features(int cpu, * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ - pr_warn_once("Unsupported CPU feature variation detected.\n"); - add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + if (taint) { + pr_warn_once("Unsupported CPU feature variation detected.\n"); + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + } } u64 read_sanitised_ftr_reg(u32 id) -- cgit v1.2.3 From dbbb08f500d6146398b794fdc68a8e811366b451 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 5 Jun 2017 21:52:30 -0700 Subject: arm64, vdso: Define vdso_{start,end} as array Adjust vdso_{start|end} to be char arrays to avoid compile-time analysis that flags "too large" memcmp() calls with CONFIG_FORTIFY_SOURCE.
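The underlying issue is that 'extern char vdso_start;' tells the compiler the object is exactly one byte, so a fortified memcmp(&vdso_start, "\177ELF", 4) looks like a guaranteed out-of-bounds read; with an incomplete array type the object size is unknown and the check is skipped. A two-line illustration (not from the patch):

	extern char sym_as_char;	/* __builtin_object_size(&sym_as_char, 0) == 1 */
	extern char sym_as_array[];	/* __builtin_object_size(sym_as_array, 0) == (size_t)-1, i.e. unknown */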
Cc: Jisheng Zhang Acked-by: Catalin Marinas Suggested-by: Mark Rutland Signed-off-by: Kees Cook Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31f8f55..ae35f1855e06 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -37,7 +37,7 @@ #include #include -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; /* @@ -125,14 +125,14 @@ static int __init vdso_init(void) struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -145,7 +145,7 @@ static int __init vdso_init(void) /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(&vdso_start); + pfn = sym_to_pfn(vdso_start); for (i = 0; i < vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); -- cgit v1.2.3 From f8af0b364e249eef0c71200826563947cd74267e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Jun 2017 17:00:21 +0000 Subject: arm64: ftrace: don't validate branch via PLT in ftrace_make_nop() When turning branch instructions into NOPs, we attempt to validate the action by comparing the old value at the call site with the opcode of a direct relative branch instruction pointing at the old target. However, these call sites are statically initialized to call _mcount(), and may be redirected via a PLT entry if the module is loaded far away from the kernel text, leading to false negatives and spurious errors. So skip the validation if CONFIG_ARM64_MODULE_PLTS is configured. Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 40ad08ac569a..4cb576374b82 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -84,12 +84,52 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - u32 old, new; + long offset = (long)pc - (long)addr; + bool validate = true; + u32 old = 0, new; + + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + (offset < -SZ_128M || offset >= SZ_128M)) { + u32 replaced; + + /* + * 'mod' is only set at module load time, but if we end up + * dealing with an out-of-range condition, we can assume it + * is due to a module being loaded far away from the kernel. + */ + if (!mod) { + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + } + + /* + * The instruction we are about to patch may be a branch and + * link instruction that was redirected via a PLT entry. In + * this case, the normal validation will fail, but we can at + * least check that we are dealing with a branch and link + * instruction that points into the right module. 
+ */ + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (!aarch64_insn_is_bl(replaced) || + !within_module(pc + aarch64_get_branch_offset(replaced), + mod)) + return -EINVAL; + + validate = false; + } else { + old = aarch64_insn_gen_branch_imm(pc, addr, + AARCH64_INSN_BRANCH_LINK); + } - old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); new = aarch64_insn_gen_nop(); - return ftrace_modify_code(pc, old, new, true); + return ftrace_modify_code(pc, old, new, validate); } void arch_ftrace_update_code(int command) -- cgit v1.2.3 From e71a4e1bebaf7fd990efbdc04b38e5526914f0f1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 6 Jun 2017 17:00:22 +0000 Subject: arm64: ftrace: add support for far branches to dynamic ftrace Currently, dynamic ftrace support in the arm64 kernel assumes that all core kernel code is within range of ordinary branch instructions that occur in module code, which is usually the case, but is no longer guaranteed now that we have support for module PLTs and address space randomization. Since on arm64, all patching of branch instructions involves function calls to the same entry point [ftrace_caller()], we can emit the modules with a trampoline that has unlimited range, and patch both the trampoline itself and the branch instruction to redirect the call via the trampoline. Signed-off-by: Ard Biesheuvel [will: minor clarification to smp_wmb() comment] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 +- arch/arm64/Makefile | 3 +++ arch/arm64/include/asm/module.h | 3 +++ arch/arm64/kernel/Makefile | 3 +++ arch/arm64/kernel/ftrace-mod.S | 18 +++++++++++++++ arch/arm64/kernel/ftrace.c | 51 +++++++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/module.c | 6 ++++- 7 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/kernel/ftrace-mod.S (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..22f769b254b4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -982,7 +982,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE + depends on RANDOMIZE_BASE default y help Randomizes the location of the module region without considering the diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index f839ecd919f9..1ce57b42f390 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -70,6 +70,9 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds +ifeq ($(CONFIG_DYNAMIC_FTRACE),y) +KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o +endif endif # Default value diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index d57693f5d4ec..19bd97671bb8 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -30,6 +30,9 @@ struct mod_plt_sec { struct mod_arch_specific { struct mod_plt_sec core; struct mod_plt_sec init; + + /* for CONFIG_DYNAMIC_FTRACE */ + void *ftrace_trampoline; }; #endif diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 1dcb69d3d0e5..f2b4e816b6de 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -62,3 +62,6 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif + +# will be included by each individual module but not by the core kernel itself 
+extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S new file mode 100644 index 000000000000..00c4025be4ff --- /dev/null +++ b/arch/arm64/kernel/ftrace-mod.S @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2017 Linaro Ltd + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include + + .section ".text.ftrace_trampoline", "ax" + .align 3 +0: .quad 0 +__ftrace_trampoline: + ldr x16, 0b + br x16 +ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 4cb576374b82..8a42be0693c9 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -10,10 +10,12 @@ */ #include +#include #include #include #include +#include #include #include @@ -69,8 +71,57 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; + long offset = (long)pc - (long)addr; u32 old, new; + if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && + (offset < -SZ_128M || offset >= SZ_128M)) { + unsigned long *trampoline; + struct module *mod; + + /* + * On kernels that support module PLTs, the offset between the + * branch instruction and its target may legally exceed the + * range of an ordinary relative 'bl' opcode. In this case, we + * need to branch via a trampoline in the module. + * + * NOTE: __module_text_address() must be called with preemption + * disabled, but we can rely on ftrace_lock to ensure that 'mod' + * retains its validity throughout the remainder of this code. + */ + preempt_disable(); + mod = __module_text_address(pc); + preempt_enable(); + + if (WARN_ON(!mod)) + return -EINVAL; + + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. 
+ */ + trampoline = (unsigned long *)mod->arch.ftrace_trampoline; + if (trampoline[0] != addr) { + if (trampoline[0] != 0) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + + /* point the trampoline to our ftrace entry point */ + module_disable_ro(mod); + trampoline[0] = addr; + module_enable_ro(mod, true); + + /* update trampoline before patching in the branch */ + smp_wmb(); + } + addr = (unsigned long)&trampoline[1]; + } + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f035ff6fb223..8c3a7264fb0f 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -420,8 +420,12 @@ int module_finalize(const Elf_Ehdr *hdr, for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) { apply_alternatives((void *)s->sh_addr, s->sh_size); - return 0; } +#ifdef CONFIG_ARM64_MODULE_PLTS + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) + me->arch.ftrace_trampoline = (void *)s->sh_addr; +#endif } return 0; -- cgit v1.2.3 From 67ce16ec15ce9d97d3d85e72beabbc5d7017193e Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:52 +0100 Subject: arm64: mm: print out correct page table entries When we take a fault that can't be handled, we print out the page table entries associated with the faulting address. In some cases we currently print out the wrong entries. For a faulting TTBR1 address, we sometimes print out TTBR0 table entries instead, and for a faulting TTBR0 address we sometimes print out TTBR1 table entries. Fix this by choosing the tables based on the faulting address. Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko [will: zero-extend addrs to 64-bit, don't walk swapper w/ TTBR0 addr] Signed-off-by: Will Deacon --- arch/arm64/include/asm/system_misc.h | 2 +- arch/arm64/mm/fault.c | 36 +++++++++++++++++++++++++----------- 2 files changed, 26 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..d0beefeb6d25 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -40,7 +40,7 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int, int sig, int code, const char *name); struct mm_struct; -extern void show_pte(struct mm_struct *mm, unsigned long addr); +extern void show_pte(unsigned long addr); extern void __show_regs(struct pt_regs *); extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c3f2b1048f83..9d27b1720c52 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -80,18 +80,33 @@ static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr) #endif /* - * Dump out the page tables associated with 'addr' in mm 'mm'. + * Dump out the page tables associated with 'addr' in the currently active mm. 
*/ -void show_pte(struct mm_struct *mm, unsigned long addr) +void show_pte(unsigned long addr) { + struct mm_struct *mm; pgd_t *pgd; - if (!mm) + if (addr < TASK_SIZE) { + /* TTBR0 */ + mm = current->active_mm; + if (mm == &init_mm) { + pr_alert("[%016lx] user address but active_mm is swapper\n", + addr); + return; + } + } else if (addr >= VA_START) { + /* TTBR1 */ mm = &init_mm; + } else { + pr_alert("[%016lx] address between user and kernel address ranges\n", + addr); + return; + } pr_alert("pgd = %p\n", mm->pgd); pgd = pgd_offset(mm, addr); - pr_alert("[%08lx] *pgd=%016llx", addr, pgd_val(*pgd)); + pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); do { pud_t *pud; @@ -196,8 +211,8 @@ static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, /* * The kernel tried to access some page that wasn't present. */ -static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, - unsigned int esr, struct pt_regs *regs) +static void __do_kernel_fault(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const char *msg; @@ -227,7 +242,7 @@ static void __do_kernel_fault(struct mm_struct *mm, unsigned long addr, pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, addr); - show_pte(mm, addr); + show_pte(addr); die("Oops", regs, esr); bust_spinlocks(0); do_exit(SIGKILL); @@ -249,7 +264,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(tsk->mm, addr); + show_pte(addr); __show_regs(regs); } @@ -265,7 +280,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct task_struct *tsk = current; - struct mm_struct *mm = tsk->active_mm; const struct fault_info *inf; /* @@ -276,7 +290,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re inf = esr_to_fault_info(esr); __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); } else - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); } #define VM_FAULT_BADMAP 0x010000 @@ -475,7 +489,7 @@ retry: return 0; no_context: - __do_kernel_fault(mm, addr, esr, regs); + __do_kernel_fault(addr, esr, regs); return 0; } -- cgit v1.2.3 From bf396c09c2447a787d02af34cf167e953f85fa42 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:53 +0100 Subject: arm64: mm: don't print out page table entries on EL0 faults When we take a fault from EL0 that can't be handled, we print out the page table entries associated with the faulting address. This allows userspace to print out any current page table entries, including kernel (TTBR1) entries. Exposing kernel mappings like this could pose a security risk, so don't print out page table information on EL0 faults. (But still print it out for EL1 faults.) This also follows the same behaviour as x86, printing out page table entries on kernel mode faults but not user mode faults. 
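As a worked example of the range checks in the show_pte() rewrite above (assuming a native task and 48-bit VAs, so TASK_SIZE is 1UL << 48 and VA_START is 0xffff000000000000):

	0x0000aaaabbbbcccc  <  TASK_SIZE  -> user address, walk current->active_mm
	0xffff8000bfff0000  >= VA_START   -> kernel address, walk init_mm
	0x8000000000000000  (neither)     -> rejected as between the two ranges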
Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 9d27b1720c52..b5a1605398b7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -264,7 +264,6 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); - show_pte(addr); __show_regs(regs); } -- cgit v1.2.3 From 83016b204225d69516de3d57489783fafd6a0ecc Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Fri, 9 Jun 2017 16:35:54 +0100 Subject: arm64: mm: print file name of faulting vma Print out the name of the file associated with the vma that faulted. This is usually the executable or shared library name. We already print out the task name, but also printing the library name is useful for pinpointing bugs to libraries. Also print the base address and size of the vma, which together with the PC (printed by __show_regs) gives the offset into the library. Fault prints now look like: test[2361]: unhandled level 2 translation fault (11) at 0x00000012, esr 0x92000006, in libfoo.so[ffffa0145000+1000] This is already done on x86, for more details see commit 03252919b798 ("x86: print which shared library/executable faulted in segfault etc. messages v3"). Acked-by: Mark Rutland Signed-off-by: Kristina Martsenko Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index b5a1605398b7..ad7c9c94d379 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -261,9 +261,11 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); - pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x\n", + pr_info("%s[%d]: unhandled %s (%d) at 0x%08lx, esr 0x%03x", tsk->comm, task_pid_nr(tsk), inf->name, sig, addr, esr); + print_vma_addr(KERN_CONT ", in ", regs->pc); + pr_cont("\n"); __show_regs(regs); } -- cgit v1.2.3 From 1eb34b6e5160f20e1889b2551182bf4d61084d6b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 15 May 2017 15:23:58 +0100 Subject: arm64: fault: Print info about page table structure when dumping pte Whilst debugging a remote crash, I noticed that show_pte is unhelpful when it comes to describing the structure of the page table being walked. This is easily fixed by printing out the page table (swapper vs user), page size and virtual address size when displaying the PGD address. Acked-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ad7c9c94d379..222427ae23d6 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -104,7 +104,9 @@ void show_pte(unsigned long addr) return; } - pr_alert("pgd = %p\n", mm->pgd); + pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgd = %p\n", + mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, VA_BITS, mm->pgd); pgd = pgd_offset(mm, addr); pr_alert("[%016lx] *pgd=%016llx", addr, pgd_val(*pgd)); -- cgit v1.2.3 From 687644209a6e95576ea453977b26dbd6248cadda Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 12 Jun 2017 14:43:25 +0100 Subject: arm64: ftrace: fix building without CONFIG_MODULES When CONFIG_MODULES is disabled, we cannot dereference a module pointer: arch/arm64/kernel/ftrace.c: In function 'ftrace_make_call': arch/arm64/kernel/ftrace.c:107:36: error: dereferencing pointer to incomplete type 'struct module' trampoline = (unsigned long *)mod->arch.ftrace_trampoline; Also, the within_module() function is not defined: arch/arm64/kernel/ftrace.c: In function 'ftrace_make_nop': arch/arm64/kernel/ftrace.c:171:8: error: implicit declaration of function 'within_module'; did you mean 'init_module'? [-Werror=implicit-function-declaration] This addresses both by replacing the IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) checks with #ifdef versions. Fixes: e71a4e1bebaf ("arm64: ftrace: add support for far branches to dynamic ftrace") Reported-by: Arnd Bergmann Acked-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 8a42be0693c9..401aa27808a4 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -71,11 +71,12 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; - long offset = (long)pc - (long)addr; u32 old, new; +#ifdef CONFIG_ARM64_MODULE_PLTS + long offset = (long)pc - (long)addr; + - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (offset < -SZ_128M || offset >= SZ_128M)) { + if (offset < -SZ_128M || offset >= SZ_128M) { unsigned long *trampoline; struct module *mod; @@ -121,6 +122,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } addr = (unsigned long)&trampoline[1]; } +#endif /* CONFIG_ARM64_MODULE_PLTS */ old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -135,12 +137,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long addr) { unsigned long pc = rec->ip; - long offset = (long)pc - (long)addr; bool validate = true; u32 old = 0, new; +#ifdef CONFIG_ARM64_MODULE_PLTS + long offset = (long)pc - (long)addr; + - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (offset < -SZ_128M || offset >= SZ_128M)) { + if (offset < -SZ_128M || offset >= SZ_128M) { u32 replaced; /* @@ -177,6 +180,7 @@ int ftrace_make_nop(struct module *mod, old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); } +#endif /* CONFIG_ARM64_MODULE_PLTS */ new = aarch64_insn_gen_nop(); -- cgit v1.2.3 From f02ab08afbe76ee7b0b2a34a9970e7dd200d8b01 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 8 Jun 2017 18:25:26 +0100 Subject: arm64: hugetlb: Fix huge_pte_offset to return poisoned page table entries When memory failure is enabled, a poisoned hugepage pte is marked as a swap entry. huge_pte_offset() does not return the poisoned page table entries when it encounters PUD/PMD hugepages. This behaviour of huge_pte_offset() leads to errors such as the one below when munmap is called on poisoned hugepages. [ 344.165544] mm/pgtable-generic.c:33: bad pmd 000000083af00074.
Fix huge_pte_offset() to return the poisoned pte, which is then appropriately handled by the generic layer code. Signed-off-by: Punit Agrawal Acked-by: Steve Capper Reviewed-by: Catalin Marinas Cc: David Woods Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- arch/arm64/mm/hugetlbpage.c | 29 ++++++++++------------------- 2 files changed, 11 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c213fdbd056c..6eae342ced6b 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -441,7 +441,7 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd) #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) -#define pud_present(pud) (pud_val(pud)) +#define pud_present(pud) pte_present(pud_pte(pud)) static inline void set_pud(pud_t *pudp, pud_t pud) { diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 7514a000e361..69b8200b1cfd 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -136,36 +136,27 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd = NULL; - pte_t *pte = NULL; + pmd_t *pmd; pgd = pgd_offset(mm, addr); pr_debug("%s: addr:0x%lx pgd:%p\n", __func__, addr, pgd); if (!pgd_present(*pgd)) return NULL; + pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) + if (pud_none(*pud)) return NULL; - - if (pud_huge(*pud)) + /* swap or huge page */ + if (!pud_present(*pud) || pud_huge(*pud)) return (pte_t *)pud; + /* table; check the next level */ + pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) + if (pmd_none(*pmd)) return NULL; - - if (pte_cont(pmd_pte(*pmd))) { - pmd = pmd_offset( - pud, (addr & CONT_PMD_MASK)); - return (pte_t *)pmd; - } - if (pmd_huge(*pmd)) + if (!pmd_present(*pmd) || pmd_huge(*pmd)) return (pte_t *)pmd; - pte = pte_offset_kernel(pmd, addr); - if (pte_present(*pte) && pte_cont(*pte)) { - pte = pte_offset_kernel( - pmd, (addr & CONT_PTE_MASK)); - return pte; - } + return NULL; } -- cgit v1.2.3 From e7c600f149b89e06073ab50f4f12e79828d3d2f0 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Thu, 8 Jun 2017 18:25:27 +0100 Subject: arm64: hwpoison: add VM_FAULT_HWPOISON[_LARGE] handling Add VM_FAULT_HWPOISON[_LARGE] handling to the arm64 page fault handler. Handling of VM_FAULT_HWPOISON[_LARGE] is very similar to VM_FAULT_OOM; the only difference is that a different si_code (BUS_MCEERR_AR) is passed to user space and the si_addr_lsb field is initialized.
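From the receiving end, a process that installs a SIGBUS handler with SA_SIGINFO would observe the new fields roughly as follows (hypothetical userspace sketch; exact header requirements vary by C library):

	#include <signal.h>
	#include <stdio.h>

	/* BUS_MCEERR_AR may need _GNU_SOURCE with some C libraries */
	static void sigbus_handler(int sig, siginfo_t *si, void *ctx)
	{
		if (si->si_code == BUS_MCEERR_AR)
			fprintf(stderr, "hw poison at %p, 2^%d bytes affected\n",
				si->si_addr, si->si_addr_lsb);
	}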
Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar (fix new __do_user_fault call-site) Signed-off-by: Punit Agrawal Acked-by: Steve Capper Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 222427ae23d6..d73e7f1fe184 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -31,6 +31,7 @@ #include #include #include +#include <linux/hugetlb.h> #include #include @@ -256,10 +257,11 @@ */ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, unsigned int esr, unsigned int sig, int code, - struct pt_regs *regs) + struct pt_regs *regs, int fault) { struct siginfo si; const struct fault_info *inf; + unsigned int lsb = 0; if (unhandled_signal(tsk, sig) && show_unhandled_signals_ratelimited()) { inf = esr_to_fault_info(esr); @@ -277,6 +279,17 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, si.si_errno = 0; si.si_code = code; si.si_addr = (void __user *)addr; + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + si.si_addr_lsb = lsb; + force_sig_info(sig, &si, tsk); } @@ -291,7 +304,7 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { inf = esr_to_fault_info(esr); - __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs); + __do_user_fault(tsk, addr, esr, inf->sig, inf->code, regs, 0); } else __do_kernel_fault(addr, esr, regs); } @@ -478,6 +491,9 @@ retry: */ sig = SIGBUS; code = BUS_ADRERR; + } else if (fault & (VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { + sig = SIGBUS; + code = BUS_MCEERR_AR; } else { /* * Something tried to access memory that isn't in our memory @@ -488,7 +504,7 @@ retry: SEGV_ACCERR : SEGV_MAPERR; } - __do_user_fault(tsk, addr, esr, sig, code, regs); + __do_user_fault(tsk, addr, esr, sig, code, regs, fault); return 0; no_context: -- cgit v1.2.3 From 0e3a9026396cd7d0eb5777ba923a8f30a3d9db19 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Thu, 8 Jun 2017 18:25:28 +0100 Subject: arm64: mm: Update perf accounting to handle poison faults Re-organise the perf accounting for fault handling in preparation for enabling handling of hardware poison faults in subsequent commits. The change updates perf accounting to be in line with the behaviour on x86.
With this update, the perf fault accounting - * Always report PERF_COUNT_SW_PAGE_FAULTS * Doesn't report anything else for VM_FAULT_ERROR (which includes hwpoison faults) * Reports PERF_COUNT_SW_PAGE_FAULTS_MAJ if it's a major fault (indicated by VM_FAULT_MAJOR) * Otherwise, reports PERF_COUNT_SW_PAGE_FAULTS_MIN Signed-off-by: Punit Agrawal Signed-off-by: Will Deacon --- arch/arm64/mm/fault.c | 68 +++++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index d73e7f1fe184..ea2ea68d1bd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -359,7 +359,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, { struct task_struct *tsk; struct mm_struct *mm; - int fault, sig, code; + int fault, sig, code, major = 0; unsigned long vm_flags = VM_READ | VM_WRITE; unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -398,6 +398,8 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, die("Accessing user space memory outside uaccess.h routines", regs, esr); } + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -421,24 +423,42 @@ retry: } fault = __do_page_fault(mm, addr, mm_flags, vm_flags, tsk); + major |= fault & VM_FAULT_MAJOR; - /* - * If we need to retry but a fatal signal is pending, handle the - * signal first. We do not need to release the mmap_sem because it - * would already be released in __lock_page_or_retry in mm/filemap.c. - */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) - return 0; + if (fault & VM_FAULT_RETRY) { + /* + * If we need to retry but a fatal signal is pending, + * handle the signal first. We do not need to release + * the mmap_sem because it would already be released + * in __lock_page_or_retry in mm/filemap.c. + */ + if (fatal_signal_pending(current)) + return 0; + + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of + * starvation. + */ + if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { + mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; + goto retry; + } + } + up_read(&mm->mmap_sem); /* - * Major/minor page fault accounting is only done on the initial - * attempt. If we go through a retry, it is extremely likely that the - * page will be found in page cache at that point. + * Handle the "normal" (no error) case first. */ - - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); - if (mm_flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { + if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | + VM_FAULT_BADACCESS)))) { + /* + * Major/minor page fault accounting is only done + * once. If we go through a retry, it is extremely + * likely that the page will be found in page cache at + * that point. + */ + if (major) { tsk->maj_flt++; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, addr); @@ -447,25 +467,9 @@ retry: perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, addr); } - if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk of - * starvation. 
*/ - mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; - mm_flags |= FAULT_FLAG_TRIED; - goto retry; - } - } - - up_read(&mm->mmap_sem); - /* - * Handle the "normal" case first - VM_FAULT_MAJOR - */ - if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | - VM_FAULT_BADACCESS)))) return 0; + } /* * If we are in kernel mode at this point, we have no context to -- cgit v1.2.3 From c484f2564db12fa2b01b198ecb6ff0751a3e5e32 Mon Sep 17 00:00:00 2001 From: "Jonathan (Zhixiong) Zhang" Date: Thu, 8 Jun 2017 18:25:29 +0100 Subject: arm64: kconfig: allow support for memory failure handling Declare ARCH_SUPPORTS_MEMORY_FAILURE, as arm64 does support attempting memory failure recovery. Signed-off-by: Jonathan (Zhixiong) Zhang Signed-off-by: Tyler Baicar (Dropped changes to ACPI APEI Kconfig and updated commit log) Signed-off-by: Punit Agrawal Acked-by: Steve Capper Tested-by: Manoj Iyer Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 22f769b254b4..e7fcdc9a616d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -20,6 +20,7 @@ config ARM64 select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING select ARCH_WANT_COMPAT_IPC_PARSE_VERSION -- cgit v1.2.3 From d5f7b828ffab28e5684913e76697e94e2442d164 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 15 Jun 2017 14:55:37 +0530 Subject: arm: perf: make of_device_ids const of_device_ids are not supposed to change at runtime. All functions working with of_device_ids provided by <linux/of.h> work with const of_device_ids. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 96b7a477a8db..8226d0b71fd3 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -552,7 +552,7 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) return 0; } -static struct of_device_id armv6_pmu_of_device_ids[] = { +static const struct of_device_id armv6_pmu_of_device_ids[] = { {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, {.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init}, {.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init}, -- cgit v1.2.3 From 577dfe16b852a90f26cf677cdc9113ec34199de6 Mon Sep 17 00:00:00 2001 From: Olav Haugan Date: Tue, 13 Jun 2017 13:56:14 -0700 Subject: arm64/dma-mapping: Remove extraneous null-pointer checks The current null-pointer check in __dma_alloc_coherent and __dma_free_coherent is not needed anymore since the __dma_alloc/__dma_free functions won't be called if !dev (dummy ops will be called instead).
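The guarantee relied upon here is the ops-dispatch step of the arm64 DMA API, which routes a NULL device to the dummy operations before __dma_alloc/__dma_free can run. Roughly (a sketch only; the exact field and helper names differ across kernel versions):

	static inline const struct dma_map_ops *arm64_dma_ops(struct device *dev)
	{
		if (dev && dev->dma_ops)	/* per-device ops, if set up */
			return dev->dma_ops;

		/* !dev lands here, so __dma_alloc_coherent() never sees NULL */
		return &dummy_dma_ops;
	}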
Reviewed-by: Catalin Marinas Signed-off-by: Olav Haugan Signed-off-by: Will Deacon --- arch/arm64/include/asm/dma-mapping.h | 2 -- arch/arm64/mm/dma-mapping.c | 9 --------- 2 files changed, 11 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 5392dbeffa45..f72779aad276 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -48,8 +48,6 @@ void arch_teardown_dma_ops(struct device *dev); /* do not use this function in a driver */ static inline bool is_device_dma_coherent(struct device *dev) { - if (!dev) - return false; return dev->archdata.dma_coherent; } diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3216e098c058..3e340b625436 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -95,11 +95,6 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, unsigned long attrs) { - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return NULL; - } - if (IS_ENABLED(CONFIG_ZONE_DMA) && dev->coherent_dma_mask <= DMA_BIT_MASK(32)) flags |= GFP_DMA; @@ -128,10 +123,6 @@ static void __dma_free_coherent(struct device *dev, size_t size, bool freed; phys_addr_t paddr = dma_to_phys(dev, dma_handle); - if (dev == NULL) { - WARN_ONCE(1, "Use an actual device structure for DMA allocation\n"); - return; - } freed = dma_release_from_contiguous(dev, phys_to_page(paddr), -- cgit v1.2.3 From c6bb8f89fa6df7a8d62bad2f66063b4b7433ea59 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 14 Jun 2017 17:37:12 +0100 Subject: ARM64/irqchip: Update ACPI_IORT symbol selection logic ACPI IORT is an ACPI addendum to describe the connection topology of devices with IOMMUs and interrupt controllers on ARM64 ACPI systems. Currently the ACPI IORT Kbuild symbol is selected whenever the Kbuild symbol ARM_GIC_V3_ITS is enabled, which in turn is selected by ARM64 Kbuild defaults. This makes the logic behind ACPI_IORT selection a bit twisted and not easy to follow. On ARM64 systems enabling ACPI the kbuild symbol ACPI_IORT should always be selected in that it is a kernel layer provided to the ARM64 arch code to parse and enable ACPI firmware bindings. Make the ACPI_IORT selection explicit in ARM64 Kbuild and remove the selection from ARM_GIC_V3_ITS entry, making the ACPI_IORT selection logic clearer to follow. Acked-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Hanjun Guo Cc: Catalin Marinas Cc: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e7fcdc9a616d..79c0dec93030 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -3,6 +3,7 @@ config ARM64 select ACPI_CCA_REQUIRED if ACPI select ACPI_GENERIC_GSI if ACPI select ACPI_GTDT if ACPI + select ACPI_IORT if ACPI select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI -- cgit v1.2.3 From e27c7fa015d61c8be6a2c32b2144aad2ae6ec975 Mon Sep 17 00:00:00 2001 From: Dustin Brown Date: Tue, 13 Jun 2017 11:40:56 -0700 Subject: arm64: Export save_stack_trace_tsk() The kernel watchdog is a great debugging tool for finding tasks that consume a disproportionate amount of CPU time in contiguous chunks. One can imagine building a similar watchdog for arbitrary driver threads using save_stack_trace_tsk() and print_stack_trace(). 
However, this is not viable for dynamically loaded driver modules on ARM platforms because save_stack_trace_tsk() is not exported for those architectures. Export save_stack_trace_tsk() for the ARM64 architecture to align with x86 and support various debugging use cases such as arbitrary driver thread watchdog timers. Signed-off-by: Dustin Brown Signed-off-by: Will Deacon --- arch/arm64/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index feac80c22f61..09d37d66b630 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -210,6 +210,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) put_task_stack(tsk); } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); void save_stack_trace(struct stack_trace *trace) { -- cgit v1.2.3 From 8f36094802e4e6de180b36bcac4cfd9d319e1b64 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Jun 2017 12:43:55 +0200 Subject: arm64: mm: select CONFIG_ARCH_PROC_KCORE_TEXT To avoid issues with the /proc/kcore code getting confused about the kernel's block mappings in the VMALLOC region, enable the existing facility that describes the [_text, _end) interval as a separate KCORE_TEXT region, which supersedes the KCORE_VMALLOC region that it intersects with on arm64. Reported-by: Tan Xiaojun Tested-by: Tan Xiaojun Tested-by: Mark Rutland Acked-by: Mark Rutland Reviewed-by: Laura Abbott Reviewed-by: Jiri Olsa Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 79c0dec93030..b37ac9b2a36b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -246,6 +246,9 @@ config PGTABLE_LEVELS config ARCH_SUPPORTS_UPROBES def_bool y +config ARCH_PROC_KCORE_TEXT + def_bool y + source "init/Kconfig" source "kernel/Kconfig.freezer" -- cgit v1.2.3 From 20987de3c2c45c314e0386f724aa85f55d984ef2 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:38 +0100 Subject: arm64: signal: split frame link record from sigcontext structure In order to be able to increase the amount of data currently written to the __reserved[] array in the signal frame, it is necessary to overwrite the locations currently occupied by the {fp,lr} frame link record pushed at the top of the signal stack. In order for this to work, this patch detaches the frame link record from struct rt_sigframe and places it separately at the top of the signal stack. This will allow subsequent patches to insert data between it and __reserved[]. This change relies on the non-ABI status of the placement of the frame record with respect to struct rt_sigframe: this status is undocumented, but the placement is not declared or described in the user headers, and known unwinder implementations (libgcc, libunwind, gdb) appear not to rely on it.
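After this change, the signal stack is laid out approximately as follows (a sketch derived from the get_sigframe() hunk below; alignment padding between the regions is omitted):

	   higher addresses
	+--------------------------+  <- sigsp(regs->sp, ksig)
	| struct frame_record      |  { fp; lr; }, regs[29] will point at fp
	+--------------------------+  <- rounded down to 16 bytes
	| struct rt_sigframe       |  siginfo + ucontext (incl. __reserved[])
	+--------------------------+  <- rounded down to 16 bytes, new sp
	   lower addresses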
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 50 +++++++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index c7b6de62f9d3..1e5ed3be78ed 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -41,10 +42,18 @@ struct rt_sigframe { struct siginfo info; struct ucontext uc; +}; + +struct frame_record { u64 fp; u64 lr; }; +struct rt_sigframe_user_layout { + struct rt_sigframe __user *sigframe; + struct frame_record __user *next_frame; +}; + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -162,16 +171,17 @@ badframe: return 0; } -static int setup_sigframe(struct rt_sigframe __user *sf, +static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; + struct rt_sigframe __user *sf = user->sigframe; void *aux = sf->uc.uc_mcontext.__reserved; struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ - __put_user_error(regs->regs[29], &sf->fp, err); - __put_user_error(regs->regs[30], &sf->lr, err); + __put_user_error(regs->regs[29], &user->next_frame->fp, err); + __put_user_error(regs->regs[30], &user->next_frame->lr, err); for (i = 0; i < 31; i++) __put_user_error(regs->regs[i], &sf->uc.uc_mcontext.regs[i], @@ -209,34 +219,36 @@ static int setup_sigframe(struct rt_sigframe __user *sf, return err; } -static struct rt_sigframe __user *get_sigframe(struct ksignal *ksig, - struct pt_regs *regs) +static int get_sigframe(struct rt_sigframe_user_layout *user, + struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; - struct rt_sigframe __user *frame; sp = sp_top = sigsp(regs->sp, ksig); - sp = (sp - sizeof(struct rt_sigframe)) & ~15; - frame = (struct rt_sigframe __user *)sp; + sp = round_down(sp - sizeof(struct frame_record), 16); + user->next_frame = (struct frame_record __user *)sp; + + sp = round_down(sp - sizeof(struct rt_sigframe), 16); + user->sigframe = (struct rt_sigframe __user *)sp; /* * Check that we can actually write to the signal frame. 
*/ - if (!access_ok(VERIFY_WRITE, frame, sp_top - sp)) - frame = NULL; + if (!access_ok(VERIFY_WRITE, user->sigframe, sp_top - sp)) + return -EFAULT; - return frame; + return 0; } static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, - void __user *frame, int usig) + struct rt_sigframe_user_layout *user, int usig) { __sigrestore_t sigtramp; regs->regs[0] = usig; - regs->sp = (unsigned long)frame; - regs->regs[29] = regs->sp + offsetof(struct rt_sigframe, fp); + regs->sp = (unsigned long)user->sigframe; + regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; if (ka->sa.sa_flags & SA_RESTORER) @@ -250,20 +262,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, static int setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + struct rt_sigframe_user_layout user; struct rt_sigframe __user *frame; int err = 0; - frame = get_sigframe(ksig, regs); - if (!frame) + if (get_sigframe(&user, ksig, regs)) return 1; + frame = user.sigframe; + __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); err |= __save_altstack(&frame->uc.uc_stack, regs->sp); - err |= setup_sigframe(frame, regs, set); + err |= setup_sigframe(&user, regs, set); if (err == 0) { - setup_return(regs, &ksig->ka, frame, usig); + setup_return(regs, &ksig->ka, &user, usig); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { err |= copy_siginfo_to_user(&frame->info, &ksig->info); regs->regs[1] = (unsigned long)&frame->info; -- cgit v1.2.3 From 47ccb02868cead34578d953b5fe0cdd58394605e Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:39 +0100 Subject: arm64: signal: Refactor sigcontext parsing in rt_sigreturn Currently, rt_sigreturn does very limited checking on the sigcontext coming from userspace. Future additions to the sigcontext data will increase the potential for surprises. Also, it is not clear whether the sigcontext extension records are supposed to occur in a particular order. To allow the parsing code to be extended more easily, this patch factors out the sigcontext parsing into a separate function, and adds extra checks to validate the well-formedness of the sigcontext structure. Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 86 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 1e5ed3be78ed..67769f68ae06 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -101,12 +102,86 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) return err ? 
-EFAULT : 0; } +struct user_ctxs { + struct fpsimd_context __user *fpsimd; +}; + +static int parse_user_sigframe(struct user_ctxs *user, + struct rt_sigframe __user *sf) +{ + struct sigcontext __user *const sc = &sf->uc.uc_mcontext; + struct _aarch64_ctx __user *head = + (struct _aarch64_ctx __user *)&sc->__reserved; + size_t offset = 0; + + user->fpsimd = NULL; + + while (1) { + int err; + u32 magic, size; + + head = (struct _aarch64_ctx __user *)&sc->__reserved[offset]; + if (!IS_ALIGNED((unsigned long)head, 16)) + goto invalid; + + err = 0; + __get_user_error(magic, &head->magic, err); + __get_user_error(size, &head->size, err); + if (err) + return err; + + switch (magic) { + case 0: + if (size) + goto invalid; + + goto done; + + case FPSIMD_MAGIC: + if (user->fpsimd) + goto invalid; + + if (offset > sizeof(sc->__reserved) - + sizeof(*user->fpsimd) || + size < sizeof(*user->fpsimd)) + goto invalid; + + user->fpsimd = (struct fpsimd_context __user *)head; + break; + + case ESR_MAGIC: + /* ignore */ + break; + + default: + goto invalid; + } + + if (size < sizeof(*head)) + goto invalid; + + if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset)) + goto invalid; + + offset += size; + } + +done: + if (!user->fpsimd) + goto invalid; + + return 0; + +invalid: + return -EINVAL; +} + static int restore_sigframe(struct pt_regs *regs, struct rt_sigframe __user *sf) { sigset_t set; int i, err; - void *aux = sf->uc.uc_mcontext.__reserved; + struct user_ctxs user; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -125,12 +200,11 @@ static int restore_sigframe(struct pt_regs *regs, regs->syscallno = ~0UL; err |= !valid_user_regs(®s->user_regs, current); + if (err == 0) + err = parse_user_sigframe(&user, sf); - if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); - err |= restore_fpsimd_context(fpsimd_ctx); - } + if (err == 0) + err = restore_fpsimd_context(user.fpsimd); return err; } -- cgit v1.2.3 From bb4891a6c3551f27fa4d548b87a66c258bdb100b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:40 +0100 Subject: arm64: signal: factor frame layout and population into separate passes In preparation for expanding the signal frame, this patch refactors the signal frame setup code in setup_sigframe() into two separate passes. The first pass, setup_sigframe_layout(), determines the size of the signal frame and its internal layout, including the presence and location of optional records. The resulting knowledge is used to allocate and locate the user stack space required for the signal frame and to determine which optional records to include. The second pass, setup_sigframe(), is called once the stack frame is allocated in order to populate it with the necessary context information. As a result of these changes, it becomes more natural to represent locations in the signal frame by a base pointer and an offset, since the absolute address of each location is not known during the layout pass. To be more consistent with this logic, parse_user_sigframe() is refactored to describe signal frame locations in a similar way. This change has no effect on the signal ABI, but will make it easier to expand the signal frame in future patches. 
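For context, the record list that this parser walks can be inspected from userspace along the same lines; a hedged sketch of a signal-handler helper (illustrative only, not kernel code; struct _aarch64_ctx and the magic constants come from asm/sigcontext.h):

	#include <stdio.h>
	#include <asm/sigcontext.h>

	/* Walk the { magic, size } records in sigcontext.__reserved[],
	 * stopping at the null terminator record. */
	static void walk_records(struct sigcontext *sc)
	{
		unsigned char *p = sc->__reserved;
		unsigned char *end = p + sizeof(sc->__reserved);

		while (p + sizeof(struct _aarch64_ctx) <= end) {
			struct _aarch64_ctx *head = (struct _aarch64_ctx *)p;

			if (!head->magic)	/* terminator record */
				break;
			if (head->size < sizeof(*head) ||
			    head->size > (size_t)(end - p))
				break;		/* malformed record */
			printf("record: magic=%#x size=%u\n",
			       (unsigned int)head->magic,
			       (unsigned int)head->size);
			p += head->size;	/* the kernel rounds sizes to 16 */
		}
	}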
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/signal.c | 111 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 23 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 67769f68ae06..eaef530579f8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -53,8 +54,39 @@ struct frame_record { struct rt_sigframe_user_layout { struct rt_sigframe __user *sigframe; struct frame_record __user *next_frame; + + unsigned long size; /* size of allocated sigframe data */ + unsigned long limit; /* largest allowed size */ + + unsigned long fpsimd_offset; + unsigned long esr_offset; + unsigned long end_offset; }; +static void init_user_layout(struct rt_sigframe_user_layout *user) +{ + memset(user, 0, sizeof(*user)); + user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); + + user->limit = user->size + + sizeof(user->sigframe->uc.uc_mcontext.__reserved) - + round_up(sizeof(struct _aarch64_ctx), 16); + /* ^ reserve space for terminator */ +} + +static size_t sigframe_size(struct rt_sigframe_user_layout const *user) +{ + return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); +} + +static void __user *apply_user_offset( + struct rt_sigframe_user_layout const *user, unsigned long offset) +{ + char __user *base = (char __user *)user->sigframe; + + return base + offset; +} + static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; @@ -110,26 +142,35 @@ static int parse_user_sigframe(struct user_ctxs *user, struct rt_sigframe __user *sf) { struct sigcontext __user *const sc = &sf->uc.uc_mcontext; - struct _aarch64_ctx __user *head = - (struct _aarch64_ctx __user *)&sc->__reserved; + struct _aarch64_ctx __user *head; + char __user *base = (char __user *)&sc->__reserved; size_t offset = 0; + size_t limit = sizeof(sc->__reserved); user->fpsimd = NULL; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + while (1) { - int err; + int err = 0; u32 magic, size; - head = (struct _aarch64_ctx __user *)&sc->__reserved[offset]; - if (!IS_ALIGNED((unsigned long)head, 16)) + if (limit - offset < sizeof(*head)) goto invalid; - err = 0; + if (!IS_ALIGNED(offset, 16)) + goto invalid; + + head = (struct _aarch64_ctx __user *)(base + offset); __get_user_error(magic, &head->magic, err); __get_user_error(size, &head->size, err); if (err) return err; + if (limit - offset < size) + goto invalid; + switch (magic) { case 0: if (size) @@ -141,9 +182,7 @@ static int parse_user_sigframe(struct user_ctxs *user, if (user->fpsimd) goto invalid; - if (offset > sizeof(sc->__reserved) - - sizeof(*user->fpsimd) || - size < sizeof(*user->fpsimd)) + if (size < sizeof(*user->fpsimd)) goto invalid; user->fpsimd = (struct fpsimd_context __user *)head; @@ -160,7 +199,7 @@ static int parse_user_sigframe(struct user_ctxs *user, if (size < sizeof(*head)) goto invalid; - if (size > sizeof(sc->__reserved) - (sizeof(*head) + offset)) + if (limit - offset < size) goto invalid; offset += size; @@ -245,13 +284,30 @@ badframe: return 0; } +/* Determine the layout of optional records in the signal frame */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +{ + user->fpsimd_offset = user->size; + user->size += round_up(sizeof(struct fpsimd_context), 16); + + /* fault information, if valid */ + if 
(current->thread.fault_code) { + user->esr_offset = user->size; + user->size += round_up(sizeof(struct esr_context), 16); + } + + /* set the "end" magic */ + user->end_offset = user->size; + + return 0; +} + + static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { int i, err = 0; struct rt_sigframe __user *sf = user->sigframe; - void *aux = sf->uc.uc_mcontext.__reserved; - struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &user->next_frame->fp, err); @@ -269,26 +325,29 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); if (err == 0) { - struct fpsimd_context *fpsimd_ctx = - container_of(aux, struct fpsimd_context, head); + struct fpsimd_context __user *fpsimd_ctx = + apply_user_offset(user, user->fpsimd_offset); err |= preserve_fpsimd_context(fpsimd_ctx); - aux += sizeof(*fpsimd_ctx); } /* fault information, if valid */ - if (current->thread.fault_code) { - struct esr_context *esr_ctx = - container_of(aux, struct esr_context, head); + if (err == 0 && user->esr_offset) { + struct esr_context __user *esr_ctx = + apply_user_offset(user, user->esr_offset); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); - aux += sizeof(*esr_ctx); } /* set the "end" magic */ - end = aux; - __put_user_error(0, &end->magic, err); - __put_user_error(0, &end->size, err); + if (err == 0) { + struct _aarch64_ctx __user *end = + apply_user_offset(user, user->end_offset); + + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } return err; } @@ -297,13 +356,19 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, struct ksignal *ksig, struct pt_regs *regs) { unsigned long sp, sp_top; + int err; + + init_user_layout(user); + err = setup_sigframe_layout(user); + if (err) + return err; sp = sp_top = sigsp(regs->sp, ksig); sp = round_down(sp - sizeof(struct frame_record), 16); user->next_frame = (struct frame_record __user *)sp; - sp = round_down(sp - sizeof(struct rt_sigframe), 16); + sp = round_down(sp, 16) - sigframe_size(user); user->sigframe = (struct rt_sigframe __user *)sp; /* -- cgit v1.2.3 From bb4322f74340de578bc61ed0cfb9690ddeb9ef76 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 15 Jun 2017 15:03:41 +0100 Subject: arm64: signal: factor out signal frame record allocation This patch factors out the allocator for signal frame optional records into a separate function, to ensure consistency and facilitate later expansion. No overrun checking is currently done, because the allocation is in user memory and anyway the kernel never tries to allocate enough space in the signal frame yet for an overrun to occur. This behaviour will be refined in future patches. The approach taken in this patch to allocation of the terminator record is not very clean: this will also be replaced in subsequent patches. For future extension, a comment is added in sigcontext.h documenting the current static allocations in __reserved[]. This will be important for determining under what circumstances userspace may or may not see an expanded signal frame. 
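As a cross-check on the accounting in the table added below: 0x210 (fpsimd_context) + 0x10 (esr_context) + 0x10 (terminator) + 0xdd0 (reserved) = 0x1000, exactly the 4096 bytes of __reserved[]. A hypothetical compile-time form of the same check:

	_Static_assert(0x210 + 0x10 + 0x10 + 0xdd0 == 4096,
		       "__reserved[] allocation table must cover the whole array");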
Reviewed-by: Catalin Marinas Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/sigcontext.h | 19 ++++++++++++++ arch/arm64/kernel/signal.c | 43 ++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index ee469be1ae1d..1328a2c14371 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -33,6 +33,25 @@ struct sigcontext { __u8 __reserved[4096] __attribute__((__aligned__(16))); }; +/* + * Allocation of __reserved[]: + * (Note: records do not necessarily occur in the order shown here.) + * + * size description + * + * 0x210 fpsimd_context + * 0x10 esr_context + * 0x10 terminator (null _aarch64_ctx) + * + * 0xdd0 (reserved for future allocation) + * + * New records that can exceed this space need to be opt-in for userspace, so + * that an expanded signal frame is not generated unexpectedly. The mechanism + * for opting in will depend on the extension that generates each new record. + * The above table documents the maximum set and sizes of records that can be + * generated when userspace does not opt in for any such extension. + */ + /* * Header to be used at the beginning of structures extending the user * context. Such structures must be placed after the rt_sigframe on the stack diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index eaef530579f8..fa787e6ac7c2 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -79,6 +79,22 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); } +/* + * Allocate space for an optional record of bytes in the user + * signal frame. The offset from the signal frame base address to the + * allocated block is assigned to *offset. + */ +static int sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size) +{ + size_t padded_size = round_up(size, 16); + + *offset = user->size; + user->size += padded_size; + + return 0; +} + static void __user *apply_user_offset( struct rt_sigframe_user_layout const *user, unsigned long offset) { @@ -287,19 +303,32 @@ badframe: /* Determine the layout of optional records in the signal frame */ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) { - user->fpsimd_offset = user->size; - user->size += round_up(sizeof(struct fpsimd_context), 16); + int err; + + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; /* fault information, if valid */ if (current->thread.fault_code) { - user->esr_offset = user->size; - user->size += round_up(sizeof(struct esr_context), 16); + err = sigframe_alloc(user, &user->esr_offset, + sizeof(struct esr_context)); + if (err) + return err; } - /* set the "end" magic */ - user->end_offset = user->size; + /* + * Allocate space for the terminator record. + * HACK: here we undo the reservation of space for the end record. + * This bodge should be replaced with a cleaner approach later on.
+ */ + user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) + + sizeof(user->sigframe->uc.uc_mcontext.__reserved); - return 0; + err = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + return err; } -- cgit v1.2.3 From f5d284900c0f960e318a063f4c40826b6e3aa6a8 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 20 Jun 2017 14:24:43 +0200 Subject: arm64: pass machine size to sparse When using sparse on the arm64 tree we get many thousands of warnings like 'constant ... is so big it is unsigned long long' or 'shift too big (32) for type unsigned long'. This happens because by default sparse considers the machine as 32bit and defines the size of the types accordingly. Fix this by passing the '-m64' flag to sparse so that sparse can correctly define longs as being 64bit. CC: Catalin Marinas CC: Will Deacon CC: linux-arm-kernel@lists.infradead.org Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1ce57b42f390..fdb0133142ff 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -62,7 +62,7 @@ LD += -EL UTS_MACHINE := aarch64 endif -CHECKFLAGS += -D__aarch64__ +CHECKFLAGS += -D__aarch64__ -m64 ifeq ($(CONFIG_ARM64_MODULE_CMODEL_LARGE), y) KBUILD_CFLAGS_MODULE += -mcmodel=large -- cgit v1.2.3 From af66b2d88a76574d55e81d712292abd34beb6178 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:42 +0100 Subject: arm64: ptrace: Fix VFP register dumping in compat coredumps Currently, VFP registers are omitted from coredumps for compat processes, due to a bug in the REGSET_COMPAT_VFP regset implementation. compat_vfp_get() needs to transfer non-contiguous data from thread_struct.fpsimd_state, and uses put_user() to handle the offending trailing word (FPSCR). This fails when copying to a kernel address (i.e., kbuf && !ubuf), which is what happens when dumping core. As a result, the ELF coredump core code silently omits the NT_ARM_VFP note from the dump. It would be possible to work around this with additional special case code for the put_user(), but since user_regset_copyout() is explicitly designed to handle this scenario it is cleaner to port the put_user() to a user_regset_copyout() call, which this patch does. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index c142459a88f3..0e5aaec5b751 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -894,7 +894,7 @@ static int compat_vfp_get(struct task_struct *target, { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret; + int ret, vregs_end_pos; uregs = &target->thread.fpsimd_state.user_fpsimd; @@ -902,13 +902,16 @@ static int compat_vfp_get(struct task_struct *target, * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. 
*/ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, - VFP_STATE_SIZE - sizeof(compat_ulong_t)); + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, + 0, vregs_end_pos); if (count && !ret) { fpscr = (uregs->fpsr & VFP_FPSCR_STAT_MASK) | (uregs->fpcr & VFP_FPSCR_CTRL_MASK); - ret = put_user(fpscr, (compat_ulong_t *)ubuf); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpscr, + vregs_end_pos, VFP_STATE_SIZE); } return ret; -- cgit v1.2.3 From e1d5a8fb73e6c65280c21ec188180345649ee650 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:43 +0100 Subject: arm64: ptrace: Flush FPSIMD regs back to thread_struct before reading When reading the FPSIMD state of current (which occurs when dumping core), it is possible that userspace has modified the FPSIMD registers since the time the task was last scheduled out. Such changes are not guaranteed to be reflected immedately in thread_struct. As a result, a coredump can contain stale values for these registers. Reading the registers of a stopped task via ptrace is unaffected. This patch explicitly flushes the CPU state back to thread_struct before dumping when operating on current, thus ensuring that coredump contents are up to date. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 0e5aaec5b751..eeef01a219a6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -623,6 +623,10 @@ static int fpr_get(struct task_struct *target, const struct user_regset *regset, { struct user_fpsimd_state *uregs; uregs = &target->thread.fpsimd_state.user_fpsimd; + + if (target == current) + fpsimd_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, uregs, 0, -1); } @@ -898,6 +902,9 @@ static int compat_vfp_get(struct task_struct *target, uregs = &target->thread.fpsimd_state.user_fpsimd; + if (target == current) + fpsimd_preserve_current_state(); + /* * The VFP registers are packed into the fpsimd_state, so they all sit * nicely together for us. We just need to create the fpscr separately. -- cgit v1.2.3 From 936eb65ca22ad856cb3a995e8cd742e982dc2dd0 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 21 Jun 2017 16:00:44 +0100 Subject: arm64: ptrace: Flush user-RW TLS reg to thread_struct before reading When reading current's user-writable TLS register (which occurs when dumping core for native tasks), it is possible that userspace has modified it since the time the task was last scheduled out. The new TLS register value is not guaranteed to have been written immediately back to thread_struct in this case. As a result, a coredump can capture stale data for this register. Reading the register for a stopped task via ptrace is unaffected. For native tasks, this patch explicitly flushes the TPIDR_EL0 register back to thread_struct before dumping when operating on current, thus ensuring that coredump contents are up to date. For compat tasks, the TLS register is not user-writable and so cannot be out of sync, so no flush is required in compat_tls_get(). 
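For background on why current's copy can be stale: TPIDR_EL0 is directly writable at EL0, so userspace can retarget its TLS at any time without entering the kernel. An illustrative userspace snippet (not from this patch):

	/* userspace: change the TLS pointer, no syscall involved */
	static inline void set_tls(unsigned long tp)
	{
		asm volatile("msr tpidr_el0, %0" : : "r" (tp));
	}

The new value normally reaches thread_struct only at the next context switch, hence the explicit flush when dumping current.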
Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/processor.h | 3 +++ arch/arm64/kernel/process.c | 8 ++++++-- arch/arm64/kernel/ptrace.c | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9428b93fefb2..64c9e78f9882 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -104,6 +104,9 @@ struct thread_struct { #define task_user_tls(t) (&(t)->thread.tp_value) #endif +/* Sync TPIDR_EL0 back to thread_struct for current */ +void tls_preserve_current_state(void); + #define INIT_THREAD { } static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index af1ea258c212..659ae8094ed5 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -298,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start, return 0; } +void tls_preserve_current_state(void) +{ + *task_user_tls(current) = read_sysreg(tpidr_el0); +} + static void tls_thread_switch(struct task_struct *next) { unsigned long tpidr, tpidrro; - tpidr = read_sysreg(tpidr_el0); - *task_user_tls(current) = tpidr; + tls_preserve_current_state(); tpidr = *task_user_tls(next); tpidrro = is_compat_thread(task_thread_info(next)) ? diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index eeef01a219a6..35846f1550db 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -652,6 +652,10 @@ static int tls_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { unsigned long *tls = &target->thread.tp_value; + + if (target == current) + tls_preserve_current_state(); + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, tls, 0, -1); } -- cgit v1.2.3 From 8effeaaf2cacc8a8007d3089e253e7baaff57bb7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 21 Jun 2017 18:11:23 +0100 Subject: arm64: dump cpu_hwcaps at panic time When debugging a kernel panic(), it can be useful to know which CPU features have been detected by the kernel, as some code paths can depend on these (and may have been patched at runtime). This patch adds a notifier to dump the detected CPU caps (as a hex string) at panic(), when we log other information useful for debugging. On a Juno R1 system running v4.12-rc5, this looks like: [ 615.431249] Kernel panic - not syncing: Fatal exception in interrupt [ 615.437609] SMP: stopping secondary CPUs [ 615.441872] Kernel Offset: disabled [ 615.445372] CPU features: 0x02086 [ 615.448522] Memory Limit: none A developer can decode this by looking at the corresponding bits. 
For example, the above decodes as: * bit 1: ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE * bit 2: ARM64_WORKAROUND_845719 * bit 7: ARM64_WORKAROUND_834220 * bit 13: ARM64_HAS_32BIT_EL0 Signed-off-by: Mark Rutland Acked-by: Steve Capper Cc: Catalin Marinas Cc: Suzuki K Poulose Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 55d5c72a507d..9f9e0064c8c1 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -51,6 +51,25 @@ unsigned int compat_elf_hwcap2 __read_mostly; DECLARE_BITMAP(cpu_hwcaps, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcaps); +static int dump_cpu_hwcaps(struct notifier_block *self, unsigned long v, void *p) +{ + /* file-wide pr_fmt adds "CPU features: " prefix */ + pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); + return 0; +} + +static struct notifier_block cpu_hwcaps_notifier = { + .notifier_call = dump_cpu_hwcaps +}; + +static int __init register_cpu_hwcaps_dumper(void) +{ + atomic_notifier_chain_register(&panic_notifier_list, + &cpu_hwcaps_notifier); + return 0; +} +__initcall(register_cpu_hwcaps_dumper); + DEFINE_STATIC_KEY_ARRAY_FALSE(cpu_hwcap_keys, ARM64_NCAPS); EXPORT_SYMBOL(cpu_hwcap_keys); -- cgit v1.2.3 From 32015c235603a209697d1228667b1fb1cc8a8d06 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:08 -0600 Subject: arm64: exception: handle Synchronous External Abort SEA exceptions are often caused by an uncorrected hardware error, and are handled when data abort and instruction abort exception classes have specific values for their Fault Status Code. When SEA occurs, before killing the process, report the error in the kernel logs. Update fault_info[] with specific SEA faults so that the new SEA handler is used. Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas [will: use NULL instead of 0 when assigning si_addr] Signed-off-by: Will Deacon --- arch/arm64/include/asm/esr.h | 1 + arch/arm64/mm/fault.c | 45 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 85997c0e5443..28bf02efce76 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -83,6 +83,7 @@ #define ESR_ELx_WNR (UL(1) << 6) /* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_FnV (UL(1) << 10) #define ESR_ELx_EA (UL(1) << 9) #define ESR_ELx_S1PTW (UL(1) << 7) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 37b95dff0b07..246e64f60610 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -522,6 +522,31 @@ static int do_bad(unsigned long addr, unsigned int esr, struct pt_regs *regs) return 1; } +/* + * This abort handler deals with Synchronous External Abort. + * It calls notifiers, and then returns "fault". 
+ */ +static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) +{ + struct siginfo info; + const struct fault_info *inf; + + inf = esr_to_fault_info(esr); + pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", + inf->name, esr, addr); + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = 0; + if (esr & ESR_ELx_FnV) + info.si_addr = NULL; + else + info.si_addr = (void __user *)addr; + arm64_notify_die("", regs, &info, esr); + + return 0; +} + static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "ttbr address size fault" }, { do_bad, SIGBUS, 0, "level 1 address size fault" }, @@ -539,22 +564,22 @@ static const struct fault_info fault_info[] = { { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 permission fault" }, { do_page_fault, SIGSEGV, SEGV_ACCERR, "level 3 permission fault" }, - { do_bad, SIGBUS, 0, "synchronous external abort" }, + { do_sea, SIGBUS, 0, "synchronous external abort" }, { do_bad, SIGBUS, 0, "unknown 17" }, { do_bad, SIGBUS, 0, "unknown 18" }, { do_bad, SIGBUS, 0, "unknown 19" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous external abort (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error" }, + { do_sea, SIGBUS, 0, "level 0 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 (translation table walk)" }, + { do_sea, SIGBUS, 0, "synchronous parity or ECC error" }, { do_bad, SIGBUS, 0, "unknown 25" }, { do_bad, SIGBUS, 0, "unknown 26" }, { do_bad, SIGBUS, 0, "unknown 27" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, - { do_bad, SIGBUS, 0, "synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 0 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 1 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 2 synchronous parity error (translation table walk)" }, + { do_sea, SIGBUS, 0, "level 3 synchronous parity error (translation table walk)" }, { do_bad, SIGBUS, 0, "unknown 32" }, { do_alignment_fault, SIGBUS, BUS_ADRALN, "alignment fault" }, { do_bad, SIGBUS, 0, "unknown 34" }, -- cgit v1.2.3 From 7edda0886bc3d1e5418951558a2555af1bc73b0a Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:09 -0600 Subject: acpi: apei: handle SEA notification type for ARMv8 ARM APEI extension proposal added SEA (Synchronous External Abort) notification type for ARMv8. Add a new GHES error source handling function for SEA. If an error source's notification type is SEA, then this function can be registered into the SEA exception handler. That way GHES will parse and report SEA exceptions when they occur. An SEA can interrupt code that had interrupts masked and is treated as an NMI. 
To aid this the page of address space for mapping APEI buffers while in_nmi() is always reserved, and ghes_ioremap_pfn_nmi() is changed to use the helper methods to find the prot_t to map with in the same way as ghes_ioremap_pfn_irq(). Signed-off-by: Tyler Baicar CC: Jonathan (Zhixiong) Zhang Reviewed-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 2 ++ arch/arm64/mm/fault.c | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..80559977a83e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -19,6 +19,7 @@ config ARM64 select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_NUMA_BALANCING @@ -92,6 +93,7 @@ config ARM64 select HAVE_IRQ_TIME_ACCOUNTING select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP if NUMA + select HAVE_NMI if ACPI_APEI_SEA select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select HAVE_PERF_REGS diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 246e64f60610..07481af15fd7 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -42,6 +42,8 @@ #include #include +#include + struct fault_info { int (*fn)(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -535,6 +537,21 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", inf->name, esr, addr); + /* + * Synchronous aborts may interrupt code which had interrupts masked. + * Before calling out into the wider kernel tell the interested + * subsystems. + */ + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) { + if (interrupts_enabled(regs)) + nmi_enter(); + + ghes_notify_sea(); + + if (interrupts_enabled(regs)) + nmi_exit(); + } + info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = 0; -- cgit v1.2.3 From 621f48e40ee9b0100a802531069166d7d94796e0 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:14 -0600 Subject: arm/arm64: KVM: add guest SEA support Currently external aborts are unsupported by the guest abort handling. Add handling for SEAs so that the host kernel reports SEAs which occur in the guest kernel. When an SEA occurs in the guest kernel, the guest exits and is routed to kvm_handle_guest_abort(). Prior to this patch, a print message of an unsupported FSC would be printed and nothing else would happen. With this patch, the code gets routed to the APEI handling of SEAs in the host kernel to report the SEA information. 
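For reference, the fault status codes routed to the new external-abort handling can be classified roughly as follows (an illustrative helper, not kernel code; the values match the FSC_* definitions added in the hunks below, and the case ranges use the GCC extension the kernel already relies on):

	/* true if an HSR/ESR fault status code encodes a synchronous external
	 * abort or a parity/ECC error, on access or on translation table walk */
	static bool fsc_is_external_abort(unsigned int fsc)
	{
		switch (fsc) {
		case 0x10:		/* FSC_SEA */
		case 0x14 ... 0x17:	/* FSC_SEA_TTW0..TTW3 */
		case 0x18:		/* FSC_SECC */
		case 0x1c ... 0x1f:	/* FSC_SECC_TTW0..TTW3 */
			return true;
		default:
			return false;
		}
	}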
Signed-off-by: Tyler Baicar Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm/include/asm/system_misc.h | 5 +++++ arch/arm64/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm64/include/asm/system_misc.h | 2 ++ arch/arm64/mm/fault.c | 22 ++++++++++++++++++++-- 5 files changed, 47 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a3f0b3d50089..ebf020b02bc8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -187,6 +187,16 @@ #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) +#define FSC_SEA (0x10) +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~0xf) diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index a3d61ad984af..8c4a89f5ce7d 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void); extern unsigned int user_debug; +static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + return -1; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6e99978e83bd..61d694c2eae5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -204,6 +204,16 @@ #define FSC_FAULT ESR_ELx_FSC_FAULT #define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM +#define FSC_SEA ESR_ELx_FSC_EXTABT +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..95aa4423247d 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) +int handle_guest_sea(phys_addr_t addr, unsigned int esr); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 07481af15fd7..a8c9f2db20ba 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -532,6 +532,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; + int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -546,7 +547,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ghes_notify_sea(); + ret = ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -561,7 +562,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return 0; + return 
ret; } static const struct fault_info fault_info[] = { @@ -631,6 +632,23 @@ static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "unknown 63" }, }; +/* + * Handle Synchronous External Aborts that occur in a guest kernel. + * + * The return value will be zero if the SEA was successfully handled + * and non-zero if there was an error processing the error or there was + * no error to process. + */ +int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + int ret = -ENOENT; + + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) + ret = ghes_notify_sea(); + + return ret; +} + /* * Dispatch a data abort to the relevant handler. */ -- cgit v1.2.3 From 33f082614c3443d937f50fe936f284f62bbb4a1b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 20 Jun 2017 18:23:39 +0100 Subject: arm64: signal: Allow expansion of the signal frame This patch defines an extra_context signal frame record that can be used to describe an expanded signal frame, and modifies the context block allocator and signal frame setup and parsing code to create, populate, parse and decode this block as necessary. To avoid abuse by userspace, parse_user_sigframe() attempts to ensure that: * no more than one extra_context is accepted; * the extra context data is a sensible size, and properly placed and aligned. The extra_context data is required to start at the first 16-byte aligned address immediately after the dummy terminator record following extra_context in rt_sigframe.__reserved[] (as ensured during signal delivery). This serves as a sanity-check that the signal frame has not been moved or copied without taking the extra data into account. Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas [will: add __force annotation when casting extra_datap to __user pointer] Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/sigcontext.h | 38 +++++- arch/arm64/kernel/signal.c | 195 ++++++++++++++++++++++++++++--- 2 files changed, 214 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/sigcontext.h b/arch/arm64/include/uapi/asm/sigcontext.h index 1328a2c14371..f0a76b9fcd6e 100644 --- a/arch/arm64/include/uapi/asm/sigcontext.h +++ b/arch/arm64/include/uapi/asm/sigcontext.h @@ -41,9 +41,10 @@ struct sigcontext { * * 0x210 fpsimd_context * 0x10 esr_context + * 0x20 extra_context (optional) * 0x10 terminator (null _aarch64_ctx) * - * 0xdd0 (reserved for future allocation) + * 0xdb0 (reserved for future allocation) * * New records that can exceed this space need to be opt-in for userspace, so * that an expanded signal frame is not generated unexpectedly. The mechanism @@ -80,4 +81,39 @@ struct esr_context { __u64 esr; }; +/* + * extra_context: describes extra space in the signal frame for + * additional structures that don't fit in sigcontext.__reserved[]. + * + * Note: + * + * 1) fpsimd_context, esr_context and extra_context must be placed in + * sigcontext.__reserved[] if present. They cannot be placed in the + * extra space. Any other record can be placed either in the extra + * space or in sigcontext.__reserved[], unless otherwise specified in + * this file. + * + * 2) There must not be more than one extra_context. + * + * 3) If extra_context is present, it must be followed immediately in + * sigcontext.__reserved[] by the terminating null _aarch64_ctx. + * + * 4) The extra space to which datap points must start at the first + * 16-byte aligned address immediately after the terminating null + * _aarch64_ctx that follows the extra_context structure in + * __reserved[]. 
The extra space may overrun the end of __reserved[], + * as indicated by a sufficiently large value for the size field. + * + * 5) The extra space must itself be terminated with a null + * _aarch64_ctx. + */ +#define EXTRA_MAGIC 0x45585401 + +struct extra_context { + struct _aarch64_ctx head; + __u64 datap; /* 16-byte aligned pointer to extra space cast to __u64 */ + __u32 size; /* size in bytes of the extra space */ + __u32 __reserved[3]; +}; + #endif /* _UAPI__ASM_SIGCONTEXT_H */ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index fa787e6ac7c2..089c3747995d 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -60,18 +61,27 @@ struct rt_sigframe_user_layout { unsigned long fpsimd_offset; unsigned long esr_offset; + unsigned long extra_offset; unsigned long end_offset; }; +#define BASE_SIGFRAME_SIZE round_up(sizeof(struct rt_sigframe), 16) +#define TERMINATOR_SIZE round_up(sizeof(struct _aarch64_ctx), 16) +#define EXTRA_CONTEXT_SIZE round_up(sizeof(struct extra_context), 16) + static void init_user_layout(struct rt_sigframe_user_layout *user) { + const size_t reserved_size = + sizeof(user->sigframe->uc.uc_mcontext.__reserved); + memset(user, 0, sizeof(*user)); user->size = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved); - user->limit = user->size + - sizeof(user->sigframe->uc.uc_mcontext.__reserved) - - round_up(sizeof(struct _aarch64_ctx), 16); - /* ^ reserve space for terminator */ + user->limit = user->size + reserved_size; + + user->limit -= TERMINATOR_SIZE; + user->limit -= EXTRA_CONTEXT_SIZE; + /* Reserve space for extension and terminator ^ */ } static size_t sigframe_size(struct rt_sigframe_user_layout const *user) @@ -79,6 +89,52 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) return round_up(max(user->size, sizeof(struct rt_sigframe)), 16); } +/* + * Sanity limit on the approximate maximum size of signal frame we'll + * try to generate. Stack alignment padding and the frame record are + * not taken into account. This limit is not a guarantee and is + * NOT ABI. + */ +#define SIGFRAME_MAXSZ SZ_64K + +static int __sigframe_alloc(struct rt_sigframe_user_layout *user, + unsigned long *offset, size_t size, bool extend) +{ + size_t padded_size = round_up(size, 16); + + if (padded_size > user->limit - user->size && + !user->extra_offset && + extend) { + int ret; + + user->limit += EXTRA_CONTEXT_SIZE; + ret = __sigframe_alloc(user, &user->extra_offset, + sizeof(struct extra_context), false); + if (ret) { + user->limit -= EXTRA_CONTEXT_SIZE; + return ret; + } + + /* Reserve space for the __reserved[] terminator */ + user->size += TERMINATOR_SIZE; + + /* + * Allow expansion up to SIGFRAME_MAXSZ, ensuring space for + * the terminator: + */ + user->limit = SIGFRAME_MAXSZ - TERMINATOR_SIZE; + } + + /* Still not enough space? Bad luck! */ + if (padded_size > user->limit - user->size) + return -ENOMEM; + + *offset = user->size; + user->size += padded_size; + + return 0; +} + /* * Allocate space for an optional record of bytes in the user * signal frame. 
The offset from the signal frame base address to the @@ -87,11 +143,24 @@ static size_t sigframe_size(struct rt_sigframe_user_layout const *user) static int sigframe_alloc(struct rt_sigframe_user_layout *user, unsigned long *offset, size_t size) { - size_t padded_size = round_up(size, 16); + return __sigframe_alloc(user, offset, size, true); +} - *offset = user->size; - user->size += padded_size; +/* Allocate the null terminator record and prevent further allocations */ +static int sigframe_alloc_end(struct rt_sigframe_user_layout *user) +{ + int ret; + + /* Un-reserve the space reserved for the terminator: */ + user->limit += TERMINATOR_SIZE; + ret = sigframe_alloc(user, &user->end_offset, + sizeof(struct _aarch64_ctx)); + if (ret) + return ret; + + /* Prevent further allocation: */ + user->limit = user->size; return 0; } @@ -162,6 +231,8 @@ static int parse_user_sigframe(struct user_ctxs *user, char __user *base = (char __user *)&sc->__reserved; size_t offset = 0; size_t limit = sizeof(sc->__reserved); + bool have_extra_context = false; + char const __user *const sfp = (char const __user *)sf; user->fpsimd = NULL; @@ -171,6 +242,12 @@ static int parse_user_sigframe(struct user_ctxs *user, while (1) { int err = 0; u32 magic, size; + char const __user *userp; + struct extra_context const __user *extra; + u64 extra_datap; + u32 extra_size; + struct _aarch64_ctx const __user *end; + u32 end_magic, end_size; if (limit - offset < sizeof(*head)) goto invalid; @@ -208,6 +285,64 @@ static int parse_user_sigframe(struct user_ctxs *user, /* ignore */ break; + case EXTRA_MAGIC: + if (have_extra_context) + goto invalid; + + if (size < sizeof(*extra)) + goto invalid; + + userp = (char const __user *)head; + + extra = (struct extra_context const __user *)userp; + userp += size; + + __get_user_error(extra_datap, &extra->datap, err); + __get_user_error(extra_size, &extra->size, err); + if (err) + return err; + + /* Check for the dummy terminator in __reserved[]: */ + + if (limit - offset - size < TERMINATOR_SIZE) + goto invalid; + + end = (struct _aarch64_ctx const __user *)userp; + userp += TERMINATOR_SIZE; + + __get_user_error(end_magic, &end->magic, err); + __get_user_error(end_size, &end->size, err); + if (err) + return err; + + if (end_magic || end_size) + goto invalid; + + /* Prevent looping/repeated parsing of extra_context */ + have_extra_context = true; + + base = (__force void __user *)extra_datap; + if (!IS_ALIGNED((unsigned long)base, 16)) + goto invalid; + + if (!IS_ALIGNED(extra_size, 16)) + goto invalid; + + if (base != userp) + goto invalid; + + /* Reject "unreasonably large" frames: */ + if (extra_size > sfp + SIGFRAME_MAXSZ - userp) + goto invalid; + + /* + * Ignore trailing terminator in __reserved[] + * and start parsing extra data: + */ + offset = 0; + limit = extra_size; + continue; + default: goto invalid; } @@ -318,17 +453,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return err; } - /* - * Allocate space for the terminator record. - * HACK: here we undo the reservation of space for the end record. - * This bodge should be replaced with a cleaner approach later on. 
- */ - user->limit = offsetof(struct rt_sigframe, uc.uc_mcontext.__reserved) + - sizeof(user->sigframe->uc.uc_mcontext.__reserved); - - err = sigframe_alloc(user, &user->end_offset, - sizeof(struct _aarch64_ctx)); - return err; + return sigframe_alloc_end(user); } @@ -369,6 +494,40 @@ static int setup_sigframe(struct rt_sigframe_user_layout *user, __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); } + if (err == 0 && user->extra_offset) { + char __user *sfp = (char __user *)user->sigframe; + char __user *userp = + apply_user_offset(user, user->extra_offset); + + struct extra_context __user *extra; + struct _aarch64_ctx __user *end; + u64 extra_datap; + u32 extra_size; + + extra = (struct extra_context __user *)userp; + userp += EXTRA_CONTEXT_SIZE; + + end = (struct _aarch64_ctx __user *)userp; + userp += TERMINATOR_SIZE; + + /* + * extra_datap is just written to the signal frame. + * The value gets cast back to a void __user * + * during sigreturn. + */ + extra_datap = (__force u64)userp; + extra_size = sfp + round_up(user->size, 16) - userp; + + __put_user_error(EXTRA_MAGIC, &extra->head.magic, err); + __put_user_error(EXTRA_CONTEXT_SIZE, &extra->head.size, err); + __put_user_error(extra_datap, &extra->datap, err); + __put_user_error(extra_size, &extra->size, err); + + /* Add the terminator */ + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); + } + /* set the "end" magic */ if (err == 0) { struct _aarch64_ctx __user *end = -- cgit v1.2.3 From 8486e54d30e170c969090f80bbdfa7142d33ed6a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 23 Jun 2017 18:02:06 +0100 Subject: arm64: ftrace: fix !CONFIG_ARM64_MODULE_PLTS kernels When a kernel is built without CONFIG_ARM64_MODULE_PLTS, we don't generate the expected branch instruction in ftrace_make_nop(). This means we pass zero (rather than a valid branch) to ftrace_modify_code() as the expected instruction to validate. This causes us to return -EINVAL to the core ftrace code for a valid case, resulting in a splat at boot time. This was an unintended effect of commit: 687644209a6e9557 ("arm64: ftrace: fix building without CONFIG_MODULES") ... which incorrectly moved the generation of the branch instruction into the ifdef for CONFIG_ARM64_MODULE_PLTS. This patch fixes the issue by moving the ifdef inside of the relevant if-else case, and always checking that the branch is in range, regardless of CONFIG_ARM64_MODULE_PLTS. This ensures that we generate the expected branch instruction, and also improves our sanity checks. For consistency, both ftrace_make_nop() and ftrace_make_call() are updated with this pattern. 
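For reference, the +/-128M guard in both functions falls out of the A64 BL encoding: the branch immediate is a 26-bit signed field counted in 4-byte instruction words, so the reachable offset runs from -2^25 * 4 bytes up to (2^25 - 1) * 4 bytes, i.e. the interval [-128MiB, +128MiB) that the offset check tests.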
Fixes: 687644209a6e9557 ("arm64: ftrace: fix building without CONFIG_MODULES") Signed-off-by: Mark Rutland Reported-by: Marc Zyngier Reviewed-by: Ard Biesheuvel Cc: Arnd Bergmann Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ftrace.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 401aa27808a4..c13b1fca0e5b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -72,11 +72,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { unsigned long pc = rec->ip; u32 old, new; - -#ifdef CONFIG_ARM64_MODULE_PLTS long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS unsigned long *trampoline; struct module *mod; @@ -121,8 +120,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) smp_wmb(); } addr = (unsigned long)&trampoline[1]; - } +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ + } old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -139,11 +140,10 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long pc = rec->ip; bool validate = true; u32 old = 0, new; - -#ifdef CONFIG_ARM64_MODULE_PLTS long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { +#ifdef CONFIG_ARM64_MODULE_PLTS u32 replaced; /* @@ -176,11 +176,13 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EINVAL; validate = false; +#else /* CONFIG_ARM64_MODULE_PLTS */ + return -EINVAL; +#endif /* CONFIG_ARM64_MODULE_PLTS */ } else { old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); } -#endif /* CONFIG_ARM64_MODULE_PLTS */ new = aarch64_insn_gen_nop(); -- cgit v1.2.3 From bcde519e8c325f3cc1fcf443eb6466e6bb3a3aca Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 24 Jun 2017 17:42:11 +0200 Subject: arm64: pass endianness info to sparse ARM64 depends on the macro __AARCH64EB__ being defined or not to correctly select or define endian-specific macros, structures or pieces of code. This macro is predefined by the compiler but sparse knows nothing about it and thus may pre-process files differently from what gcc would. Fix this by passing '-D__AARCH64EL__' or '-D__AARCH64EB__' to sparse depending on the endianness of the kernel, as defined by GCC. Note: In most cases this won't change anything, since most arm64 kernels are built little-endian (but an allyesconfig would use big-endian!).
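As an illustration of the kind of guard this affects (a representative example; arm64's ELF headers contain a guard of this shape):

	#ifdef __AARCH64EB__
	#define ELF_DATA	ELFDATA2MSB
	#else
	#define ELF_DATA	ELFDATA2LSB
	#endif

Without -D__AARCH64EB__ on its command line, sparse would always take the little-endian branch, even when checking a big-endian build.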
CC: Catalin Marinas CC: Will Deacon CC: linux-arm-kernel@lists.infradead.org Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index fdb0133142ff..9b41f1e3b1a0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -52,11 +52,13 @@ KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian +CHECKFLAGS += -D__AARCH64EB__ AS += -EB LD += -EB UTS_MACHINE := aarch64_be else KBUILD_CPPFLAGS += -mlittle-endian +CHECKFLAGS += -D__AARCH64EL__ AS += -EL LD += -EL UTS_MACHINE := aarch64 -- cgit v1.2.3 From a5018b0e6f036a598e55371e9135e287dc3b25e5 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:55:52 +0200 Subject: arm64: fix endianness annotation for debug-monitors.c Here we're reading thumb or ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to native order, but the intermediate value should be annotated as little-endian. Fix this by declaring the intermediate variable as __le32 or __le16. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index d618e25c3de1..c7ef99904934 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -341,20 +341,22 @@ int aarch32_break_handler(struct pt_regs *regs) if (compat_thumb_mode(regs)) { /* get 16-bit Thumb instruction */ - get_user(thumb_instr, (u16 __user *)pc); - thumb_instr = le16_to_cpu(thumb_instr); + __le16 instr; + get_user(instr, (__le16 __user *)pc); + thumb_instr = le16_to_cpu(instr); if (thumb_instr == AARCH32_BREAK_THUMB2_LO) { /* get second half of 32-bit Thumb-2 instruction */ - get_user(thumb_instr, (u16 __user *)(pc + 2)); - thumb_instr = le16_to_cpu(thumb_instr); + get_user(instr, (__le16 __user *)(pc + 2)); + thumb_instr = le16_to_cpu(instr); bp = thumb_instr == AARCH32_BREAK_THUMB2_HI; } else { bp = thumb_instr == AARCH32_BREAK_THUMB; } } else { /* 32-bit ARM instruction */ - get_user(arm_instr, (u32 __user *)pc); - arm_instr = le32_to_cpu(arm_instr); + __le32 instr; + get_user(instr, (__le32 __user *)pc); + arm_instr = le32_to_cpu(instr); bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM; } -- cgit v1.2.3 From 6cf5d4af83e04f4cfae91bfdefd9d4d6949c09b2 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:55:55 +0200 Subject: arm64: fix endianness annotation in call_undef_hook() Here we're reading thumb or ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to native order, but the intermediate value should be annotated as little-endian. Fix this by declaring the intermediate variable as __le32 or __le16.
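The resulting pattern, common to this fix and the previous one, is roughly (a minimal sketch, error handling as in each caller):

	__le16 instr_le;	/* records the in-memory byte order */
	u16 instr;

	if (get_user(instr_le, (__le16 __user *)pc))
		return -EFAULT;
	instr = le16_to_cpu(instr_le);	/* native order from here on */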
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3ebfb1d00b53..c7c7088097be 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -344,22 +344,24 @@ static int call_undef_hook(struct pt_regs *regs) if (compat_thumb_mode(regs)) { /* 16-bit Thumb instruction */ - if (get_user(instr, (u16 __user *)pc)) + __le16 instr_le; + if (get_user(instr_le, (__le16 __user *)pc)) goto exit; - instr = le16_to_cpu(instr); + instr = le16_to_cpu(instr_le); if (aarch32_insn_is_wide(instr)) { u32 instr2; - if (get_user(instr2, (u16 __user *)(pc + 2))) + if (get_user(instr_le, (__le16 __user *)(pc + 2))) goto exit; - instr2 = le16_to_cpu(instr2); + instr2 = le16_to_cpu(instr_le); instr = (instr << 16) | instr2; } } else { /* 32-bit ARM instruction */ - if (get_user(instr, (u32 __user *)pc)) + __le32 instr_le; + if (get_user(instr_le, (__le32 __user *)pc)) goto exit; - instr = le32_to_cpu(instr); + instr = le32_to_cpu(instr_le); } raw_spin_lock_irqsave(&undef_lock, flags); -- cgit v1.2.3 From 65de142143206c7ffd98b0fcb062a79b3c6f1934 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:09 +0200 Subject: arm64: fix endianness annotation in aarch64_insn_read() The function aarch64_insn_read() is used to read an instruction. On ARM64, instructions are always stored in little-endian order, and thus the function correctly does a little-to-native endian conversion on the value just read. However, the variable used to hold the value before the conversion is not declared as little-endian but as native. Fix this by using the correct type for the declaration: __le32 Note: This only works because the function reading the value, probe_kernel_read(), takes a void pointer and void pointers are endian-agnostic. Otherwise probe_kernel_read() should also be properly annotated (or worse, would need to be specialized). Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index b884a926a632..d4d80b32cb69 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -117,7 +117,7 @@ static void __kprobes patch_unmap(int fixmap) int __kprobes aarch64_insn_read(void *addr, u32 *insnp) { int ret; - u32 val; + __le32 val; ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); if (!ret) -- cgit v1.2.3 From 57c138357d5922878b3bc5207bd59b8512ee80e6 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:11 +0200 Subject: arm64: fix endianness annotation for aarch64_insn_write() aarch64_insn_write() is used to write an instruction. As in-memory instructions on ARM64 are always stored in little-endian order, this function, taking the instruction opcode in native order, correctly converts it to little-endian before handing it to a helper function, __aarch64_insn_write(), which does the actual write. This is all good, but the variable and argument holding the converted value are not annotated as little-endian but left as native. Fix this by adjusting the prototype of the helper and directly using the result of cpu_to_le32() without going through an intermediate variable (which was not a distinct one but the same as the one holding the native value).
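The warning being silenced can be reduced to this (a minimal sketch, not the kernel code):

	u32 insn = 0xd503201f;		/* NOP encoding, native order */
	insn = cpu_to_le32(insn);	/* sparse: restricted __le32 stored in u32 */

Passing the result of cpu_to_le32() directly to a helper whose parameter is typed __le32 keeps the annotation intact instead of laundering it through a u32.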
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d4d80b32cb69..60a2f03d76bc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -126,7 +126,7 @@ int __kprobes aarch64_insn_read(void *addr, u32 *insnp) return ret; } -static int __kprobes __aarch64_insn_write(void *addr, u32 insn) +static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) { void *waddr = addr; unsigned long flags = 0; @@ -145,8 +145,7 @@ static int __kprobes __aarch64_insn_write(void *addr, u32 insn) int __kprobes aarch64_insn_write(void *addr, u32 insn) { - insn = cpu_to_le32(insn); - return __aarch64_insn_write(addr, insn); + return __aarch64_insn_write(addr, cpu_to_le32(insn)); } static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) -- cgit v1.2.3 From 02129ae5fea83294b45c8f16c4ff14ae94e6858d Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:56:00 +0200 Subject: arm64: fix endianness annotation for reloc_insn_movw() & reloc_insn_imm() Here the functions reloc_insn_movw() & reloc_insn_imm() are used to read, modify and write back ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to/from native order, but the pointers used to hold their addresses are declared as pointing to native-order values. Fix this by declaring the pointers as __le32* and removing the casts that are now unneeded. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/module.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 8c3a7264fb0f..f469e0435903 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -74,7 +74,7 @@ enum aarch64_reloc_op { RELOC_OP_PAGE, }; -static u64 do_reloc(enum aarch64_reloc_op reloc_op, void *place, u64 val) +static u64 do_reloc(enum aarch64_reloc_op reloc_op, __le32 *place, u64 val) { switch (reloc_op) { case RELOC_OP_ABS: @@ -121,12 +121,12 @@ enum aarch64_insn_movw_imm_type { AARCH64_INSN_IMM_MOVKZ, }; -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, +static int reloc_insn_movw(enum aarch64_reloc_op op, __le32 *place, u64 val, int lsb, enum aarch64_insn_movw_imm_type imm_type) { u64 imm; s64 sval; - u32 insn = le32_to_cpu(*(u32 *)place); + u32 insn = le32_to_cpu(*place); sval = do_reloc(op, place, val); imm = sval >> lsb; @@ -154,7 +154,7 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, /* Update the instruction with the new encoding. */ insn = aarch64_insn_encode_immediate(AARCH64_INSN_IMM_16, insn, imm); - *(u32 *)place = cpu_to_le32(insn); + *place = cpu_to_le32(insn); if (imm > U16_MAX) return -ERANGE; @@ -162,12 +162,12 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, return 0; } -static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, +static int reloc_insn_imm(enum aarch64_reloc_op op, __le32 *place, u64 val, int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = le32_to_cpu(*(u32 *)place); + u32 insn = le32_to_cpu(*place); /* Calculate the relocation value.
*/ sval = do_reloc(op, place, val); @@ -179,7 +179,7 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, /* Update the instruction's immediate field. */ insn = aarch64_insn_encode_immediate(imm_type, insn, imm); - *(u32 *)place = cpu_to_le32(insn); + *place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and -- cgit v1.2.3 From c0d109de4c0ca365a2bd180e2e65501196fa8ef4 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Mon, 26 Jun 2017 15:16:25 +0200 Subject: arm64: use readq() instead of readl() to read 64bit entry_point Here the entry point, declared as a 64-bit integer, is read from a pointer to a 64-bit integer, but the read is done via readl_relaxed(), which is for 32-bit quantities. All the high bits are thus lost, which changes the meaning of the test against zero done later. Fix this by using readq_relaxed() instead, as is appropriate for 64-bit quantities. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi_parking_protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index 1f5655cd9cc9..f35e80aad378 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -125,7 +125,7 @@ static void acpi_parking_protocol_cpu_postboot(void) struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; __le64 entry_point; - entry_point = readl_relaxed(&mailbox->entry_point); + entry_point = readq_relaxed(&mailbox->entry_point); /* * Check if firmware has cleared the entry_point as expected * by the protocol specification. -- cgit v1.2.3 From f0cda7e6dc5893b4b4daa3440512fc1226bc983f Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:07 +0200 Subject: arm64: fix endianness annotation in acpi_parking_protocol.c Here both variables 'cpu_id' and 'entry_point' are read via read[lq]_relaxed(), from a little-endian annotated pointer, and then used as a native endian value. This is correct since the read[lq]() family of functions internally does a little-to-native endian conversion. But in this case, it is wrong to declare these variables as little-endian since they are native ones. Fix this by declaring these variables as 'u32' or 'u64' instead of '__le32' / '__le64'.
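Together with the previous patch, this leaves the accessor and the type in agreement, roughly (sketch; the message string is illustrative only):

	u64 entry_point;	/* native order: readq_relaxed() converts */

	entry_point = readq_relaxed(&mailbox->entry_point);
	if (entry_point)	/* the test now sees all 64 bits */
		pr_warn("firmware failed to clear the mailbox entry point\n");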
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/acpi_parking_protocol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/acpi_parking_protocol.c b/arch/arm64/kernel/acpi_parking_protocol.c index f35e80aad378..98a20e58758b 100644 --- a/arch/arm64/kernel/acpi_parking_protocol.c +++ b/arch/arm64/kernel/acpi_parking_protocol.c @@ -71,7 +71,7 @@ static int acpi_parking_protocol_cpu_boot(unsigned int cpu) { struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; struct parking_protocol_mailbox __iomem *mailbox; - __le32 cpu_id; + u32 cpu_id; /* * Map mailbox memory with attribute device nGnRE (ie ioremap - @@ -123,7 +123,7 @@ static void acpi_parking_protocol_cpu_postboot(void) int cpu = smp_processor_id(); struct cpu_mailbox_entry *cpu_entry = &cpu_mailbox_entries[cpu]; struct parking_protocol_mailbox __iomem *mailbox = cpu_entry->mailbox; - __le64 entry_point; + u64 entry_point; entry_point = readq_relaxed(&mailbox->entry_point); /* -- cgit v1.2.3 From 50a4b05609929003ce98987bb901ee10fe21fb20 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:31:40 +0200 Subject: arm64: add missing conversion to __wsum in ip_fast_csum() The ARM64 implementation of ip_fast_csum() does most of the work in 128 or 64 bits and calls csum_fold() to finalize. csum_fold() itself takes a __wsum argument, to ensure that this value is always a 32-bit native-order value. Fix this by adding the sadly needed '__force' to cast the native 'sum' to the type '__wsum'. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 09f65339d66d..0b6f5a7d4027 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -42,7 +42,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } while (--ihl); sum += ((sum >> 32) | (sum << 32)); - return csum_fold(sum >> 32); + return csum_fold((__force __wsum)(sum >> 32)); } #define ip_fast_csum ip_fast_csum -- cgit v1.2.3 From 67831edf8a826750b43d5612792a9c3bc449f227 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:35:29 +0200 Subject: arm64: fix endianness annotation in get_kaslr_seed() In the flattened device tree format, all integer properties are in big-endian order. Here the property "kaslr-seed" is read from the fdt and then correctly converted to native order (via fdt64_to_cpu()), but the pointer used for this is not annotated as big-endian. Fix this by declaring the pointer as fdt64_t instead of u64 (fdt64_t itself being typedefed to __be64).
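The corrected read path then looks roughly like this (an abridged sketch; fdt_getprop_w() is the writable libfdt accessor assumed here, and the seed-wiping step of the real function is omitted):

	fdt64_t *prop;	/* big-endian, as stored in the FDT */
	int node, len;

	node = fdt_path_offset(fdt, "/chosen");
	if (node < 0)
		return 0;

	prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
	if (!prop || len != sizeof(u64))
		return 0;

	return fdt64_to_cpu(*prop);	/* native order */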
Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/kaslr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index d7e90d97f5c4..a9710efb8c01 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -27,7 +27,7 @@ u16 __initdata memstart_offset_seed; static __init u64 get_kaslr_seed(void *fdt) { int node, len; - u64 *prop; + fdt64_t *prop; u64 ret; node = fdt_path_offset(fdt, "/chosen"); -- cgit v1.2.3 From 15ad6ace52039c7e39435c4d712d147126604a97 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Thu, 29 Jun 2017 16:40:12 +0200 Subject: arm64: fix endianness annotation for __apply_alternatives()/get_alt_insn() get_alt_insn() is used to read and create ARM instructions, which are always stored in memory in little-endian order. These values are thus correctly converted to/from native order when processed, but the pointers used to hold the address of these instructions are declared as pointing to native-order values. Fix this by declaring the pointers as __le32* instead of u32* and make the few changes needed, such as removing the now-unneeded cast '(u32*)' in front of __ALT_PTR()'s definition. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 8840c109c5d6..6dd0a3a3e5c9 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -28,7 +28,7 @@ #include #include -#define __ALT_PTR(a,f) (u32 *)((void *)&(a)->f + (a)->f) +#define __ALT_PTR(a,f) ((void *)&(a)->f + (a)->f) #define ALT_ORIG_PTR(a) __ALT_PTR(a, orig_offset) #define ALT_REPL_PTR(a) __ALT_PTR(a, alt_offset) @@ -60,7 +60,7 @@ static bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc) #define align_down(x, a) ((unsigned long)(x) & ~(((unsigned long)(a)) - 1)) -static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) +static u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr) { u32 insn; @@ -109,7 +109,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) { struct alt_instr *alt; struct alt_region *region = alt_region; - u32 *origptr, *replptr, *updptr; + __le32 *origptr, *replptr, *updptr; for (alt = region->begin; alt < region->end; alt++) { u32 insn; @@ -124,7 +124,7 @@ static void __apply_alternatives(void *alt_region, bool use_linear_alias) origptr = ALT_ORIG_PTR(alt); replptr = ALT_REPL_PTR(alt); - updptr = use_linear_alias ? (u32 *)lm_alias(origptr) : origptr; + updptr = use_linear_alias ? lm_alias(origptr) : origptr; nr_inst = alt->alt_len / sizeof(insn); for (i = 0; i < nr_inst; i++) { -- cgit v1.2.3 From 53b1a742ed251780267a57415bc955bd50f40c3d Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 29 Jun 2017 15:25:47 +0100 Subject: arm64: ptrace: Avoid setting compat FP[SC]R to garbage if get_user fails If get_user() fails when reading the new FPSCR value from userspace in compat_vfp_set(), then garbage* will be written to the task's FPSR and FPCR registers. This patch prevents this by checking the return from get_user() first. [*] Actually, zero, due to the behaviour of get_user() on error, but that's still not what userspace expects.
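The rule the patch enforces is the usual one for get_user() (a sketch mirroring the diff below):

	compat_ulong_t fpscr;
	int ret = get_user(fpscr, (compat_ulong_t __user *)ubuf);

	if (!ret) {	/* consume fpscr only once the read has succeeded */
		uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK;
		uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK;
	}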
Fixes: 478fcb2cdb23 ("arm64: Debugging support") Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 35846f1550db..4c068dcf1977 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -947,8 +947,10 @@ static int compat_vfp_set(struct task_struct *target, if (count && !ret) { ret = get_user(fpscr, (compat_ulong_t *)ubuf); - uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; - uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + if (!ret) { + uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; + uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; + } } fpsimd_flush_task_state(target); -- cgit v1.2.3 From 16d38acb12d065ebe3494e4e31e8b4438f3214da Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 29 Jun 2017 15:25:48 +0100 Subject: arm64: ptrace: Remove redundant overrun check from compat_vfp_set() compat_vfp_set() checks for userspace trying to write an excessive amount of data to the regset. However this check is conspicuous for its absence from every other _set() in the arm64 ptrace implementation. In fact, the core ptrace_regset() already clamps userspace's iov_len to the regset size before the individual regset .{get,set}() methods get called. This patch removes the redundant check. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 4c068dcf1977..949ab6bdfbad 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -937,9 +937,6 @@ static int compat_vfp_set(struct task_struct *target, compat_ulong_t fpscr; int ret; - if (pos + count > VFP_STATE_SIZE) - return -EIO; - uregs = &target->thread.fpsimd_state.user_fpsimd; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, -- cgit v1.2.3 From 5fbd5fc49fc39ac8433da62d16682a1d0217ea4f Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 29 Jun 2017 15:25:49 +0100 Subject: arm64: ptrace: Fix incorrect get_user() use in compat_vfp_set() Now that compat_vfp_get() uses the regset API to copy the FPSCR value out to userspace, compat_vfp_set() looks inconsistent. In particular, compat_vfp_set() will fail if called with kbuf != NULL && ubuf == NULL (which is valid usage according to the regset API). This patch fixes compat_vfp_set() to use user_regset_copyin(), similarly to compat_vfp_get(). This also squashes a sparse warning triggered by the cast that drops __user when calling get_user(). 
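For reference, the helper's contract (its prototype in include/linux/regset.h as of this series): it copies the [start_pos, end_pos) slice from whichever of kbuf/ubuf is non-NULL, advancing pos and count, so callers need not distinguish the two cases:

	int user_regset_copyin(unsigned int *pos, unsigned int *count,
			       const void **kbuf, const void __user **ubuf,
			       void *data, const int start_pos, const int end_pos);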
Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 949ab6bdfbad..1b38c0150aec 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -935,15 +935,17 @@ static int compat_vfp_set(struct task_struct *target, { struct user_fpsimd_state *uregs; compat_ulong_t fpscr; - int ret; + int ret, vregs_end_pos; uregs = &target->thread.fpsimd_state.user_fpsimd; + vregs_end_pos = VFP_STATE_SIZE - sizeof(compat_ulong_t); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, uregs, 0, - VFP_STATE_SIZE - sizeof(compat_ulong_t)); + vregs_end_pos); if (count && !ret) { - ret = get_user(fpscr, (compat_ulong_t *)ubuf); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpscr, + vregs_end_pos, VFP_STATE_SIZE); if (!ret) { uregs->fpsr = fpscr & VFP_FPSCR_STAT_MASK; uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; -- cgit v1.2.3 From 70a62ad19e40a6b0d9e3a048fe0d0a391d76ad12 Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Fri, 30 Jun 2017 17:00:20 +0530 Subject: arm64: cpuinfo: constify attribute_group structures. attribute_groups are not supposed to change at runtime. All functions working with attribute_groups provided by <linux/sysfs.h> work with const attribute_group. So mark the non-const structs as const. Signed-off-by: Arvind Yadav Signed-off-by: Will Deacon --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 68b1f364c515..f495ee5049fd 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -227,7 +227,7 @@ static struct attribute *cpuregs_id_attrs[] = { NULL }; -static struct attribute_group cpuregs_attr_group = { +static const struct attribute_group cpuregs_attr_group = { .attrs = cpuregs_id_attrs, .name = "identification" }; -- cgit v1.2.3 From 425e1ed73e6574e4fe186ec82fd37213cbd47df0 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Wed, 28 Jun 2017 16:58:03 +0200 Subject: arm64: fix endianness annotation for 'struct jit_ctx' and friends struct jit_ctx::image is used to store a pointer to the jitted instructions, which are always little-endian. These instructions are thus correctly converted from native order to little-endian before being stored, but the pointer 'image' is declared as pointing to native-order values. Fix this by declaring the field as __le32* instead of u32*. Same for the pointer used in jit_fill_hole() to initialize the image. Signed-off-by: Luc Van Oostenryck Signed-off-by: Will Deacon --- arch/arm64/net/bpf_jit_comp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 71f930501ade..bd2ac9912087 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -68,7 +68,7 @@ struct jit_ctx { int idx; int epilogue_offset; int *offset; - u32 *image; + __le32 *image; }; static inline void emit(const u32 insn, struct jit_ctx *ctx) @@ -128,7 +128,7 @@ static inline int bpf2a64_offset(int bpf_to, int bpf_from, static void jit_fill_hole(void *area, unsigned int size) { - u32 *ptr; + __le32 *ptr; /* We are guaranteed to have aligned memory. */ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) *ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT); @@ -871,7 +871,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 2.
Now, the actual pass. */ - ctx.image = (u32 *)image_ptr; + ctx.image = (__le32 *)image_ptr; ctx.idx = 0; build_prologue(&ctx); -- cgit v1.2.3
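With the field annotated, the JIT's store site type-checks end to end; for context, the emit() helper from the same file (quoted as a sketch) shows where the conversion happens:

	static inline void emit(const u32 insn, struct jit_ctx *ctx)
	{
		if (ctx->image != NULL)
			ctx->image[ctx->idx] = cpu_to_le32(insn);	/* __le32 slot */
		ctx->idx++;
	}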