From 2d23ed04de933625bc777cc474d2cd9a2a3fa860 Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Fri, 12 Jun 2015 15:39:28 +0100 Subject: arm64: perf: Remove unnecessary printk ARM64 pmu prints an error message in event_init() when no hardware PMU is available. This is pretty annoying as it keeps printing the message for every single trial, flooding the kernel logs, unnecessarily. The return code is sufficient for the user to figure out the reason. Signed-off-by: Suzuki K. Poulose Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index b31e9a4b6275..2cd580a1fa80 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -435,10 +435,8 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; - if (!pmu_device) { - pr_err("no PMU device registered\n"); + if (!pmu_device) return -ENODEV; - } irqs = min(pmu_device->num_resources, num_possible_cpus()); if (!irqs) { -- cgit v1.2.3 From d09ce834dfffd93aa4586b3e07faaf901ee8706f Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 30 Jun 2015 13:56:56 +0100 Subject: arm64: perf: replace arch_find_n_match_cpu_physical_id with of_cpu_device_node_get arch_find_n_match_cpu_physical_id parses the device tree to get the device node for a given logical cpu index. However, since ARM PMUs get probed after the CPU device nodes are stashed while registering the cpus, we can use of_cpu_device_node_get to avoid another DT parse. This patch replaces arch_find_n_match_cpu_physical_id with of_cpu_device_node_get to reuse the stashed value directly instead. 
Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 2cd580a1fa80..563f8ce8df68 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -1335,7 +1335,7 @@ static int armpmu_device_probe(struct platform_device *pdev) } for_each_possible_cpu(cpu) - if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) + if (dn == of_cpu_device_node_get(cpu)) break; if (cpu >= nr_cpu_ids) { -- cgit v1.2.3 From e147ae6d7f908412a013c115e42c3e15dac33ccc Mon Sep 17 00:00:00 2001 From: Rohit Thapliyal Date: Fri, 10 Jul 2015 09:23:59 +0100 Subject: arm64: modify the dump mem for 64 bit addresses On 64bit kernel, the dump_mem gives 32 bit addresses on the stack dump. This gives unorganized information regarding the 64bit values on the stack. Hence, modified to get a complete 64bit memory dump. 
With patch: [ 93.534801] Process insmod (pid: 1587, stack limit = 0xffffffc976be4058) [ 93.541441] Stack: (0xffffffc976be7cf0 to 0xffffffc976be8000) [ 93.547136] 7ce0: ffffffc976be7d00 ffffffc00008163c [ 93.554898] 7d00: ffffffc976be7d40 ffffffc0000f8a44 ffffffc00098ef38 ffffffbffc000088 [ 93.562659] 7d20: ffffffc00098ef50 ffffffbffc0000c0 0000000000000001 ffffffbffc000070 [ 93.570419] 7d40: ffffffc976be7e40 ffffffc0000f935c 0000000000000000 000000002b424090 [ 93.578179] 7d60: 000000002b424010 0000007facc555f4 0000000080000000 0000000000000015 [ 93.585937] 7d80: 0000000000000116 0000000000000069 ffffffc00097b000 ffffffc976be4000 [ 93.593694] 7da0: 0000000000000064 0000000000000072 000000000000006e 000000000000003f [ 93.601453] 7dc0: 000000000000feff 000000000000fff1 ffffffbffc002028 0000000000000124 [ 93.609211] 7de0: ffffffc976be7e10 0000000000000001 ffffff8000000000 ffffffbbffff0000 [ 93.616969] 7e00: ffffffc976be7e60 0000000000000000 0000000000000000 0000000000000000 [ 93.624726] 7e20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 93.632484] 7e40: 0000007fcc474550 ffffffc0000841ec 000000002b424010 0000007facda0710 [ 93.640241] 7e60: ffffffffffffffff ffffffc0000be6dc ffffff80007d2000 000000000001c010 [ 93.647999] 7e80: ffffff80007e0ae0 ffffff80007e09d0 ffffff80007edf70 0000000000000288 [ 93.655757] 7ea0: 00000000000002e8 0000000000000000 0000000000000000 0000001c0000001b [ 93.663514] 7ec0: 0000000000000009 0000000000000007 000000002b424090 000000000001c010 [ 93.671272] 7ee0: 000000002b424010 0000007faccd3a48 0000000000000000 0000000000000000 [ 93.679030] 7f00: 0000007fcc4743f8 0000007fcc4743f8 0000000000000069 0000000000000003 [ 93.686787] 7f20: 0101010101010101 0000000000000004 0000000000000020 00000000000003f3 [ 93.694544] 7f40: 0000007facb95664 0000007facda7030 0000007facc555d0 0000000000498378 [ 93.702301] 7f60: 0000000000000000 000000002b424010 0000007facda0710 000000002b424090 [ 93.710058] 7f80: 0000007fcc474698 0000000000498000 
0000007fcc474ebb 0000000000474f58 [ 93.717815] 7fa0: 0000000000498000 0000000000000000 0000000000000000 0000007fcc474550 [ 93.725573] 7fc0: 00000000004104bc 0000007fcc474430 0000007facc555f4 0000000080000000 [ 93.733330] 7fe0: 000000002b424090 0000000000000069 0950020128000244 4104000008000004 [ 93.741084] Call trace: The above output makes a debugger's life a lot easier. Signed-off-by: Rohit Thapliyal Signed-off-by: Maninder Singh Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 566bc4c35040..4db6a2574fec 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -52,11 +52,12 @@ int show_unhandled_signals = 1; * Dump out the contents of some memory nicely... */ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, - unsigned long top) + unsigned long top, bool compat) { unsigned long first; mm_segment_t fs; int i; + unsigned int width = compat ? 
4 : 8; /* * We need to switch to kernel mode so that we can use __get_user @@ -75,13 +76,22 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom, memset(str, ' ', sizeof(str)); str[sizeof(str) - 1] = '\0'; - for (p = first, i = 0; i < 8 && p < top; i++, p += 4) { + for (p = first, i = 0; i < (32 / width) + && p < top; i++, p += width) { if (p >= bottom && p < top) { - unsigned int val; - if (__get_user(val, (unsigned int *)p) == 0) - sprintf(str + i * 9, " %08x", val); - else - sprintf(str + i * 9, " ????????"); + unsigned long val; + + if (width == 8) { + if (__get_user(val, (unsigned long *)p) == 0) + sprintf(str + i * 17, " %016lx", val); + else + sprintf(str + i * 17, " ????????????????"); + } else { + if (__get_user(val, (unsigned int *)p) == 0) + sprintf(str + i * 9, " %08lx", val); + else + sprintf(str + i * 9, " ????????"); + } } } printk("%s%04lx:%s\n", lvl, first & 0xffff, str); @@ -95,7 +105,7 @@ static void dump_backtrace_entry(unsigned long where, unsigned long stack) print_ip_sym(where); if (in_exception_text(where)) dump_mem("", "Exception stack", stack, - stack + sizeof(struct pt_regs)); + stack + sizeof(struct pt_regs), false); } static void dump_instr(const char *lvl, struct pt_regs *regs) @@ -207,7 +217,8 @@ static int __die(const char *str, int err, struct thread_info *thread, if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->sp, - THREAD_SIZE + (unsigned long)task_stack_page(tsk)); + THREAD_SIZE + (unsigned long)task_stack_page(tsk), + compat_user_mode(regs)); dump_backtrace(regs, tsk); dump_instr(KERN_EMERG, regs); } -- cgit v1.2.3 From d3127afa712321a2b297cfee358be2cb223f933c Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Fri, 10 Jul 2015 14:58:00 +0100 Subject: arm64: Remove unused macros from assembler.h Commit 68234df4ea79 ("arm64: kill flush_cache_all()") removed the only users of these macros. 
Signed-off-by: Daniel Thompson Cc: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 144b64ad96c3..e10516bbe833 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -49,18 +49,6 @@ msr daifclr, #2 .endm -/* - * Save/disable and restore interrupts. - */ - .macro save_and_disable_irqs, olddaif - mrs \olddaif, daif - disable_irq - .endm - - .macro restore_irqs, olddaif - msr daif, \olddaif - .endm - /* * Enable and disable debug exceptions. */ -- cgit v1.2.3 From aaf6f2f098f8ec22fa51ec15bd327b8acdfe5a78 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 10 Jul 2015 16:47:56 +0100 Subject: arm64: consolidate __swiotlb_mmap Since commit 9d3bfbb4df58 ("arm64: Combine coherent and non-coherent swiotlb dma_ops"), __dma_common_mmap is no longer shared between two callers, so roll it into the remaining one. 
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index d16a1cead23f..63b2a117a03c 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -303,9 +303,10 @@ static void __swiotlb_sync_sg_for_device(struct device *dev, sg->length, dir); } -/* vma->vm_page_prot must be set appropriately before calling this function */ -static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) +static int __swiotlb_mmap(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + struct dma_attrs *attrs) { int ret = -ENXIO; unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >> @@ -314,6 +315,9 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; unsigned long off = vma->vm_pgoff; + vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, + is_device_dma_coherent(dev)); + if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -327,16 +331,6 @@ static int __dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -static int __swiotlb_mmap(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - struct dma_attrs *attrs) -{ - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); - return __dma_common_mmap(dev, vma, cpu_addr, dma_addr, size); -} - static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, -- cgit v1.2.3 From cba3574fd56be8132a19e4aa6b1d41a12c56d990 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 16 Jul 2015 19:26:02 +0100 Subject: arm64: move update_mmu_cache() into asm/pgtable.h Mark Brown 
reported an allnoconfig build failure in -next: Today's linux-next fails to build an arm64 allnoconfig due to "mm: make GUP handle pfn mapping unless FOLL_GET is requested" which causes: > arm64-allnoconfig > ../mm/gup.c:51:4: error: implicit declaration of function 'update_mmu_cache' [-Werror=implicit-function-declaration] Fix the error by moving the function to asm/pgtable.h, as is the case for most other architectures. Reported-by: Mark Brown Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 15 +++++++++++++++ arch/arm64/include/asm/tlbflush.h | 14 -------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 56283f8a675c..4d5c812847e9 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -505,6 +505,21 @@ extern int kern_addr_valid(unsigned long addr); #define pgtable_cache_init() do { } while (0) +/* + * On AArch64, the cache coherency is handled via the set_pte_at() function. + */ +static inline void update_mmu_cache(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + /* + * set_pte() does not have a DSB for user mappings, so make sure that + * the page table write is visible. + */ + dsb(ishst); +} + +#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 934815d45eda..7fedfa787a64 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -154,20 +154,6 @@ static inline void __flush_tlb_pgtable(struct mm_struct *mm, asm("tlbi vae1is, %0" : : "r" (addr)); dsb(ish); } -/* - * On AArch64, the cache coherency is handled via the set_pte_at() function. 
- */ -static inline void update_mmu_cache(struct vm_area_struct *vma, - unsigned long addr, pte_t *ptep) -{ - /* - * set_pte() does not have a DSB for user mappings, so make sure that - * the page table write is visible. - */ - dsb(ishst); -} - -#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0) #endif -- cgit v1.2.3 From b08d4640a3dca68670fc5af2fe9205b395a02388 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 16 Jul 2015 18:58:53 +0100 Subject: arm64: remove dead code Commit 68234df4ea79 ("arm64: kill flush_cache_all()") removed soft_reset() from the kernel. This was the only caller of setup_mm_for_reboot(), so remove that also. Signed-off-by: Mark Salter Signed-off-by: Will Deacon --- arch/arm64/include/asm/mmu.h | 1 - arch/arm64/mm/mmu.c | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 79fcfb048884..030208767185 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -28,7 +28,6 @@ typedef struct { #define ASID(mm) ((mm)->context.id & 0xffff) extern void paging_init(void); -extern void setup_mm_for_reboot(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); extern void init_mem_pgprot(void); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a4ede4e2ddd1..63012fed46fc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -460,17 +460,6 @@ void __init paging_init(void) cpu_set_default_tcr_t0sz(); } -/* - * Enable the identity mapping to allow the MMU disabling. - */ -void setup_mm_for_reboot(void) -{ - cpu_set_reserved_ttbr0(); - flush_tlb_all(); - cpu_set_idmap_tcr_t0sz(); - cpu_switch_mm(idmap_pg_dir, &init_mm); -} - /* * Check whether a kernel address is valid (derived from arch/x86/). 
*/ -- cgit v1.2.3 From 2f4b829c625ec36c2d80bef6395c7b74cea8aac0 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 10 Jul 2015 17:24:28 +0100 Subject: arm64: Add support for hardware updates of the access and dirty pte bits The ARMv8.1 architecture extensions introduce support for hardware updates of the access and dirty information in page table entries. With TCR_EL1.HA enabled, when the CPU accesses an address with the PTE_AF bit cleared in the page table, instead of raising an access flag fault the CPU sets the actual page table entry bit. To ensure that kernel modifications to the page tables do not inadvertently revert a change introduced by hardware updates, the exclusive monitor (ldxr/stxr) is adopted in the pte accessors. When TCR_EL1.HD is enabled, a write access to a memory location with the DBM (Dirty Bit Management) bit set in the corresponding pte automatically clears the read-only bit (AP[2]). Such DBM bit maps onto the Linux PTE_WRITE bit and to check whether a writable (DBM set) page is dirty, the kernel tests the PTE_RDONLY bit. In order to allow read-only and dirty pages, the kernel needs to preserve the software dirty bit. The hardware dirty status is transferred to the software dirty bit in ptep_set_wrprotect() (using load/store exclusive loop) and pte_modify(). 
Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 17 ++++ arch/arm64/include/asm/pgtable-hwdef.h | 3 + arch/arm64/include/asm/pgtable.h | 147 ++++++++++++++++++++++++++++++++- arch/arm64/mm/proc.S | 13 +++ 4 files changed, 178 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 318175f62c24..40f717f8820a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -469,6 +469,23 @@ config ARM64_VA_BITS default 42 if ARM64_VA_BITS_42 default 48 if ARM64_VA_BITS_48 +config ARM64_HW_AFDBM + bool "Support for hardware updates of the Access and Dirty page flags" + default y + help + The ARMv8.1 architecture extensions introduce support for + hardware updates of the access and dirty information in page + table entries. When enabled in TCR_EL1 (HA and HD bits) on + capable processors, accesses to pages with PTE_AF cleared will + set this bit instead of raising an access flag fault. + Similarly, writes to read-only pages with the DBM bit set will + clear the read-only bit (AP[2]) instead of raising a + permission fault. + + Kernels built with this configuration option enabled continue + to work on pre-ARMv8.1 hardware and the performance impact is + minimal. If unsure, say Y. 
+ config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 59bfae75dc98..24154b055835 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -104,6 +104,7 @@ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ @@ -168,5 +169,7 @@ #define TCR_TG1_64K (UL(3) << 30) #define TCR_ASID16 (UL(1) << 36) #define TCR_TBI0 (UL(1) << 37) +#define TCR_HA (UL(1) << 39) +#define TCR_HD (UL(1) << 40) #endif diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 4d5c812847e9..8212e6aa0fb1 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef __ASM_PGTABLE_H #define __ASM_PGTABLE_H +#include #include #include @@ -27,7 +28,11 @@ #define PTE_VALID (_AT(pteval_t, 1) << 0) #define PTE_DIRTY (_AT(pteval_t, 1) << 55) #define PTE_SPECIAL (_AT(pteval_t, 1) << 56) +#ifdef CONFIG_ARM64_HW_AFDBM +#define PTE_WRITE (PTE_DBM) /* same as DBM */ +#else #define PTE_WRITE (_AT(pteval_t, 1) << 57) +#endif #define PTE_PROT_NONE (_AT(pteval_t, 1) << 58) /* only when !PTE_VALID */ /* @@ -48,6 +53,9 @@ #define FIRST_USER_ADDRESS 0UL #ifndef __ASSEMBLY__ + +#include + extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); @@ -137,12 +145,20 @@ extern struct page *empty_zero_page; * The following only work if pte_present(). Undefined behaviour otherwise. 
*/ #define pte_present(pte) (!!(pte_val(pte) & (PTE_VALID | PTE_PROT_NONE))) -#define pte_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) #define pte_exec(pte) (!(pte_val(pte) & PTE_UXN)) +#ifdef CONFIG_ARM64_HW_AFDBM +#define pte_hw_dirty(pte) (!(pte_val(pte) & PTE_RDONLY)) +#else +#define pte_hw_dirty(pte) (0) +#endif +#define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) +#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) + +#define pte_valid(pte) (!!(pte_val(pte) && PTE_VALID)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ @@ -209,20 +225,49 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } +struct mm_struct; +struct vm_area_struct; + extern void __sync_icache_dcache(pte_t pteval, unsigned long addr); +/* + * PTE bits configuration in the presence of hardware Dirty Bit Management + * (PTE_WRITE == PTE_DBM): + * + * Dirty Writable | PTE_RDONLY PTE_WRITE PTE_DIRTY (sw) + * 0 0 | 1 0 0 + * 0 1 | 1 1 0 + * 1 0 | 1 0 1 + * 1 1 | 0 1 x + * + * When hardware DBM is not present, the sofware PTE_DIRTY bit is updated via + * the page fault mechanism. Checking the dirty status of a pte becomes: + * + * PTE_DIRTY || !PTE_RDONLY + */ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { if (pte_valid_user(pte)) { if (!pte_special(pte) && pte_exec(pte)) __sync_icache_dcache(pte, addr); - if (pte_dirty(pte) && pte_write(pte)) + if (pte_sw_dirty(pte) && pte_write(pte)) pte_val(pte) &= ~PTE_RDONLY; else pte_val(pte) |= PTE_RDONLY; } + /* + * If the existing pte is valid, check for potential race with + * hardware updates of the pte (ptep_set_access_flags safely changes + * valid ptes without going through an invalid entry). 
+ */ + if (IS_ENABLED(CONFIG_DEBUG_VM) && IS_ENABLED(CONFIG_ARM64_HW_AFDBM) && + pte_valid(*ptep)) { + BUG_ON(!pte_young(pte)); + BUG_ON(pte_write(*ptep) && !pte_dirty(pte)); + } + set_pte(ptep, pte); } @@ -461,6 +506,9 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | PTE_PROT_NONE | PTE_WRITE | PTE_TYPE_MASK; + /* preserve the hardware dirty information */ + if (pte_hw_dirty(pte)) + newprot |= PTE_DIRTY; pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } @@ -470,6 +518,101 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) return pte_pmd(pte_modify(pmd_pte(pmd), newprot)); } +#ifdef CONFIG_ARM64_HW_AFDBM +/* + * Atomic pte/pmd modifications. + */ +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pte_t *ptep) +{ + pteval_t pteval; + unsigned int tmp, res; + + asm volatile("// ptep_test_and_clear_young\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " ubfx %w3, %w0, %5, #1 // extract PTE_AF (young)\n" + " and %0, %0, %4 // clear PTE_AF\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)), "=&r" (res) + : "L" (~PTE_AF), "I" (ilog2(PTE_AF))); + + return res; +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long address, + pmd_t *pmdp) +{ + return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +static inline pte_t ptep_get_and_clear(struct mm_struct *mm, + unsigned long address, pte_t *ptep) +{ + pteval_t old_pteval; + unsigned int tmp; + + asm volatile("// ptep_get_and_clear\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " stxr %w1, xzr, %2\n" + " cbnz %w1, 1b\n" + : "=&r" 
 (old_pteval), "=&r" (tmp), "+Q" (pte_val(*ptep))); + + return __pte(old_pteval); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + return pte_pmd(ptep_get_and_clear(mm, address, (pte_t *)pmdp)); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +/* + * ptep_set_wrprotect - mark read-only while transferring potential hardware + * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. + */ +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) +{ + pteval_t pteval; + unsigned long tmp; + + asm volatile("// ptep_set_wrprotect\n" + " prfm pstl1strm, %2\n" + "1: ldxr %0, %2\n" + " tst %0, %4 // check for hw dirty (!PTE_RDONLY)\n" + " csel %1, %3, xzr, eq // set PTE_DIRTY|PTE_RDONLY if dirty\n" + " orr %0, %0, %1 // if !dirty, PTE_RDONLY is already set\n" + " and %0, %0, %5 // clear PTE_WRITE/PTE_DBM\n" + " stxr %w1, %0, %2\n" + " cbnz %w1, 1b\n" + : "=&r" (pteval), "=&r" (tmp), "+Q" (pte_val(*ptep)) + : "r" (PTE_DIRTY|PTE_RDONLY), "L" (PTE_RDONLY), "L" (~PTE_WRITE) + : "cc"); +} + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp) +{ + ptep_set_wrprotect(mm, address, (pte_t *)pmdp); +} +#endif +#endif /* CONFIG_ARM64_HW_AFDBM */ + extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 39139a3aa16d..a8be513dff6f 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -196,6 +196,19 @@ ENTRY(__cpu_setup) */ mrs x9, ID_AA64MMFR0_EL1 bfi x10, x9, #32, #3 +#ifdef CONFIG_ARM64_HW_AFDBM + /* + * Hardware update of the Access and Dirty bits. 
+ */ + mrs x9, ID_AA64MMFR1_EL1 + and x9, x9, #0xf + cbz x9, 2f + cmp x9, #2 + b.lt 1f + orr x10, x10, #TCR_HD // hardware Dirty flag update +1: orr x10, x10, #TCR_HA // hardware Access flag update +2: +#endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 ret // return to head.S ENDPROC(__cpu_setup) -- cgit v1.2.3 From 0723c05fb75e4428b79b5cd657af7496b2604422 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 16 Jul 2015 21:26:16 +0100 Subject: arm64: enable more compressed Image formats Plumb up Makefile arguments for the already supported formats in the kbuild system: lz4, bzip2, lzma, and lzo. Note that just as with Image.gz, these images are not self-decompressing and the booting firmware still needs to handle decompression before launching the kernel image. Signed-off-by: Olof Johansson Signed-off-by: Will Deacon --- arch/arm64/Makefile | 5 ++++- arch/arm64/boot/Makefile | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4d2a925998f9..0953a97b5119 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -58,7 +58,10 @@ all: $(KBUILD_IMAGE) $(KBUILD_DTBS) boot := arch/arm64/boot -Image Image.gz: vmlinux +Image: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +Image.%: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ zinstall install: vmlinux diff --git a/arch/arm64/boot/Makefile b/arch/arm64/boot/Makefile index 5a0e3ab854a5..abcbba2f01ba 100644 --- a/arch/arm64/boot/Makefile +++ b/arch/arm64/boot/Makefile @@ -19,9 +19,21 @@ targets := Image Image.gz $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) +$(obj)/Image.bz2: $(obj)/Image FORCE + $(call if_changed,bzip2) + $(obj)/Image.gz: $(obj)/Image FORCE $(call if_changed,gzip) +$(obj)/Image.lz4: $(obj)/Image FORCE + $(call if_changed,lz4) + +$(obj)/Image.lzma: $(obj)/Image FORCE + $(call if_changed,lzma) + +$(obj)/Image.lzo: $(obj)/Image FORCE + $(call if_changed,lzo) + install: $(obj)/Image $(CONFIG_SHELL) 
$(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" -- cgit v1.2.3 From 5d220ff9420f8b1689805ba2d938bedf9e0860a4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 14 Jul 2015 16:20:17 +0100 Subject: arm64: Better native ptrace support for compat tasks The compat ptrace interface allows access to the TLS register, hardware breakpoints and watchpoints, syscall number. However, a native task using the native ptrace interface to debug compat tasks (e.g. multi-arch gdb) only has access to the general and VFP register sets. The compat ptrace interface cannot be accessed from a native task. This patch adds a new user_aarch32_ptrace_view which contains the TLS, hardware breakpoint/watchpoint and syscall number regsets in addition to the existing GPR and VFP regsets. This view is backwards compatible with the previous kernels. Core dumping of 32-bit tasks and compat ptrace are not affected since the original user_aarch32_view is preserved. Signed-off-by: Catalin Marinas Reported-by: Yao Qi Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 92 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 91 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d882b833dbdb..1971f491bb90 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -826,6 +826,30 @@ static int compat_vfp_set(struct task_struct *target, return ret; } +static int compat_tls_get(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, void *kbuf, void __user *ubuf) +{ + compat_ulong_t tls = (compat_ulong_t)target->thread.tp_value; + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); +} + +static int compat_tls_set(struct task_struct *target, + const struct user_regset *regset, unsigned int pos, + unsigned int count, const void *kbuf, + const void __user *ubuf) +{ + int ret; + compat_ulong_t tls; + 
+ ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &tls, 0, -1); + if (ret) + return ret; + + target->thread.tp_value = tls; + return ret; +} + static const struct user_regset aarch32_regsets[] = { [REGSET_COMPAT_GPR] = { .core_note_type = NT_PRSTATUS, @@ -850,6 +874,64 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; +static const struct user_regset aarch32_ptrace_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = COMPAT_ELF_NGREG, + .size = sizeof(compat_elf_greg_t), + .align = sizeof(compat_elf_greg_t), + .get = compat_gpr_get, + .set = compat_gpr_set + }, + [REGSET_FPR] = { + .core_note_type = NT_ARM_VFP, + .n = VFP_STATE_SIZE / sizeof(compat_ulong_t), + .size = sizeof(compat_ulong_t), + .align = sizeof(compat_ulong_t), + .get = compat_vfp_get, + .set = compat_vfp_set + }, + [REGSET_TLS] = { + .core_note_type = NT_ARM_TLS, + .n = 1, + .size = sizeof(compat_ulong_t), + .align = sizeof(compat_ulong_t), + .get = compat_tls_get, + .set = compat_tls_set, + }, +#ifdef CONFIG_HAVE_HW_BREAKPOINT + [REGSET_HW_BREAK] = { + .core_note_type = NT_ARM_HW_BREAK, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, + [REGSET_HW_WATCH] = { + .core_note_type = NT_ARM_HW_WATCH, + .n = sizeof(struct user_hwdebug_state) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = hw_break_get, + .set = hw_break_set, + }, +#endif + [REGSET_SYSTEM_CALL] = { + .core_note_type = NT_ARM_SYSTEM_CALL, + .n = 1, + .size = sizeof(int), + .align = sizeof(int), + .get = system_call_get, + .set = system_call_set, + }, +}; + +static const struct user_regset_view user_aarch32_ptrace_view = { + .name = "aarch32", .e_machine = EM_ARM, + .regsets = aarch32_ptrace_regsets, .n = ARRAY_SIZE(aarch32_ptrace_regsets) +}; + static int compat_ptrace_read_user(struct task_struct *tsk, 
compat_ulong_t off, compat_ulong_t __user *ret) { @@ -1109,8 +1191,16 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, const struct user_regset_view *task_user_regset_view(struct task_struct *task) { #ifdef CONFIG_COMPAT - if (is_compat_thread(task_thread_info(task))) + /* + * Core dumping of 32-bit tasks or compat ptrace requests must use the + * user_aarch32_view compatible with arm32. Native ptrace requests on + * 32-bit children use an extended user_aarch32_ptrace_view to allow + * access to the TLS register. + */ + if (is_compat_task()) return &user_aarch32_view; + else if (is_compat_thread(task_thread_info(task))) + return &user_aarch32_ptrace_view; #endif return &user_aarch64_view; } -- cgit v1.2.3 From 23e94994464a7281838785675e09c8ed1055f62f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jun 2015 15:18:38 +0100 Subject: arm64: lib: use pair accessors for copy_*_user routines The AArch64 instruction set contains load/store pair memory accessors, so use these in our copy_*_user routines to transfer 16 bytes per iteration. 
Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/lib/copy_from_user.S | 17 +++++++++++------ arch/arm64/lib/copy_in_user.S | 17 +++++++++++------ arch/arm64/lib/copy_to_user.S | 17 +++++++++++------ 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 5e27add9d362..47c3fa5ae4ae 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -28,14 +28,19 @@ * x0 - bytes not copied */ ENTRY(__copy_from_user) - add x4, x1, x2 // upper user buffer boundary - subs x2, x2, #8 + add x5, x1, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: +USER(9f, ldp x3, x4, [x1], #16) + subs x2, x2, #16 + stp x3, x4, [x0], #16 + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: USER(9f, ldr x3, [x1], #8 ) - subs x2, x2, #8 + sub x2, x2, #8 str x3, [x0], #8 - b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) @@ -56,7 +61,7 @@ ENDPROC(__copy_from_user) .section .fixup,"ax" .align 2 -9: sub x2, x4, x1 +9: sub x2, x5, x1 mov x3, x2 10: strb wzr, [x0], #1 // zero remaining buffer space subs x3, x3, #1 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 84b6c9bb9b93..436bcc5d77b5 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -30,14 +30,19 @@ * x0 - bytes not copied */ ENTRY(__copy_in_user) - add x4, x0, x2 // upper user buffer boundary - subs x2, x2, #8 + add x5, x0, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: +USER(9f, ldp x3, x4, [x1], #16) + subs x2, x2, #16 +USER(9f, stp x3, x4, [x0], #16) + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: USER(9f, ldr x3, [x1], #8 ) - subs x2, x2, #8 + sub x2, x2, #8 USER(9f, str x3, [x0], #8 ) - b.pl 1b 2: adds x2, x2, #4 b.mi 3f USER(9f, ldr w3, [x1], #4 ) @@ -58,6 +63,6 @@ ENDPROC(__copy_in_user) .section .fixup,"ax" .align 2 -9: sub x0, x4, x0 // bytes not copied +9: sub x0, x5, x0 // bytes not copied ret .previous diff --git 
a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index a0aeeb9b7a28..f5e1f526f408 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -28,14 +28,19 @@ * x0 - bytes not copied */ ENTRY(__copy_to_user) - add x4, x0, x2 // upper user buffer boundary - subs x2, x2, #8 + add x5, x0, x2 // upper user buffer boundary + subs x2, x2, #16 + b.mi 1f +0: + ldp x3, x4, [x1], #16 + subs x2, x2, #16 +USER(9f, stp x3, x4, [x0], #16) + b.pl 0b +1: adds x2, x2, #8 b.mi 2f -1: ldr x3, [x1], #8 - subs x2, x2, #8 + sub x2, x2, #8 USER(9f, str x3, [x0], #8 ) - b.pl 1b 2: adds x2, x2, #4 b.mi 3f ldr w3, [x1], #4 @@ -56,6 +61,6 @@ ENDPROC(__copy_to_user) .section .fixup,"ax" .align 2 -9: sub x0, x4, x0 // bytes not copied +9: sub x0, x5, x0 // bytes not copied ret .previous -- cgit v1.2.3 From 52da443ec4d0a807b720527eb474f9c2878cd671 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 6 Jul 2015 12:23:54 +0100 Subject: arm64: perf: factor out callchain code We currently bundle the callchain handling code with the PMU code, despite the fact the two are distinct, and the former can be useful even in the absence of the latter. Follow the example of arch/arm and factor the callchain handling into its own file dependent on CONFIG_PERF_EVENTS rather than CONFIG_HW_PERF_EVENTS. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/perf_event.h | 2 +- arch/arm64/kernel/Makefile | 2 +- arch/arm64/kernel/perf_callchain.c | 196 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/perf_event.c | 178 -------------------------------- 4 files changed, 198 insertions(+), 180 deletions(-) create mode 100644 arch/arm64/kernel/perf_callchain.c diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 6471773db6fd..7bd3cdb533ea 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -17,7 +17,7 @@ #ifndef __ASM_PERF_EVENT_H #define __ASM_PERF_EVENT_H -#ifdef CONFIG_HW_PERF_EVENTS +#ifdef CONFIG_PERF_EVENTS struct pt_regs; extern unsigned long perf_instruction_pointer(struct pt_regs *regs); extern unsigned long perf_misc_flags(struct pt_regs *regs); diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 426d0763c81b..e89063eff14f 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -25,7 +25,7 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o -arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o diff --git a/arch/arm64/kernel/perf_callchain.c b/arch/arm64/kernel/perf_callchain.c new file mode 100644 index 000000000000..3aa74830cc69 --- /dev/null +++ b/arch/arm64/kernel/perf_callchain.c @@ -0,0 +1,196 @@ +/* + * arm64 callchain support + * + * Copyright (C) 2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General 
Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include + +#include + +struct frame_tail { + struct frame_tail __user *fp; + unsigned long lr; +} __attribute__((packed)); + +/* + * Get the return address for a single stackframe and return a pointer to the + * next frame tail. + */ +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail >= buftail.fp) + return NULL; + + return buftail.fp; +} + +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. 
+ */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + +void perf_callchain_user(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->pc); + + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; + + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); +#endif + } +} + +/* + * Gets called by walk_stackframe() for every stackframe. 
This will be called + * whist unwinding the stackframe and is like a subroutine return so we use + * the PC. + */ +static int callchain_trace(struct stackframe *frame, void *data) +{ + struct perf_callchain_entry *entry = data; + perf_callchain_store(entry, frame->pc); + return 0; +} + +void perf_callchain_kernel(struct perf_callchain_entry *entry, + struct pt_regs *regs) +{ + struct stackframe frame; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + frame.fp = regs->regs[29]; + frame.sp = regs->sp; + frame.pc = regs->pc; + + walk_stackframe(&frame, callchain_trace, entry); +} + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 563f8ce8df68..182140561416 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -36,7 +36,6 @@ #include #include #include -#include /* * ARMv8 supports a maximum of 32 events. @@ -1413,180 +1412,3 @@ static int __init init_hw_perf_events(void) } early_initcall(init_hw_perf_events); -/* - * Callchain handling code. - */ -struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; -} __attribute__((packed)); - -/* - * Get the return address for a single stackframe and return a pointer to the - * next frame tail. 
- */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, - struct perf_callchain_entry *entry) -{ - struct frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). - */ - if (tail >= buftail.fp) - return NULL; - - return buftail.fp; -} - -#ifdef CONFIG_COMPAT -/* - * The registers we're interested in are at the end of the variable - * length saved register structure. The fp points at the end of this - * structure so the address of this struct is: - * (struct compat_frame_tail *)(xxx->fp)-1 - * - * This code has been adapted from the ARM OProfile support. - */ -struct compat_frame_tail { - compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ - u32 sp; - u32 lr; -} __attribute__((packed)); - -static struct compat_frame_tail __user * -compat_user_backtrace(struct compat_frame_tail __user *tail, - struct perf_callchain_entry *entry) -{ - struct compat_frame_tail buftail; - unsigned long err; - - /* Also check accessibility of one struct frame_tail beyond */ - if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) - return NULL; - - pagefault_disable(); - err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); - pagefault_enable(); - - if (err) - return NULL; - - perf_callchain_store(entry, buftail.lr); - - /* - * Frame pointers should strictly progress back up the stack - * (towards higher addresses). 
- */ - if (tail + 1 >= (struct compat_frame_tail __user *) - compat_ptr(buftail.fp)) - return NULL; - - return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; -} -#endif /* CONFIG_COMPAT */ - -void perf_callchain_user(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - - perf_callchain_store(entry, regs->pc); - - if (!compat_user_mode(regs)) { - /* AARCH64 mode */ - struct frame_tail __user *tail; - - tail = (struct frame_tail __user *)regs->regs[29]; - - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); - } else { -#ifdef CONFIG_COMPAT - /* AARCH32 compat mode */ - struct compat_frame_tail __user *tail; - - tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; - - while ((entry->nr < PERF_MAX_STACK_DEPTH) && - tail && !((unsigned long)tail & 0x3)) - tail = compat_user_backtrace(tail, entry); -#endif - } -} - -/* - * Gets called by walk_stackframe() for every stackframe. This will be called - * whist unwinding the stackframe and is like a subroutine return so we use - * the PC. 
- */ -static int callchain_trace(struct stackframe *frame, void *data) -{ - struct perf_callchain_entry *entry = data; - perf_callchain_store(entry, frame->pc); - return 0; -} - -void perf_callchain_kernel(struct perf_callchain_entry *entry, - struct pt_regs *regs) -{ - struct stackframe frame; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - /* We don't support guest os callchain now */ - return; - } - - frame.fp = regs->regs[29]; - frame.sp = regs->sp; - frame.pc = regs->pc; - - walk_stackframe(&frame, callchain_trace, entry); -} - -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) - return perf_guest_cbs->get_guest_ip(); - - return instruction_pointer(regs); -} - -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - int misc = 0; - - if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { - if (perf_guest_cbs->is_user_mode()) - misc |= PERF_RECORD_MISC_GUEST_USER; - else - misc |= PERF_RECORD_MISC_GUEST_KERNEL; - } else { - if (user_mode(regs)) - misc |= PERF_RECORD_MISC_USER; - else - misc |= PERF_RECORD_MISC_KERNEL; - } - - return misc; -} -- cgit v1.2.3 From 4b3dc9679cf779339d9049800803dfc3c83433d1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2015 18:28:44 +0100 Subject: arm64: force CONFIG_SMP=y and remove redundant #ifdefs Nobody seems to be producing !SMP systems anymore, so this is just becoming a source of kernel bugs, particularly if people want to use coherent DMA with non-shared pages. This patch forces CONFIG_SMP=y for arm64, removing a modest amount of code in the process. 
Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 26 ++++---------------------- arch/arm64/include/asm/assembler.h | 2 -- arch/arm64/include/asm/barrier.h | 24 ------------------------ arch/arm64/include/asm/hardirq.h | 4 ---- arch/arm64/include/asm/irq_work.h | 11 ----------- arch/arm64/include/asm/percpu.h | 8 -------- arch/arm64/include/asm/pgtable.h | 5 ----- arch/arm64/include/asm/ptrace.h | 4 ---- arch/arm64/include/asm/smp.h | 4 ---- arch/arm64/include/asm/topology.h | 9 --------- arch/arm64/kernel/Makefile | 7 ++++--- arch/arm64/kernel/cpu_ops.c | 2 -- arch/arm64/kernel/head.S | 7 ------- arch/arm64/kernel/irq.c | 2 -- arch/arm64/kernel/psci.c | 5 ----- arch/arm64/kernel/setup.c | 6 ------ arch/arm64/kernel/sleep.S | 5 ----- arch/arm64/kernel/time.c | 2 -- arch/arm64/kernel/traps.c | 4 ---- arch/arm64/mm/context.c | 16 ---------------- arch/arm64/mm/flush.c | 4 ---- arch/arm64/mm/proc.S | 4 ---- 22 files changed, 8 insertions(+), 153 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 40f717f8820a..de8dee60fd82 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -28,7 +28,7 @@ config ARM64 select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CLOCKEVENTS_BROADCAST select GENERIC_CPU_AUTOPROBE select GENERIC_EARLY_IOREMAP select GENERIC_IRQ_PROBE @@ -137,6 +137,9 @@ config NEED_DMA_MAP_STATE config NEED_SG_DMA_LENGTH def_bool y +config SMP + def_bool y + config SWIOTLB def_bool y @@ -491,22 +494,8 @@ config CPU_BIG_ENDIAN help Say Y if you plan on running a kernel in big-endian mode. -config SMP - bool "Symmetric Multi-Processing" - help - This enables support for systems with more than one CPU. If - you say N here, the kernel will run on single and - multiprocessor machines, but will use only one CPU of a - multiprocessor machine. If you say Y here, the kernel will run - on many, but not all, single processor machines. 
On a single - processor machine, the kernel will run faster if you say N - here. - - If you don't know what to do here, say N. - config SCHED_MC bool "Multi-core scheduler support" - depends on SMP help Multi-core scheduler support improves the CPU scheduler's decision making when dealing with multi-core CPU chips at a cost of slightly @@ -514,7 +503,6 @@ config SCHED_MC config SCHED_SMT bool "SMT scheduler support" - depends on SMP help Improves the CPU scheduler's decision making when dealing with MultiThreading at a cost of slightly increased overhead in some @@ -523,23 +511,17 @@ config SCHED_SMT config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 - depends on SMP # These have to remain sorted largest to smallest default "64" config HOTPLUG_CPU bool "Support for hot-pluggable CPUs" - depends on SMP help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. source kernel/Kconfig.preempt -config UP_LATE_INIT - def_bool y - depends on !SMP - config HZ int default 100 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index e10516bbe833..b51f2cc22ca9 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -91,9 +91,7 @@ * SMP data memory barrier */ .macro smp_dmb, opt -#ifdef CONFIG_SMP dmb \opt -#endif .endm #define USER(l, x...) 
\ diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 0fa47c4275cb..624f9679f4b0 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -35,28 +35,6 @@ #define dma_rmb() dmb(oshld) #define dma_wmb() dmb(oshst) -#ifndef CONFIG_SMP -#define smp_mb() barrier() -#define smp_rmb() barrier() -#define smp_wmb() barrier() - -#define smp_store_release(p, v) \ -do { \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ACCESS_ONCE(*p) = (v); \ -} while (0) - -#define smp_load_acquire(p) \ -({ \ - typeof(*p) ___p1 = ACCESS_ONCE(*p); \ - compiletime_assert_atomic_type(*p); \ - barrier(); \ - ___p1; \ -}) - -#else - #define smp_mb() dmb(ish) #define smp_rmb() dmb(ishld) #define smp_wmb() dmb(ishst) @@ -109,8 +87,6 @@ do { \ ___p1; \ }) -#endif - #define read_barrier_depends() do { } while(0) #define smp_read_barrier_depends() do { } while(0) diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 6aae421f4d73..2bb7009bdac7 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -24,9 +24,7 @@ typedef struct { unsigned int __softirq_pending; -#ifdef CONFIG_SMP unsigned int ipi_irqs[NR_IPI]; -#endif } ____cacheline_aligned irq_cpustat_t; #include /* Standard mappings for irq_cpustat_t above */ @@ -34,10 +32,8 @@ typedef struct { #define __inc_irq_stat(cpu, member) __IRQ_STAT(cpu, member)++ #define __get_irq_stat(cpu, member) __IRQ_STAT(cpu, member) -#ifdef CONFIG_SMP u64 smp_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu smp_irq_stat_cpu -#endif #define __ARCH_IRQ_EXIT_IRQS_DISABLED 1 diff --git a/arch/arm64/include/asm/irq_work.h b/arch/arm64/include/asm/irq_work.h index b4f6b19a8a68..8e24ef3f7c82 100644 --- a/arch/arm64/include/asm/irq_work.h +++ b/arch/arm64/include/asm/irq_work.h @@ -1,8 +1,6 @@ #ifndef __ASM_IRQ_WORK_H #define __ASM_IRQ_WORK_H -#ifdef CONFIG_SMP - #include static inline bool arch_irq_work_has_interrupt(void) @@ 
-10,13 +8,4 @@ static inline bool arch_irq_work_has_interrupt(void) return !!__smp_cross_call; } -#else - -static inline bool arch_irq_work_has_interrupt(void) -{ - return false; -} - -#endif - #endif /* __ASM_IRQ_WORK_H */ diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 4fde8c1df97f..0a456bef8c79 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -16,8 +16,6 @@ #ifndef __ASM_PERCPU_H #define __ASM_PERCPU_H -#ifdef CONFIG_SMP - static inline void set_my_cpu_offset(unsigned long off) { asm volatile("msr tpidr_el1, %0" :: "r" (off) : "memory"); @@ -38,12 +36,6 @@ static inline unsigned long __my_cpu_offset(void) } #define __my_cpu_offset __my_cpu_offset() -#else /* !CONFIG_SMP */ - -#define set_my_cpu_offset(x) do { } while (0) - -#endif /* CONFIG_SMP */ - #define PERCPU_OP(op, asm_op) \ static inline unsigned long __percpu_##op(void *ptr, \ unsigned long val, int size) \ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 8212e6aa0fb1..d001846c13ac 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -61,13 +61,8 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); -#ifdef CONFIG_SMP #define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF | PTE_SHARED) #define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S) -#else -#define PROT_DEFAULT (PTE_TYPE_PAGE | PTE_AF) -#define PROT_SECT_DEFAULT (PMD_TYPE_SECT | PMD_SECT_AF) -#endif #define PROT_DEVICE_nGnRE (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_DEVICE_nGnRE)) #define PROT_NORMAL_NC (PROT_DEFAULT | PTE_PXN | PTE_UXN | PTE_ATTRINDX(MT_NORMAL_NC)) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index d6dd9fdbc3be..536274ed292e 100644 --- a/arch/arm64/include/asm/ptrace.h +++ 
b/arch/arm64/include/asm/ptrace.h @@ -183,11 +183,7 @@ static inline int valid_user_regs(struct user_pt_regs *regs) #define instruction_pointer(regs) ((unsigned long)(regs)->pc) -#ifdef CONFIG_SMP extern unsigned long profile_pc(struct pt_regs *regs); -#else -#define profile_pc(regs) instruction_pointer(regs) -#endif #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index db02be81b90a..d9c3d6a6100a 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -20,10 +20,6 @@ #include #include -#ifndef CONFIG_SMP -# error " included in non-SMP build" -#endif - #define raw_smp_processor_id() (current_thread_info()->cpu) struct seq_file; diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 225ec3524fbf..a3e9d6fdbf21 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -1,8 +1,6 @@ #ifndef __ASM_TOPOLOGY_H #define __ASM_TOPOLOGY_H -#ifdef CONFIG_SMP - #include struct cpu_topology { @@ -24,13 +22,6 @@ void init_cpu_topology(void); void store_cpu_topology(unsigned int cpuid); const struct cpumask *cpu_coregroup_mask(int cpu); -#else - -static inline void init_cpu_topology(void) { } -static inline void store_cpu_topology(unsigned int cpuid) { } - -#endif - #include #endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index e89063eff14f..f126cfe99003 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -17,7 +17,8 @@ arm64-obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ hyp-stub.o psci.o psci-call.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ - cpufeature.o alternative.o cacheinfo.o + cpufeature.o alternative.o cacheinfo.o \ + smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o entry32.o \ @@ -25,8 +26,7 @@ arm64-obj-$(CONFIG_COMPAT) +=
sys32.o kuser32.o signal32.o \ arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index 5ea337dd2f15..b6bd7d447768 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -30,9 +30,7 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { -#ifdef CONFIG_SMP &smp_spin_table_ops, -#endif &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c0ff3ce4299e..3a0654173997 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -62,13 +62,8 @@ /* * Initial memory map attributes. */ -#ifndef CONFIG_SMP -#define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF -#define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF -#else #define PTE_FLAGS PTE_TYPE_PAGE | PTE_AF | PTE_SHARED #define PMD_FLAGS PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S -#endif #ifdef CONFIG_ARM64_64K_PAGES #define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS @@ -574,7 +569,6 @@ ENTRY(__boot_cpu_mode) .long BOOT_CPU_MODE_EL1 .popsection -#ifdef CONFIG_SMP /* * This provides a "holding pen" for platforms to hold all secondary * cores are held until we're ready for them to initialise. @@ -622,7 +616,6 @@ ENTRY(__secondary_switched) mov x29, #0 b secondary_start_kernel ENDPROC(__secondary_switched) -#endif /* CONFIG_SMP */ /* * Enable the MMU.
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 463fa2e7e34c..11dc3fd47853 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -33,9 +33,7 @@ unsigned long irq_err_count; int arch_show_interrupts(struct seq_file *p, int prec) { -#ifdef CONFIG_SMP show_ipi_list(p, prec); -#endif seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 869f202748e8..ec30152090ae 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -462,8 +462,6 @@ int __init psci_acpi_init(void) } #endif -#ifdef CONFIG_SMP - static int __init cpu_psci_cpu_init(unsigned int cpu) { return 0; @@ -550,7 +548,6 @@ static int cpu_psci_cpu_kill(unsigned int cpu) return -ETIMEDOUT; } #endif -#endif static int psci_suspend_finisher(unsigned long index) { @@ -585,7 +582,6 @@ const struct cpu_operations cpu_psci_ops = { .cpu_init_idle = cpu_psci_cpu_init_idle, .cpu_suspend = cpu_psci_cpu_suspend, #endif -#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -594,6 +590,5 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif -#endif }; diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f3067d4d4e35..cf609cf3fcb5 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -131,7 +131,6 @@ bool arch_match_cpu_phys_id(int cpu, u64 phys_id) } struct mpidr_hash mpidr_hash; -#ifdef CONFIG_SMP /** * smp_build_mpidr_hash - Pre-compute shifts required at each affinity * level in order to build a linear index from an @@ -197,7 +196,6 @@ static void __init smp_build_mpidr_hash(void) pr_warn("Large number of MPIDR hash buckets detected\n"); __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -#endif static void __init hyp_mode_check(void) { @@ -405,10 +403,8 @@ void __init setup_arch(char **cmdline_p) 
xen_early_init(); cpu_read_bootcpu_ops(); -#ifdef CONFIG_SMP smp_init_cpus(); smp_build_mpidr_hash(); -#endif #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -508,9 +504,7 @@ static int c_show(struct seq_file *m, void *v) * online processors, looking for lines beginning with * "processor". Give glibc what it expects. */ -#ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); -#endif /* * Dump out the common processor features in a single line. diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 803cfea41962..5686a3ae3940 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -82,7 +82,6 @@ ENTRY(__cpu_suspend_enter) str x2, [x0, #CPU_CTX_SP] ldr x1, =sleep_save_sp ldr x1, [x1, #SLEEP_SAVE_SP_VIRT] -#ifdef CONFIG_SMP mrs x7, mpidr_el1 ldr x9, =mpidr_hash ldr x10, [x9, #MPIDR_HASH_MASK] @@ -94,7 +93,6 @@ ENTRY(__cpu_suspend_enter) ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 add x1, x1, x8, lsl #3 -#endif bl __cpu_suspend_save /* * Grab suspend finisher in x20 and its argument in x19 @@ -151,7 +149,6 @@ ENDPROC(cpu_resume_after_mmu) ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly -#ifdef CONFIG_SMP mrs x1, mpidr_el1 adrp x8, mpidr_hash add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address @@ -161,9 +158,6 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ -#else - mov x7, xzr -#endif ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr x0, [x0, x7, lsl #3] /* load sp from context */ diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 42f9195cf2f8..149151fb42bb 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -42,7 +42,6 @@ #include #include -#ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { struct stackframe frame; @@ -62,7 +61,6 @@ unsigned long profile_pc(struct pt_regs
*regs) return frame.pc; } EXPORT_SYMBOL(profile_pc); -#endif void __init time_init(void) { diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 4db6a2574fec..1ea920cbd66d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -189,11 +189,7 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #else #define S_PREEMPT "" #endif -#ifdef CONFIG_SMP #define S_SMP " SMP" -#else -#define S_SMP "" -#endif static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 76c1e6cd36fc..d70ff14dbdbd 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -53,8 +53,6 @@ static void flush_context(void) __flush_icache_all(); } -#ifdef CONFIG_SMP - static void set_mm_context(struct mm_struct *mm, unsigned int asid) { unsigned long flags; @@ -110,23 +108,12 @@ static void reset_context(void *info) cpu_switch_mm(mm->pgd, mm); } -#else - -static inline void set_mm_context(struct mm_struct *mm, unsigned int asid) -{ - mm->context.id = asid; - cpumask_copy(mm_cpumask(mm), cpumask_of(smp_processor_id())); -} - -#endif - void __new_context(struct mm_struct *mm) { unsigned int asid; unsigned int bits = asid_bits(); raw_spin_lock(&cpu_asid_lock); -#ifdef CONFIG_SMP /* * Check the ASID again, in case the change was broadcast from another * CPU before we acquired the lock. 
@@ -136,7 +123,6 @@ void __new_context(struct mm_struct *mm) raw_spin_unlock(&cpu_asid_lock); return; } -#endif /* * At this point, it is guaranteed that the current mm (with an old * ASID) isn't active on any other CPU since the ASIDs are changed @@ -155,10 +141,8 @@ void __new_context(struct mm_struct *mm) cpu_last_asid = ASID_FIRST_VERSION; asid = cpu_last_asid + smp_processor_id(); flush_context(); -#ifdef CONFIG_SMP smp_wmb(); smp_call_function(reset_context, NULL, 1); -#endif cpu_last_asid += NR_CPUS - 1; } diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 4dfa3975ce5b..c26b804015e8 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -60,14 +60,10 @@ void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long uaddr, void *dst, const void *src, unsigned long len) { -#ifdef CONFIG_SMP preempt_disable(); -#endif memcpy(dst, src, len); flush_ptrace_access(vma, page, uaddr, dst, len); -#ifdef CONFIG_SMP preempt_enable(); -#endif } void __sync_icache_dcache(pte_t pte, unsigned long addr) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index a8be513dff6f..34da270f9e34 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -34,11 +34,7 @@ #define TCR_TG_FLAGS TCR_TG0_4K | TCR_TG1_4K #endif -#ifdef CONFIG_SMP #define TCR_SMP_FLAGS TCR_SHARED -#else -#define TCR_SMP_FLAGS 0 -#endif /* PTWs cacheable, inner/outer WBWA */ #define TCR_CACHE_FLAGS TCR_IRGN_WBWA | TCR_ORGN_WBWA -- cgit v1.2.3 From 1d1ddf67dc3bfd80f60b216fa1fedfb242bee299 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 17 Jul 2015 16:58:21 +0100 Subject: arm64: dma-mapping: implement dma_get_sgtable() The default dma_common_get_sgtable() implementation relies on the CPU address of the buffer being a regular lowmem address. This is not always the case on arm64, since allocations from the various DMA pools may have remapped vmalloc addresses, rendering the use of virt_to_page() invalid. 
Fix this by providing our own implementation based on the fact that we can safely derive a physical address from the DMA address in both cases. CC: Jon Medhurst Signed-off-by: Robin Murphy [will: made static] Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 63b2a117a03c..e5d74cdfdb71 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -331,10 +331,24 @@ static int __swiotlb_mmap(struct device *dev, return ret; } +static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t handle, size_t size, + struct dma_attrs *attrs) +{ + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + + if (!ret) + sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)), + PAGE_ALIGN(size), 0); + + return ret; +} + static struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, .mmap = __swiotlb_mmap, + .get_sgtable = __swiotlb_get_sgtable, .map_page = __swiotlb_map_page, .unmap_page = __swiotlb_unmap_page, .map_sg = __swiotlb_map_sg_attrs, -- cgit v1.2.3 From 0a570e7adeeae28892e60bc919c7dcf011815134 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 21 Jul 2015 15:43:58 +0100 Subject: arm64: hugetlb: remove paragraph about writing to FSF Remove paragraph about writing to the Free Software Foundation's mailing address from GPL notice. Signed-off-by: Jisheng Zhang Signed-off-by: Will Deacon --- arch/arm64/include/asm/hugetlb.h | 4 ---- arch/arm64/mm/hugetlbpage.c | 4 ---- 2 files changed, 8 deletions(-) diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h index 2fd9b14ca295..bb4052e85dba 100644 --- a/arch/arm64/include/asm/hugetlb.h +++ b/arch/arm64/include/asm/hugetlb.h @@ -13,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #ifndef __ASM_HUGETLB_H diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 831ec534d449..383b03ff38f8 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -13,10 +13,6 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include -- cgit v1.2.3 From ae2fb7ece973f0b285f41985f5de85c57df9bf5a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 21 Jul 2015 11:36:39 +0100 Subject: arm64: perf: condense event number maps Most of the cache events an architecture might support do not map well to those provided by the ARM architecture, and as such most entries in the event number maps are *_UNSUPPORTED. Unfortunately, as 0 is a valid physical event identifier, the *_UNSUPPORTED macros expand to a non-zero value and thus each unsupported event must be explicitly initialised as such. This leads to large diffs when adding support for a new CPU, and makes it difficult to spot the important information. This patch follows arch/arm/ in making use of PERF_*_ALL_UNSUPPORTED macros to initialise all entries to *_UNSUPPORTED before overriding this for the specific events we actually support, resulting in a significant source code reduction.
Signed-off-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 124 ++++++++--------------------------------- 1 file changed, 22 insertions(+), 102 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 182140561416..f9a74d4fff3b 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -77,6 +77,16 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF +#define PERF_MAP_ALL_UNSUPPORTED \ + [0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED + +#define PERF_CACHE_MAP_ALL_UNSUPPORTED \ +[0 ... C(MAX) - 1] = { \ + [0 ... C(OP_MAX) - 1] = { \ + [0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \ + }, \ +} + static int armpmu_map_cache_event(const unsigned (*cache_map) [PERF_COUNT_HW_CACHE_MAX] @@ -700,118 +710,28 @@ enum armv8_pmuv3_perf_types { /* PMUv3 HW events mapping. */ static const unsigned armv8_pmuv3_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, [PERF_COUNT_HW_CPU_CYCLES] = ARMV8_PMUV3_PERFCTR_CLOCK_CYCLES, [PERF_COUNT_HW_INSTRUCTIONS] = ARMV8_PMUV3_PERFCTR_INSTR_EXECUTED, [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, [PERF_COUNT_HW_CACHE_MISSES] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, [PERF_COUNT_HW_BRANCH_MISSES] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, }; static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, - [C(RESULT_MISS)] = 
ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, - [C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] 
= CACHE_OP_UNSUPPORTED, - }, - }, - [C(NODE)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, + PERF_CACHE_MAP_ALL_UNSUPPORTED, + + [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_ACCESS, + [C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_L1_DCACHE_REFILL, + + [C(BPU)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_READ)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_PRED, + [C(BPU)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV8_PMUV3_PERFCTR_PC_BRANCH_MIS_PRED, }; /* -- cgit v1.2.3 From 79b0e09a3c9bd74ee54582efdb351179d7c00351 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jul 2015 13:23:26 +0100 Subject: arm64: kernel: Add cpuid_feature_extract_field() for 4bit sign extension Based on arch/arm/include/asm/cputype.h, this function does the shifting and sign extension necessary when accessing cpu feature fields. 
Signed-off-by: James Morse Suggested-by: Russell King Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c1044218a63a..9fafa7537997 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -70,6 +70,13 @@ static inline void cpus_set_cap(unsigned int num) __set_bit(num, cpu_hwcaps); } +static inline int __attribute_const__ cpuid_feature_extract_field(u64 features, + int field) +{ + return (s64)(features << (64 - 4 - field)) >> (64 - 4); +} + + void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, const char *info); void check_local_cpu_errata(void); -- cgit v1.2.3 From 63e40815f02584ba8174e0f6af40924b2b335cae Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 22 Jul 2015 12:21:01 +0100 Subject: arm64: alternative: Provide if/else/endif assembler macros The existing alternative_insn macro has some limitations that make it hard to work with. In particular the fact it takes instructions from its own macro arguments means it doesn't play very nicely with C pre-processor macros because the macro arguments look like a string to the C pre-processor. Workarounds are (probably) possible but things start to look ugly. Introduce an alternative set of macros that allows instructions to be presented to the assembler as normal and switch everything over to the new macros. Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 41 ++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index c385a0c4057f..e86681ad0931 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -77,6 +77,47 @@ void free_alternatives_memory(void); .org .
- (662b-661b) + (664b-663b) .endm +/* + * Begin an alternative code sequence. + * + * The code that follows this macro will be assembled and linked as + * normal. There are no restrictions on this code. + */ +.macro alternative_if_not cap + .pushsection .altinstructions, "a" + altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f + .popsection +661: +.endm + +/* + * Provide the alternative code sequence. + * + * The code that follows this macro is assembled into a special + * section to be used for dynamic patching. Code that follows this + * macro must: + * + * 1. Be exactly the same length (in bytes) as the default code + * sequence. + * + * 2. Not contain a branch target that is used outside of the + * alternative sequence it is defined in (branches into an + * alternative sequence are not fixed up). + */ +.macro alternative_else +662: .pushsection .altinstr_replacement, "ax" +663: +.endm + +/* + * Complete an alternative code sequence. + */ +.macro alternative_endif +664: .popsection + .org . - (664b-663b) + (662b-661b) + .org . - (662b-661b) + (664b-663b) +.endm + #endif /* __ASSEMBLY__ */ #endif /* __ASM_ALTERNATIVE_H */ -- cgit v1.2.3 From 271d35eb77d0f53177b44968417b630d1fee8b99 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 22 Jul 2015 12:21:02 +0100 Subject: arm64: mm: Adopt new alternative assembler macros Convert the dynamic patching for ARM64_WORKAROUND_CLEAN_CACHE over to the newly added alternative assembler macros. 
Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/mm/cache.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index bdeb5d38c2dd..eb48d5df4a0f 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -143,7 +143,12 @@ __dma_clean_range: dcache_line_size x2, x3 sub x3, x2, #1 bic x0, x0, x3 -1: alternative_insn "dc cvac, x0", "dc civac, x0", ARM64_WORKAROUND_CLEAN_CACHE +1: +alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE + dc cvac, x0 +alternative_else + dc civac, x0 +alternative_endif add x0, x0, x2 cmp x0, x1 b.lo 1b -- cgit v1.2.3 From e28cabf12304717b1054d0a02f0850f91e8a2074 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 22 Jul 2015 12:21:03 +0100 Subject: arm64: kernel: Adopt new alternative assembler macros Convert the dynamic patching for ARM64_WORKAROUND_845719 over to the newly added alternative assembler macros. Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e16351819fed..d8a523600a4c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -122,26 +122,23 @@ ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer msr sp_el0, x23 - #ifdef CONFIG_ARM64_ERRATUM_845719 - -#undef SEQUENCE_ORG -#undef SEQUENCE_ALT - +alternative_if_not ARM64_WORKAROUND_845719 + nop + nop #ifdef CONFIG_PID_IN_CONTEXTIDR - -#define SEQUENCE_ORG "nop ; nop ; nop" -#define SEQUENCE_ALT "tbz x22, #4, 1f ; mrs x29, contextidr_el1; msr contextidr_el1, x29; 1:" - + nop +#endif +alternative_else + tbz x22, #4, 1f +#ifdef CONFIG_PID_IN_CONTEXTIDR + mrs x29, contextidr_el1 + msr contextidr_el1, x29 #else - -#define SEQUENCE_ORG "nop ; nop" -#define SEQUENCE_ALT "tbz x22, #4, 1f ; msr contextidr_el1, xzr; 1:" - + msr contextidr_el1, xzr #endif - - 
alternative_insn SEQUENCE_ORG, SEQUENCE_ALT, ARM64_WORKAROUND_845719 - +1: +alternative_endif #endif .endif msr elr_el1, x21 // set up the return data -- cgit v1.2.3 From fc032421ef4a825660fcc4d11672bc2dea202893 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Wed, 22 Jul 2015 12:21:04 +0100 Subject: arm64: kvm: Adopt new alternative assembler macros Convert the dynamic patching for ARM64_HAS_SYSREG_GIC_CPUIF over to the newly added alternative assembler macros. Acked-by: Marc Zyngier Signed-off-by: Daniel Thompson Signed-off-by: Will Deacon --- arch/arm64/kvm/hyp.S | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 17a8fb14f428..10915aaf0b01 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -810,7 +810,11 @@ * Call into the vgic backend for state saving */ .macro save_vgic_state - alternative_insn "bl __save_vgic_v2_state", "bl __save_vgic_v3_state", ARM64_HAS_SYSREG_GIC_CPUIF +alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF + bl __save_vgic_v2_state +alternative_else + bl __save_vgic_v3_state +alternative_endif mrs x24, hcr_el2 mov x25, #HCR_INT_OVERRIDE neg x25, x25 @@ -827,7 +831,11 @@ orr x24, x24, #HCR_INT_OVERRIDE orr x24, x24, x25 msr hcr_el2, x24 - alternative_insn "bl __restore_vgic_v2_state", "bl __restore_vgic_v3_state", ARM64_HAS_SYSREG_GIC_CPUIF +alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF + bl __restore_vgic_v2_state +alternative_else + bl __restore_vgic_v3_state +alternative_endif .endm .macro save_timer_state -- cgit v1.2.3 From 870828e57b141eff76a5325f20e4691dd2a599b1 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jul 2015 13:23:27 +0100 Subject: arm64: kernel: Move config_sctlr_el1 Later patches need config_sctlr_el1 to set/clear bits in the sctlr_el1 register. This patch moves this function into a header file.
Acked-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/cputype.h | 3 --- arch/arm64/include/asm/sysreg.h | 12 ++++++++++++ arch/arm64/kernel/armv8_deprecated.c | 11 +---------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index a84ec605bed8..ee6403df9fe4 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,9 +81,6 @@ #define ID_AA64MMFR0_BIGEND(mmfr0) \ (((mmfr0) & ID_AA64MMFR0_BIGEND_MASK) >> ID_AA64MMFR0_BIGEND_SHIFT) -#define SCTLR_EL1_CP15BEN (0x1 << 5) -#define SCTLR_EL1_SED (0x1 << 8) - #ifndef __ASSEMBLY__ /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5c89df0acbcb..56391fbae1e1 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,9 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#define SCTLR_EL1_CP15BEN (0x1 << 5) +#define SCTLR_EL1_SED (0x1 << 8) + #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) @@ -55,6 +58,15 @@ asm( " .endm\n" ); +static inline void config_sctlr_el1(u32 clear, u32 set) +{ + u32 val; + + asm volatile("mrs %0, sctlr_el1" : "=r" (val)); + val &= ~clear; + val |= set; + asm volatile("msr sctlr_el1, %0" : : "r" (val)); +} #endif #endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 7922c2e710ca..78d56bff91fd 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include @@ -504,16 +505,6 @@ ret: return 0; } -static inline void config_sctlr_el1(u32 clear, u32 set) -{ - u32 val; - - asm volatile("mrs %0, sctlr_el1" : "=r" (val)); - val &= ~clear; - val |= set; - asm volatile("msr sctlr_el1, %0" : : "r" (val)); -} - static int cp15_barrier_set_hw_mode(bool 
enable) { if (enable) -- cgit v1.2.3 From 1c0763037f1e1caef739e36e09c6d41ed7b61b2d Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jul 2015 13:23:28 +0100 Subject: arm64: kernel: Add cpufeature 'enable' callback This patch adds an 'enable()' callback to cpu capability/feature detection, allowing features that require some setup or configuration to get this opportunity once the feature has been detected. Acked-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 1 + arch/arm64/kernel/cpufeature.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9fafa7537997..484fa9425314 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -34,6 +34,7 @@ struct arm64_cpu_capabilities { const char *desc; u16 capability; bool (*matches)(const struct arm64_cpu_capabilities *); + void (*enable)(void); union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 5ad86ceac010..650ffc28bedc 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -55,6 +55,12 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, pr_info("%s %s\n", info, caps[i].desc); cpus_set_cap(caps[i].capability); } + + /* second pass allows enable() to consider interacting capabilities */ + for (i = 0; caps[i].desc; i++) { + if (cpus_have_cap(caps[i].capability) && caps[i].enable) + caps[i].enable(); + } } void check_local_cpu_features(void) -- cgit v1.2.3 From 18ffa046c509d0cd011eeea2c0418f2d014771fc Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jul 2015 13:23:29 +0100 Subject: arm64: kernel: Add min_field_value and use '>=' for feature detection When a new cpu feature is available, the cpu feature bits will have some initial value, which is incremented when the 
feature is updated. This patch changes 'register_value' to be 'min_field_value', and checks the feature bits value (interpreted as a signed int) is greater than this minimum. Acked-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 484fa9425314..f595f7ddd43b 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -42,8 +42,8 @@ struct arm64_cpu_capabilities { }; struct { /* Feature register checking */ - u64 register_mask; - u64 register_value; + int field_pos; + int min_field_value; }; }; }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 650ffc28bedc..74fd0f74b065 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -22,13 +22,21 @@ #include #include +static bool +feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) +{ + int val = cpuid_feature_extract_field(reg, entry->field_pos); + + return val >= entry->min_field_value; +} + static bool has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry) { u64 val; val = read_cpuid(id_aa64pfr0_el1); - return (val & entry->register_mask) == entry->register_value; + return feature_matches(val, entry); } static const struct arm64_cpu_capabilities arm64_features[] = { @@ -36,8 +44,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "GIC system register CPU interface", .capability = ARM64_HAS_SYSREG_GIC_CPUIF, .matches = has_id_aa64pfr0_feature, - .register_mask = (0xf << 24), - .register_value = (1 << 24), + .field_pos = 24, + .min_field_value = 1, }, {}, }; -- cgit v1.2.3 From 91a5cefa2f98bdd3404c2fba57048c4fa225cc37 Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 21 Jul 2015 13:23:30 +0100 Subject: 
arm64: kernel: Add optional CONFIG_ parameter to ALTERNATIVE() Some uses of ALTERNATIVE() may depend on a feature that is disabled at compile time by a Kconfig option. In this case the unused alternative instructions waste space, and if the original instruction is a nop, it wastes time and space. This patch adds an optional 'config' option to ALTERNATIVE() and alternative_insn that allows the compiler to remove both the original and alternative instructions if the config option is not defined. Suggested-by: Catalin Marinas Acked-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index e86681ad0931..20367882226c 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -3,6 +3,7 @@ #ifndef __ASSEMBLY__ +#include #include #include #include @@ -40,7 +41,8 @@ void free_alternatives_memory(void); * be fixed in a binutils release posterior to 2.25.51.0.2 (anything * containing commit 4e4d08cf7399b606 or c1baaddf8861). */ -#define ALTERNATIVE(oldinstr, newinstr, feature) \ +#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled) \ + ".if "__stringify(cfg_enabled)" == 1\n" \ "661:\n\t" \ oldinstr "\n" \ "662:\n" \ @@ -53,7 +55,11 @@ void free_alternatives_memory(void); "664:\n\t" \ ".popsection\n\t" \ ".org . - (664b-663b) + (662b-661b)\n\t" \ - ".org . - (662b-661b) + (664b-663b)\n" + ".org . - (662b-661b) + (664b-663b)\n" \ + ".endif\n" + +#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...) 
\ + __ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg)) #else @@ -65,7 +71,8 @@ void free_alternatives_memory(void); .byte \alt_len .endm -.macro alternative_insn insn1 insn2 cap +.macro alternative_insn insn1, insn2, cap, enable = 1 + .if \enable 661: \insn1 662: .pushsection .altinstructions, "a" altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f @@ -75,6 +82,7 @@ void free_alternatives_memory(void); 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .endif .endm /* @@ -118,6 +126,20 @@ void free_alternatives_memory(void); .org . - (662b-661b) + (664b-663b) .endm +#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ + alternative_insn insn1, insn2, cap, IS_ENABLED(cfg) + + #endif /* __ASSEMBLY__ */ +/* + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature)); + * + * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO)); + * N.B. If CONFIG_FOO is specified, but not selected, the whole block + * will be omitted, including oldinstr. + */ +#define ALTERNATIVE(oldinstr, newinstr, ...) \ + _ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1) + #endif /* __ASM_ALTERNATIVE_H */ -- cgit v1.2.3 From 9ded63aaf83eba76e1a54ac02581c2badc497f1a Mon Sep 17 00:00:00 2001 From: "Suzuki K. Poulose" Date: Wed, 22 Jul 2015 11:38:14 +0100 Subject: arm64: Generalise msr_s/mrs_s operations The system register encoding generated by sys_reg() works only for MRS/MSR(Register) operations, as we hardcode Bit20 to 1 in mrs_s/msr_s mask. This makes it unusable for generating instructions accessing registers with Op0 < 2 (e.g., PSTATE.x with Op0=0). As per ARMv8 ARM, (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", C5.2, version:ARM DDI 0487A.f), the instruction encoding reserves bits [20-19] for Op0. This patch generalises the sys_reg, mrs_s and msr_s macros, so that we could use them to access any of the supported system registers. Cc: James Morse Cc: Catalin Marinas Signed-off-by: Suzuki K.
Poulose Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/sysreg.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 56391fbae1e1..5295bcbcb374 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -23,8 +23,18 @@ #define SCTLR_EL1_CP15BEN (0x1 << 5) #define SCTLR_EL1_SED (0x1 << 8) +/* + * ARMv8 ARM reserves the following encoding for system registers: + * (Ref: ARMv8 ARM, Section: "System instruction class encoding overview", + * C5.2, version:ARM DDI 0487A.f) + * [20-19] : Op0 + * [18-16] : Op1 + * [15-12] : CRn + * [11-8] : CRm + * [7-5] : Op2 + */ #define sys_reg(op0, op1, crn, crm, op2) \ - ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) #ifdef __ASSEMBLY__ @@ -34,11 +44,11 @@ .equ __reg_num_xzr, 31 .macro mrs_s, rt, sreg - .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .inst 0xd5200000|(\sreg)|(__reg_num_\rt) .endm .macro msr_s, sreg, rt - .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .inst 0xd5000000|(\sreg)|(__reg_num_\rt) .endm #else @@ -50,11 +60,11 @@ asm( " .equ __reg_num_xzr, 31\n" "\n" " .macro mrs_s, rt, sreg\n" -" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5200000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" "\n" " .macro msr_s, sreg, rt\n" -" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .inst 0xd5000000|(\\sreg)|(__reg_num_\\rt)\n" " .endm\n" ); -- cgit v1.2.3 From 338d4f49d6f7114a017d294ccf7374df4f998edc Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 22 Jul 2015 19:05:54 +0100 Subject: arm64: kernel: Add support for Privileged Access Never 'Privileged Access Never' is a new arm8.1 feature which prevents privileged code from accessing any virtual address where read or write access is also permitted at EL0. 
This patch enables the PAN feature on all CPUs, and modifies {get,put}_user helpers temporarily to permit access. This will catch kernel bugs where user memory is accessed directly. 'Unprivileged loads and stores' using ldtrb et al are unaffected by PAN. Reviewed-by: Catalin Marinas Signed-off-by: James Morse [will: use ALTERNATIVE in asm and tidy up pan_enable check] Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/include/asm/futex.h | 8 ++++++++ arch/arm64/include/asm/processor.h | 2 ++ arch/arm64/include/asm/sysreg.h | 8 ++++++++ arch/arm64/include/asm/uaccess.h | 11 +++++++++++ arch/arm64/include/uapi/asm/ptrace.h | 1 + arch/arm64/kernel/armv8_deprecated.c | 8 +++++++- arch/arm64/kernel/cpufeature.c | 20 ++++++++++++++++++++ arch/arm64/lib/clear_user.S | 8 ++++++++ arch/arm64/lib/copy_from_user.S | 8 ++++++++ arch/arm64/lib/copy_in_user.S | 8 ++++++++ arch/arm64/lib/copy_to_user.S | 8 ++++++++ arch/arm64/mm/fault.c | 16 ++++++++++++++++ 14 files changed, 121 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index de8dee60fd82..c2bd79a02a6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -596,6 +596,20 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" +config ARM64_PAN + bool "Enable support for Privileged Access Never (PAN)" + default y + help + Privileged Access Never (PAN; part of the ARMv8.1 Extensions) + prevents the kernel or hypervisor from accessing user-space (EL0) + memory directly. + + Choosing this option will cause any unprotected (not using + copy_to_user et al) memory access to fail with a permission fault. + + The feature is detected at runtime, and will remain as a 'nop' + instruction if the cpu does not implement the feature. 
+ menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index f595f7ddd43b..d71140b76773 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -25,8 +25,9 @@ #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 #define ARM64_WORKAROUND_845719 2 #define ARM64_HAS_SYSREG_GIC_CPUIF 3 +#define ARM64_HAS_PAN 4 -#define ARM64_NCAPS 4 +#define ARM64_NCAPS 5 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 74069b3bd919..775e85b9d1f2 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -20,10 +20,16 @@ #include #include + +#include +#include #include +#include #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ + ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ @@ -39,6 +45,8 @@ " .align 3\n" \ " .quad 1b, 4b, 2b, 4b\n" \ " .popsection\n" \ + ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ : "memory") diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index e4c893e54f01..98f32355dc97 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -186,4 +186,6 @@ static inline void spin_lock_prefetch(const void *x) #endif +void cpu_enable_pan(void); + #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 5295bcbcb374..a7f3d4b2514d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include + #define SCTLR_EL1_CP15BEN (0x1 << 5) #define SCTLR_EL1_SED (0x1 << 8) @@ -36,6 
+38,12 @@ #define sys_reg(op0, op1, crn, crm, op2) \ ((((op0)&3)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) +#define REG_PSTATE_PAN_IMM sys_reg(0, 0, 4, 0, 4) +#define SCTLR_EL1_SPAN (1 << 23) + +#define SET_PSTATE_PAN(x) __inst_arm(0xd5000000 | REG_PSTATE_PAN_IMM |\ + (!!x)<<8 | 0x1f) + #ifdef __ASSEMBLY__ .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 07e1ba449bf1..b2ede967fe7d 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -24,7 +24,10 @@ #include #include +#include +#include #include +#include #include #include #include @@ -131,6 +134,8 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_val; \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __get_user_asm("ldrb", "%w", __gu_val, (ptr), (err)); \ @@ -148,6 +153,8 @@ do { \ BUILD_BUG(); \ } \ (x) = (__force __typeof__(*(ptr)))__gu_val; \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __get_user(x, ptr) \ @@ -194,6 +201,8 @@ do { \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ switch (sizeof(*(ptr))) { \ case 1: \ __put_user_asm("strb", "%w", __pu_val, (ptr), (err)); \ @@ -210,6 +219,8 @@ do { \ default: \ BUILD_BUG(); \ } \ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN)); \ } while (0) #define __put_user(x, ptr) \ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index 6913643bbe54..208db3df135a 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -44,6 +44,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 
0x00000200 +#define PSR_PAN_BIT 0x00400000 #define PSR_Q_BIT 0x08000000 #define PSR_V_BIT 0x10000000 #define PSR_C_BIT 0x20000000 diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 78d56bff91fd..bcee7abac68e 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include #include @@ -280,6 +282,8 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) */ #define __user_swpX_asm(data, addr, res, temp, B) \ __asm__ __volatile__( \ + ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ " mov %w2, %w1\n" \ "0: ldxr"B" %w1, [%3]\n" \ "1: stxr"B" %w0, %w2, [%3]\n" \ @@ -295,7 +299,9 @@ static void register_insn_emulation_sysctl(struct ctl_table *table) " .align 3\n" \ " .quad 0b, 3b\n" \ " .quad 1b, 3b\n" \ - " .popsection" \ + " .popsection\n" \ + ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) \ : "=&r" (res), "+r" (data), "=&r" (temp) \ : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ : "memory") diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 74fd0f74b065..978fa169d3c3 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -21,6 +21,7 @@ #include #include #include +#include static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) @@ -39,6 +40,15 @@ has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry) return feature_matches(val, entry); } +static bool __maybe_unused +has_id_aa64mmfr1_feature(const struct arm64_cpu_capabilities *entry) +{ + u64 val; + + val = read_cpuid(id_aa64mmfr1_el1); + return feature_matches(val, entry); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -47,6 +57,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = 24, .min_field_value = 1, }, 
+#ifdef CONFIG_ARM64_PAN + { + .desc = "Privileged Access Never", + .capability = ARM64_HAS_PAN, + .matches = has_id_aa64mmfr1_feature, + .field_pos = 20, + .min_field_value = 1, + .enable = cpu_enable_pan, + }, +#endif /* CONFIG_ARM64_PAN */ {}, }; diff --git a/arch/arm64/lib/clear_user.S b/arch/arm64/lib/clear_user.S index c17967fdf5f6..a9723c71c52b 100644 --- a/arch/arm64/lib/clear_user.S +++ b/arch/arm64/lib/clear_user.S @@ -16,7 +16,11 @@ * along with this program. If not, see . */ #include + +#include #include +#include +#include .text @@ -29,6 +33,8 @@ * Alignment fixed up by hardware. */ ENTRY(__clear_user) +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) mov x2, x1 // save the size for fixup return subs x1, x1, #8 b.mi 2f @@ -48,6 +54,8 @@ USER(9f, strh wzr, [x0], #2 ) b.mi 5f USER(9f, strb wzr, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__clear_user) diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 47c3fa5ae4ae..1be9ef27be97 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -15,7 +15,11 @@ */ #include + +#include #include +#include +#include /* * Copy from user space to a kernel buffer (alignment handled by the hardware) @@ -28,6 +32,8 @@ * x0 - bytes not copied */ ENTRY(__copy_from_user) +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) add x5, x1, x2 // upper user buffer boundary subs x2, x2, #16 b.mi 1f @@ -56,6 +62,8 @@ USER(9f, ldrh w3, [x1], #2 ) USER(9f, ldrb w3, [x1] ) strb w3, [x0] 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_from_user) diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 436bcc5d77b5..1b94661e22b3 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -17,7 +17,11 @@ */ 
#include + +#include #include +#include +#include /* * Copy from user space to user space (alignment handled by the hardware) @@ -30,6 +34,8 @@ * x0 - bytes not copied */ ENTRY(__copy_in_user) +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) add x5, x0, x2 // upper user buffer boundary subs x2, x2, #16 b.mi 1f @@ -58,6 +64,8 @@ USER(9f, strh w3, [x0], #2 ) USER(9f, ldrb w3, [x1] ) USER(9f, strb w3, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_in_user) diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index f5e1f526f408..a257b47e2dc4 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -15,7 +15,11 @@ */ #include + +#include #include +#include +#include /* * Copy to user space from a kernel buffer (alignment handled by the hardware) @@ -28,6 +32,8 @@ * x0 - bytes not copied */ ENTRY(__copy_to_user) +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(0)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) add x5, x0, x2 // upper user buffer boundary subs x2, x2, #16 b.mi 1f @@ -56,6 +62,8 @@ USER(9f, strh w3, [x0], #2 ) ldrb w3, [x1] USER(9f, strb w3, [x0] ) 5: mov x0, #0 +ALTERNATIVE("nop", __stringify(SET_PSTATE_PAN(1)), ARM64_HAS_PAN, \ + CONFIG_ARM64_PAN) ret ENDPROC(__copy_to_user) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 94d98cd1aad8..ce591211434e 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -30,9 +30,11 @@ #include #include +#include #include #include #include +#include #include #include #include @@ -223,6 +225,13 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } + /* + * PAN bit set implies the fault happened in kernel space, but not + * in the arch's user access functions. 
+ */ + if (IS_ENABLED(CONFIG_ARM64_PAN) && (regs->pstate & PSR_PAN_BIT)) + goto no_context; + /* * As per x86, we may deadlock here. However, since the kernel only * validly references user space from well defined areas of the code, @@ -536,3 +545,10 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, return 0; } + +#ifdef CONFIG_ARM64_PAN +void cpu_enable_pan(void) +{ + config_sctlr_el1(SCTLR_EL1_SPAN, 0); +} +#endif /* CONFIG_ARM64_PAN */ -- cgit v1.2.3 From 77ee306c0aea9a219daec256ad25982944affef8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Jul 2015 13:51:46 +0100 Subject: arm64: alternatives: add enable parameter to conditional asm macros There are cases where we want to compile out both versions of an alternative code block, so add an enable parameter to the new conditional alternative assembly macros in the same way as alternative_insn. Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 20367882226c..b474e9106bc2 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -91,11 +91,13 @@ void free_alternatives_memory(void); * The code that follows this macro will be assembled and linked as * normal. There are no restrictions on this code. */ -.macro alternative_if_not cap +.macro alternative_if_not cap, enable = 1 + .if \enable .pushsection .altinstructions, "a" altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f .popsection 661: + .endif .endm /* @@ -112,18 +114,22 @@ void free_alternatives_memory(void); * alternative sequence it is defined in (branches into an * alternative sequence are not fixed up). */ -.macro alternative_else +.macro alternative_else, enable = 1 + .if \enable 662: .pushsection .altinstr_replacement, "ax" 663: + .endif .endm /* * Complete an alternative code sequence. 
*/ -.macro alternative_endif +.macro alternative_endif, enable = 1 + .if \enable 664: .popsection .org . - (664b-663b) + (662b-661b) .org . - (662b-661b) + (664b-663b) + .endif .endm #define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...) \ -- cgit v1.2.3 From e094d44568680d4e5e2722c4ad090ff0810719b9 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Thu, 23 Jul 2015 18:28:26 +0100 Subject: arm64: kernel: remove non-legit DT warnings when booting using ACPI Since both CONFIG_ACPI and CONFIG_OF are enabled when booting using ACPI tables on ARM64 platforms, we get few device tree warnings which are not valid for ACPI boot. We can use of_have_populated_dt to check if the device tree is populated or not before throwing out those errors. This patch uses of_have_populated_dt to remove non legitimate device tree warning when booting using ACPI tables. Cc: Lorenzo Pieralisi Acked-by: Catalin Marinas Acked-by: Mark Rutland Signed-off-by: Sudeep Holla Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 9 +++++++-- arch/arm64/kernel/topology.c | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index cf609cf3fcb5..e7a1e719f127 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -423,8 +423,13 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { - of_iommu_init(); - of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + if (of_have_populated_dt()) { + of_iommu_init(); + of_platform_populate(NULL, of_default_bus_match_table, + NULL, NULL); + } else if (acpi_disabled) { + pr_crit("Device tree not populated\n"); + } return 0; } arch_initcall_sync(arm64_device_init); diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index fcb8f7b42271..694f6deedbab 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -300,6 +300,6 @@ void __init init_cpu_topology(void) * Discard anything that was parsed 
if we hit an error so we * don't use partial information. */ - if (parse_dt_topology()) + if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); } -- cgit v1.2.3 From e38457c361b30c5a2379cc767877c3e5c2505dc6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 24 Jul 2015 12:38:27 +0100 Subject: arm64: efi: prefer AllocatePages() over efi_low_alloc() for vmlinux When allocating memory for the kernel image, try the AllocatePages() boot service to obtain memory at the preferred offset of 'dram_base + TEXT_OFFSET', and only revert to efi_low_alloc() if that fails. This is the only way to allocate at the base of DRAM if DRAM starts at 0x0, since efi_low_alloc() refuses to allocate at 0x0. Tested-by: Haojian Zhuang Reviewed-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/efi-stub.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c index f5374065ad53..816120ece6bc 100644 --- a/arch/arm64/kernel/efi-stub.c +++ b/arch/arm64/kernel/efi-stub.c @@ -13,7 +13,7 @@ #include #include -efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table, +efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table_arg, unsigned long *image_addr, unsigned long *image_size, unsigned long *reserve_addr, @@ -23,21 +23,44 @@ efi_status_t __init handle_kernel_image(efi_system_table_t *sys_table, { efi_status_t status; unsigned long kernel_size, kernel_memsize = 0; + unsigned long nr_pages; + void *old_image_addr = (void *)*image_addr; /* Relocate the image, if required. */ kernel_size = _edata - _text; if (*image_addr != (dram_base + TEXT_OFFSET)) { kernel_memsize = kernel_size + (_end - _edata); - status = efi_low_alloc(sys_table, kernel_memsize + TEXT_OFFSET, - SZ_2M, reserve_addr); + + /* + * First, try a straight allocation at the preferred offset. 
+ * This will work around the issue where, if dram_base == 0x0, + * efi_low_alloc() refuses to allocate at 0x0 (to prevent the + * address of the allocation to be mistaken for a FAIL return + * value or a NULL pointer). It will also ensure that, on + * platforms where the [dram_base, dram_base + TEXT_OFFSET) + * interval is partially occupied by the firmware (like on APM + * Mustang), we can still place the kernel at the address + * 'dram_base + TEXT_OFFSET'. + */ + *image_addr = *reserve_addr = dram_base + TEXT_OFFSET; + nr_pages = round_up(kernel_memsize, EFI_ALLOC_ALIGN) / + EFI_PAGE_SIZE; + status = efi_call_early(allocate_pages, EFI_ALLOCATE_ADDRESS, + EFI_LOADER_DATA, nr_pages, + (efi_physical_addr_t *)reserve_addr); if (status != EFI_SUCCESS) { - pr_efi_err(sys_table, "Failed to relocate kernel\n"); - return status; + kernel_memsize += TEXT_OFFSET; + status = efi_low_alloc(sys_table_arg, kernel_memsize, + SZ_2M, reserve_addr); + + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table_arg, "Failed to relocate kernel\n"); + return status; + } + *image_addr = *reserve_addr + TEXT_OFFSET; } - memcpy((void *)*reserve_addr + TEXT_OFFSET, (void *)*image_addr, - kernel_size); - *image_addr = *reserve_addr + TEXT_OFFSET; - *reserve_size = kernel_memsize + TEXT_OFFSET; + memcpy((void *)*image_addr, old_image_addr, kernel_size); + *reserve_size = kernel_memsize; } -- cgit v1.2.3 From 51650dc2a18b1c65026c3bb9e35da2750e985706 Mon Sep 17 00:00:00 2001 From: yalin wang Date: Fri, 24 Jul 2015 12:52:28 +0100 Subject: arm64: insn: use set_fixmap_offset to make it more clear A little change to patch_map() function, use set_fixmap_offset() to make code more clear. 
Signed-off-by: yalin wang Signed-off-by: Will Deacon --- arch/arm64/kernel/insn.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index dd9671cd0bb2..f341866aa810 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -101,9 +101,8 @@ static void __kprobes *patch_map(void *addr, int fixmap) return addr; BUG_ON(!page); - set_fixmap(fixmap, page_to_phys(page)); - - return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK)); + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + + (uintaddr & ~PAGE_MASK)); } static void __kprobes patch_unmap(int fixmap) -- cgit v1.2.3 From 951757ae83ec611b1c4f1dcfda67246cf5157451 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:41 +0100 Subject: arm64/debug: Eliminate magic number for size of BRK instruction The size of an A64 BRK instruction is the same as the size of all other A64 instructions, because all A64 instructions are the same size. BREAK_INSTR_SIZE is retained for readability, but it should not be an independent constant from AARCH64_INSN_SIZE. Signed-off-by: Dave Martin Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 40ec68aa6870..f3d2dbd5f0a7 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,8 @@ #ifdef __KERNEL__ +#include + /* Low-level stepping controls.
*/ #define DBG_MDSCR_SS (1 << 0) #define DBG_SPSR_SS (1 << 21) @@ -38,7 +40,7 @@ /* * Break point instruction encoding */ -#define BREAK_INSTR_SIZE 4 +#define BREAK_INSTR_SIZE AARCH64_INSN_SIZE /* * ESR values expected for dynamic and compile time BRK instruction -- cgit v1.2.3 From dfac68314c969481fcce3dd528728dfa9d85caf5 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:42 +0100 Subject: arm64/debug: Mask off all reserved bits from generated ESR values There are only 16 comment bits in a BRK instruction, which correspond to ESR bits 15:0. Bits 24:16 of the ESR are RES0, and might have weird meanings in the future. This code inserts 16 bits of comment in the ESR value instead of 20 (almost certainly a typo in the original code). Signed-off-by: Dave Martin Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index f3d2dbd5f0a7..ab7d5a875f8d 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -45,7 +45,7 @@ /* * ESR values expected for dynamic and compile time BRK instruction */ -#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xfffff)) +#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xffff)) /* * #imm16 values used for BRK instruction generation -- cgit v1.2.3 From 03923696a966bd3eeb3a169d71b8a337aaa5ab76 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:43 +0100 Subject: arm64/debug: Eliminate magic number from ESR template definition <asm/esr.h> has perfectly good constants for defining ESR values already. Let's use them.
Signed-off-by: Dave Martin Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ab7d5a875f8d..ff09058587e3 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,6 +18,7 @@ #ifdef __KERNEL__ +#include #include /* Low-level stepping controls. */ @@ -45,7 +46,8 @@ /* * ESR values expected for dynamic and compile time BRK instruction */ -#define DBG_ESR_VAL_BRK(x) (0xf2000000 | ((x) & 0xffff)) +#define DBG_ESR_VAL_BRK(x) \ + ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | ((x) & 0xffff)) /* * #imm16 values used for BRK instruction generation -- cgit v1.2.3 From c172d994e1fd57e46e85424c23124756c66d4b62 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:44 +0100 Subject: arm64/debug: More consistent naming for the BRK ESR template macro The naming of DBG_ESR_VAL_BRK is inconsistent with the way other similar macros are named. This patch makes the naming more consistent, and appends "64" as a reminder that this ESR pattern only matches from AArch64 state. 
Signed-off-by: Dave Martin Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 5 +++-- arch/arm64/kernel/kgdb.c | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index ff09058587e3..bb97e9d88d23 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -46,8 +46,9 @@ /* * ESR values expected for dynamic and compile time BRK instruction */ -#define DBG_ESR_VAL_BRK(x) \ - ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | ((x) & 0xffff)) +#define ESR_ELx_VAL_BRK64(imm) \ + ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ + ((imm) & 0xffff)) /* * #imm16 values used for BRK instruction generation diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index a0d10c55f307..a5a838e72c5e 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -235,13 +235,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, - .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DBG_BRK_IMM), + .esr_val = ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM), .fn = kgdb_brk_fn }; static struct break_hook kgdb_compiled_brkpt_hook = { .esr_mask = 0xffffffff, - .esr_val = DBG_ESR_VAL_BRK(KGDB_COMPILED_DBG_BRK_IMM), + .esr_val = ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM), .fn = kgdb_compiled_brk_fn }; -- cgit v1.2.3 From 72d033e80a6f25a7e2f79cacac202f19ede289e4 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:45 +0100 Subject: arm64/debug: Move BRK ESR template macro into <asm/esr.h> It makes sense to keep all the architectural exception syndrome definitions in the same place.
Signed-off-by: Dave Martin Acked-by: Mark Rutland Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 7 ------- arch/arm64/include/asm/esr.h | 7 +++++++ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index bb97e9d88d23..e28b1ddf22b9 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -43,13 +43,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * ESR values expected for dynamic and compile time BRK instruction - */ -#define ESR_ELx_VAL_BRK64(imm) \ - ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ - ((imm) & 0xffff)) - /* * #imm16 values used for BRK instruction generation * Allowed values for kgbd are 0x400 - 0x7ff diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 70522450ca23..1b44cf6be4b5 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -99,6 +99,13 @@ #define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) #define ESR_ELx_xVC_IMM_MASK ((1UL << 16) - 1) +/* ESR value templates for specific events */ + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_VAL_BRK64(imm) \ + ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \ + ((imm) & 0xffff)) + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From c696b93461f53db89d61450ace7c73babf920b99 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:46 +0100 Subject: arm64/debug: Simplify BRK insn opcode declarations The way the KGDB_DYN_BRK_INS_BYTEx macros are declared is more complex than it needs to be. Also, the macros are only used in one place, which is arch-specific anyway. This patch refactors the macros to simplify them, and exposes an argument so that we can have a single macro instead of 4. As a side effect, this patch also fixes some anomalous spellings of "KGDB". 
These changes alter the compile types of some integer constants that are harmless but trigger truncation warnings in gcc when assigning to 32-bit variables. This patch adds an explicit cast for the affected cases. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 23 ++++------------------- arch/arm64/kernel/kgdb.c | 12 ++++++------ 2 files changed, 10 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e28b1ddf22b9..6a17fb8a16d1 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -66,25 +66,10 @@ */ #define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) -/* - * Extract byte from BRK instruction - */ -#define KGDB_DYN_DBG_BRK_INS_BYTE(x) \ - ((((AARCH64_BREAK_MON) & 0xffe0001f) >> (x * 8)) & 0xff) - -/* - * Extract byte from BRK #imm16 - */ -#define KGBD_DYN_DBG_BRK_IMM_BYTE(x) \ - (((((KGDB_DYN_DBG_BRK_IMM) & 0xffff) << 5) >> (x * 8)) & 0xff) - -#define KGDB_DYN_DBG_BRK_BYTE(x) \ - (KGDB_DYN_DBG_BRK_INS_BYTE(x) | KGBD_DYN_DBG_BRK_IMM_BYTE(x)) - -#define KGDB_DYN_BRK_INS_BYTE0 KGDB_DYN_DBG_BRK_BYTE(0) -#define KGDB_DYN_BRK_INS_BYTE1 KGDB_DYN_DBG_BRK_BYTE(1) -#define KGDB_DYN_BRK_INS_BYTE2 KGDB_DYN_DBG_BRK_BYTE(2) -#define KGDB_DYN_BRK_INS_BYTE3 KGDB_DYN_DBG_BRK_BYTE(3) +#define AARCH64_BREAK_KGDB_DYN_DBG \ + (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) +#define KGDB_DYN_BRK_INS_BYTE(x) \ + ((AARCH64_BREAK_KGDB_DYN_DBG >> (8 * (x))) & 0xff) #define CACHE_FLUSH_IS_SAFE 1 diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index a5a838e72c5e..bcac81e600b9 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -235,13 +235,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) static struct break_hook kgdb_brkpt_hook = { .esr_mask = 0xffffffff, - .esr_val = ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM), + .esr_val = 
(u32)ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM), .fn = kgdb_brk_fn }; static struct break_hook kgdb_compiled_brkpt_hook = { .esr_mask = 0xffffffff, - .esr_val = ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM), + .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM), .fn = kgdb_compiled_brk_fn }; @@ -328,9 +328,9 @@ void kgdb_arch_exit(void) */ struct kgdb_arch arch_kgdb_ops = { .gdb_bpt_instr = { - KGDB_DYN_BRK_INS_BYTE0, - KGDB_DYN_BRK_INS_BYTE1, - KGDB_DYN_BRK_INS_BYTE2, - KGDB_DYN_BRK_INS_BYTE3, + KGDB_DYN_BRK_INS_BYTE(0), + KGDB_DYN_BRK_INS_BYTE(1), + KGDB_DYN_BRK_INS_BYTE(2), + KGDB_DYN_BRK_INS_BYTE(3), } }; -- cgit v1.2.3 From d7a33f4fbd12ca0a32a24cc46c0d02b47f6b54d1 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:47 +0100 Subject: arm64/debug: Add missing #includes relies on , but doesn't declare this dependency. This becomes a problem once debug-monitors.h starts getting included all over the place to get the BRK immediates. The missing include of (for UL()) in is also added. The series no longer relies on this, but I spotted it during development and it may as well get fixed. No functional change. Signed-off-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 3 +++ arch/arm64/include/asm/esr.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 6a17fb8a16d1..777c36a1f645 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -18,8 +18,11 @@ #ifdef __KERNEL__ +#include +#include #include #include +#include /* Low-level stepping controls.
*/ #define DBG_MDSCR_SS (1 << 0) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 1b44cf6be4b5..77eeb2cc648f 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -18,6 +18,8 @@ #ifndef __ASM_ESR_H #define __ASM_ESR_H +#include + #define ESR_ELx_EC_UNKNOWN (0x00) #define ESR_ELx_EC_WFx (0x01) /* Unallocated EC: 0x02 */ -- cgit v1.2.3 From 9fb7410f955f7a62c1f882ca8f9ffd4525907e28 Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:48 +0100 Subject: arm64/BUG: Use BRK instruction for generic BUG traps Currently, the minimal default BUG() implementation from asm-generic is used for arm64. This patch uses the BRK software breakpoint instruction to generate a trap instead, similarly to most other arches, with the generic BUG code generating the dmesg boilerplate. This allows bug metadata to be moved to a separate table and reduces the amount of inline code at BUG and WARN sites. This also avoids clobbering any registers before they can be dumped. To mitigate the size of the bug table further, this patch makes use of the existing infrastructure for encoding addresses within the bug table as 32-bit offsets instead of absolute pointers. (Note that this limits the kernel size to 2GB.) Traps are registered at arch_initcall time for aarch64, but BUG has minimal real dependencies and it is desirable to be able to generate bug splats as early as possible. This patch redirects all debug exceptions caused by BRK directly to bug_handler() until the full debug exception support has been initialised.
Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 8 +++++ arch/arm64/include/asm/bug.h | 64 +++++++++++++++++++++++++++++++++ arch/arm64/include/asm/debug-monitors.h | 2 ++ arch/arm64/kernel/traps.c | 59 +++++++++++++++++++++++++++++- arch/arm64/mm/fault.c | 12 +++++-- 5 files changed, 142 insertions(+), 3 deletions(-) create mode 100644 arch/arm64/include/asm/bug.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c2bd79a02a6c..5372e1e0c11c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -113,6 +113,14 @@ config TRACE_IRQFLAGS_SUPPORT config RWSEM_XCHGADD_ALGORITHM def_bool y +config GENERIC_BUG + def_bool y + depends on BUG + +config GENERIC_BUG_RELATIVE_POINTERS + def_bool y + depends on GENERIC_BUG + config GENERIC_HWEIGHT def_bool y diff --git a/arch/arm64/include/asm/bug.h b/arch/arm64/include/asm/bug.h new file mode 100644 index 000000000000..4a748ce9ba1a --- /dev/null +++ b/arch/arm64/include/asm/bug.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2015 ARM Limited + * Author: Dave Martin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _ARCH_ARM64_ASM_BUG_H +#define _ARCH_ARM64_ASM_BUG_H + +#include + +#ifdef CONFIG_GENERIC_BUG +#define HAVE_ARCH_BUG + +#ifdef CONFIG_DEBUG_BUGVERBOSE +#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line) +#define __BUGVERBOSE_LOCATION(file, line) \ + ".pushsection .rodata.str,\"aMS\",@progbits,1\n" \ + "2: .string \"" file "\"\n\t" \ + ".popsection\n\t" \ + \ + ".long 2b - 0b\n\t" \ + ".short " #line "\n\t" +#else +#define _BUGVERBOSE_LOCATION(file, line) +#endif + +#define _BUG_FLAGS(flags) __BUG_FLAGS(flags) + +#define __BUG_FLAGS(flags) asm volatile ( \ + ".pushsection __bug_table,\"a\"\n\t" \ + ".align 2\n\t" \ + "0: .long 1f - 0b\n\t" \ +_BUGVERBOSE_LOCATION(__FILE__, __LINE__) \ + ".short " #flags "\n\t" \ + ".popsection\n" \ + \ + "1: brk %[imm]" \ + :: [imm] "i" (BUG_BRK_IMM) \ +) + +#define BUG() do { \ + _BUG_FLAGS(0); \ + unreachable(); \ +} while (0) + +#define __WARN_TAINT(taint) _BUG_FLAGS(BUGFLAG_TAINT(taint)) + +#endif /* ! CONFIG_GENERIC_BUG */ + +#include + +#endif /* ! _ARCH_ARM64_ASM_BUG_H */ diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 777c36a1f645..e3f2bad788c9 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -52,10 +52,12 @@ * 0x100: for triggering a fault on purpose (reserved) * 0x400: for dynamic BRK instruction * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps */ #define FAULT_BRK_IMM 0x100 #define KGDB_DYN_DBG_BRK_IMM 0x400 #define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 /* * BRK instruction encoding diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 1ea920cbd66d..824ba5ac6361 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -17,6 +17,7 @@ * along with this program. If not, see . 
*/ +#include #include #include #include @@ -32,8 +33,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -466,7 +469,61 @@ void __pgd_error(const char *file, int line, unsigned long val) pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } +/* GENERIC_BUG traps */ + +int is_valid_bugaddr(unsigned long addr) +{ + /* + * bug_handler() only called for BRK #BUG_BRK_IMM. + * So the answer is trivial -- any spurious instances with no + * bug table entry will be rejected by report_bug() and passed + * back to the debug-monitors code and handled as a fatal + * unexpected debug exception. + */ + return 1; +} + +static int bug_handler(struct pt_regs *regs, unsigned int esr) +{ + if (user_mode(regs)) + return DBG_HOOK_ERROR; + + switch (report_bug(regs->pc, regs)) { + case BUG_TRAP_TYPE_BUG: + die("Oops - BUG", regs, 0); + break; + + case BUG_TRAP_TYPE_WARN: + break; + + default: + /* unknown/unrecognised bug trap type */ + return DBG_HOOK_ERROR; + } + + /* If thread survives, skip over the BUG instruction and continue: */ + regs->pc += AARCH64_INSN_SIZE; /* skip BRK and resume */ + return DBG_HOOK_HANDLED; +} + +static struct break_hook bug_break_hook = { + .esr_val = 0xf2000000 | BUG_BRK_IMM, + .esr_mask = 0xffffffff, + .fn = bug_handler, +}; + +/* + * Initial handler for AArch64 BRK exceptions + * This handler only used until debug_traps_init(). + */ +int __init early_brk64(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + return bug_handler(regs, esr) != DBG_HOOK_HANDLED; +} + +/* This registration must happen early, before debug_traps_init(). 
*/ void __init trap_init(void) { - return; + register_break_hook(&bug_break_hook); } diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index ce591211434e..aba9ead1384c 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -501,14 +501,22 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, arm64_notify_die("Oops - SP/PC alignment exception", regs, &info, esr); } -static struct fault_info debug_fault_info[] = { +int __init early_brk64(unsigned long addr, unsigned int esr, + struct pt_regs *regs); + +/* + * __refdata because early_brk64 is __init, but the reference to it is + * clobbered at arch_initcall time. + * See traps.c and debug-monitors.c:debug_traps_init(). + */ +static struct fault_info __refdata debug_fault_info[] = { { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware breakpoint" }, { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware single-step" }, { do_bad, SIGTRAP, TRAP_HWBKPT, "hardware watchpoint" }, { do_bad, SIGBUS, 0, "unknown 3" }, { do_bad, SIGTRAP, TRAP_BRKPT, "aarch32 BKPT" }, { do_bad, SIGTRAP, 0, "aarch32 vector catch" }, - { do_bad, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }, + { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }, { do_bad, SIGBUS, 0, "unknown 7" }, }; -- cgit v1.2.3 From a4653228a0f8d0a4a76d03a2dd15beaf6e78c22b Mon Sep 17 00:00:00 2001 From: Dave P Martin Date: Fri, 24 Jul 2015 16:37:49 +0100 Subject: arm64/BUG: Show explicit backtrace for WARNs The generic slowpath WARN implementation prints a backtrace, but the report_bug() based implementation does not, opting to print the registers instead which is generally not as useful. Ideally, report_bug() should be fixed to make the behaviour more consistent, but in the meantime this patch generates a backtrace directly from the arm64 backend instead so that this functionality is not lost with the migration to report_bug(). As a side-effect, the backtrace will be outside the oops end marker, but that's hard to avoid without modifying generic code. 
This patch can go away if report_bug() grows the ability in the future to generate a backtrace directly or call an arch hook at the appropriate time. Signed-off-by: Dave Martin Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 824ba5ac6361..f93aae5e4307 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -494,6 +494,8 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr) break; case BUG_TRAP_TYPE_WARN: + /* Ideally, report_bug() should backtrace for us... but no. */ + dump_backtrace(regs, NULL); break; default: -- cgit v1.2.3 From 772d68355e2f65f71e0402e39aabfdea56f55083 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 11:07:03 +0100 Subject: arm64: include linux/types.h in asm/spinlock_types.h Our ticket-based spinlock structures rely on a definition of u16, so include linux/types.h explicitly to ensure the thing compiles. Found by a module build failure in -next: arch/arm64/include/asm/spinlock_types.h:27:2: error: unknown type name 'u16' arch/arm64/include/asm/spinlock_types.h:28:2: error: unknown type name 'u16' arch/arm64/include/asm/spinlock_types.h:33:13: error: expected declaration specifiers or '...' 
before numeric constant include/linux/spinlock_types.h:21:2: error: unknown type name 'arch_spinlock_t' arch/arm64/include/asm/spinlock.h:34:35: error: unknown type name 'arch_spinlock_t' arch/arm64/include/asm/spinlock.h:65:37: error: unknown type name 'arch_spinlock_t' Reported-by: Russell King Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock_types.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h index b8d383665f56..55be59a35e3f 100644 --- a/arch/arm64/include/asm/spinlock_types.h +++ b/arch/arm64/include/asm/spinlock_types.h @@ -20,6 +20,8 @@ # error "please don't include this file directly" #endif +#include + #define TICKET_SHIFT 16 typedef struct { -- cgit v1.2.3 From 9511ca19dafbd503fb467d451fe331a6008f08cf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Jul 2015 18:25:52 +0100 Subject: arm64: rwlocks: don't fail trylock purely due to contention STXR can fail for a number of reasons, so don't fail an rwlock trylock operation simply because the STXR reported failure. I'm not aware of any issues with the current code, but this makes it consistent with spin_trylock and also other architectures (e.g. arch/arm). 
Reported-by: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index cee128732435..0f08ba5cfb33 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -140,10 +140,11 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) unsigned int tmp; asm volatile( - " ldaxr %w0, %1\n" - " cbnz %w0, 1f\n" + "1: ldaxr %w0, %1\n" + " cbnz %w0, 2f\n" " stxr %w0, %w2, %1\n" - "1:\n" + " cbnz %w0, 1b\n" + "2:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -209,11 +210,12 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) unsigned int tmp, tmp2 = 1; asm volatile( - " ldaxr %w0, %2\n" + "1: ldaxr %w0, %2\n" " add %w0, %w0, #1\n" - " tbnz %w0, #31, 1f\n" + " tbnz %w0, #31, 2f\n" " stxr %w1, %w0, %2\n" - "1:\n" + " cbnz %w1, 1b\n" + "2:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : : "memory"); -- cgit v1.2.3 From 144e9697a9e70b5549fd52df90111f1410bcbfeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Apr 2015 18:55:50 +0100 Subject: arm64: cpufeature.h: add missing #include of kernel.h cpufeature.h makes use of DECLARE_BITMAP, which in turn relies on the BITS_TO_LONGS and DIV_ROUND_UP macros. This patch includes kernel.h in cpufeature.h to prevent all users having to do the same thing. 
Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d71140b76773..40e106f81f27 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -31,6 +31,8 @@ #ifndef __ASSEMBLY__ +#include + struct arm64_cpu_capabilities { const char *desc; u16 capability; -- cgit v1.2.3 From c275f76bb4ce16fd0205da1e15f31b875013678e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Feb 2015 11:26:53 +0000 Subject: arm64: atomics: move ll/sc atomics into separate header file In preparation for the Large System Extension (LSE) atomic instructions introduced by ARM v8.1, move the current exclusive load/store (LL/SC) atomics into their own header file. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic.h | 180 +--------------------------- arch/arm64/include/asm/atomic_ll_sc.h | 215 ++++++++++++++++++++++++++++++++++ 2 files changed, 221 insertions(+), 174 deletions(-) create mode 100644 arch/arm64/include/asm/atomic_ll_sc.h diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 866a71fca9a3..632c47064722 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -30,6 +30,8 @@ #ifdef __KERNEL__ +#include + /* * On ARM, ordinary assignment (str instruction) doesn't clear the local * strex/ldrex monitor on some implementations. The reason we can use it for @@ -38,86 +40,6 @@ #define atomic_read(v) ACCESS_ONCE((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) -/* - * AArch64 UP and SMP safe atomic ops. We use load exclusive and - * store exclusive to ensure that these are atomic. We may loop - * to ensure that the update happens. 
- */ - -#define ATOMIC_OP(op, asm_op) \ -static inline void atomic_##op(int i, atomic_t *v) \ -{ \ - unsigned long tmp; \ - int result; \ - \ - asm volatile("// atomic_" #op "\n" \ -"1: ldxr %w0, %2\n" \ -" " #asm_op " %w0, %w0, %w3\n" \ -" stxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i)); \ -} \ - -#define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ -{ \ - unsigned long tmp; \ - int result; \ - \ - asm volatile("// atomic_" #op "_return\n" \ -"1: ldxr %w0, %2\n" \ -" " #asm_op " %w0, %w0, %w3\n" \ -" stlxr %w1, %w0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i) \ - : "memory"); \ - \ - smp_mb(); \ - return result; \ -} - -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN(op, asm_op) - -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, sub) - -#define atomic_andnot atomic_andnot - -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) - -#undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP - -static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) -{ - unsigned long tmp; - int oldval; - - smp_mb(); - - asm volatile("// atomic_cmpxchg\n" -"1: ldxr %w1, %2\n" -" cmp %w1, %w3\n" -" b.ne 2f\n" -" stxr %w0, %w4, %2\n" -" cbnz %w0, 1b\n" -"2:" - : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); - - smp_mb(); - return oldval; -} - #define atomic_xchg(v, new) (xchg(&((v)->counter), new)) static inline int __atomic_add_unless(atomic_t *v, int a, int u) @@ -141,6 +63,8 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) +#define atomic_andnot atomic_andnot + /* * 64-bit atomic operations. 
*/ @@ -149,102 +73,8 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) #define atomic64_read(v) ACCESS_ONCE((v)->counter) #define atomic64_set(v,i) (((v)->counter) = (i)) -#define ATOMIC64_OP(op, asm_op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ -{ \ - long result; \ - unsigned long tmp; \ - \ - asm volatile("// atomic64_" #op "\n" \ -"1: ldxr %0, %2\n" \ -" " #asm_op " %0, %0, %3\n" \ -" stxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i)); \ -} \ - -#define ATOMIC64_OP_RETURN(op, asm_op) \ -static inline long atomic64_##op##_return(long i, atomic64_t *v) \ -{ \ - long result; \ - unsigned long tmp; \ - \ - asm volatile("// atomic64_" #op "_return\n" \ -"1: ldxr %0, %2\n" \ -" " #asm_op " %0, %0, %3\n" \ -" stlxr %w1, %0, %2\n" \ -" cbnz %w1, 1b" \ - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ - : "Ir" (i) \ - : "memory"); \ - \ - smp_mb(); \ - return result; \ -} - -#define ATOMIC64_OPS(op, asm_op) \ - ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN(op, asm_op) - -ATOMIC64_OPS(add, add) -ATOMIC64_OPS(sub, sub) - -#define atomic64_andnot atomic64_andnot - -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) - -#undef ATOMIC64_OPS -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP - -static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) -{ - long oldval; - unsigned long res; - - smp_mb(); - - asm volatile("// atomic64_cmpxchg\n" -"1: ldxr %1, %2\n" -" cmp %1, %3\n" -" b.ne 2f\n" -" stxr %w0, %4, %2\n" -" cbnz %w0, 1b\n" -"2:" - : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); - - smp_mb(); - return oldval; -} - #define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) -static inline long atomic64_dec_if_positive(atomic64_t *v) -{ - long result; - unsigned long tmp; - - asm volatile("// atomic64_dec_if_positive\n" -"1: ldxr %0, %2\n" -" subs %0, %0, #1\n" -" b.mi 2f\n" -" 
stlxr %w1, %0, %2\n" -" cbnz %w1, 1b\n" -" dmb ish\n" -"2:" - : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : - : "cc", "memory"); - - return result; -} - static inline int atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; @@ -266,5 +96,7 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u) #define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) +#define atomic64_andnot atomic64_andnot + #endif #endif diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h new file mode 100644 index 000000000000..66e992a58f6b --- /dev/null +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -0,0 +1,215 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * + * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_ATOMIC_LL_SC_H +#define __ASM_ATOMIC_LL_SC_H + +/* + * AArch64 UP and SMP safe atomic ops. We use load exclusive and + * store exclusive to ensure that these are atomic. We may loop + * to ensure that the update happens. + * + * NOTE: these functions do *not* follow the PCS and must explicitly + * save any clobbered registers other than x0 (regardless of return + * value). 
This is achieved through -fcall-saved-* compiler flags for + * this file, which unfortunately don't work on a per-function basis + * (the optimize attribute silently ignores these options). + */ + +#ifndef __LL_SC_INLINE +#define __LL_SC_INLINE static inline +#endif + +#ifndef __LL_SC_PREFIX +#define __LL_SC_PREFIX(x) x +#endif + +#define ATOMIC_OP(op, asm_op) \ +__LL_SC_INLINE void \ +__LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "\n" \ +"1: ldxr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" stxr %w1, %w0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ +{ \ + unsigned long tmp; \ + int result; \ + \ + asm volatile("// atomic_" #op "_return\n" \ +"1: ldxr %w0, %2\n" \ +" " #asm_op " %w0, %w0, %w3\n" \ +" stlxr %w1, %w0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : "memory"); \ + \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN(op, asm_op) + +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +ATOMIC_OP(and, and) +ATOMIC_OP(andnot, bic) +ATOMIC_OP(or, orr) +ATOMIC_OP(xor, eor) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +__LL_SC_INLINE int +__LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) +{ + unsigned long tmp; + int oldval; + + smp_mb(); + + asm volatile("// atomic_cmpxchg\n" +"1: ldxr %w1, %2\n" +" cmp %w1, %w3\n" +" b.ne 2f\n" +" stxr %w0, %w4, %2\n" +" cbnz %w0, 1b\n" +"2:" + : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) + : "Ir" (old), "r" (new) + : "cc"); + + smp_mb(); + return oldval; +} + +#define ATOMIC64_OP(op, asm_op) \ +__LL_SC_INLINE void \ +__LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ +{ \ + long result; \ + unsigned long 
tmp; \ + \ + asm volatile("// atomic64_" #op "\n" \ +"1: ldxr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" stxr %w1, %0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i)); \ +} \ + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +__LL_SC_INLINE long \ +__LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ +{ \ + long result; \ + unsigned long tmp; \ + \ + asm volatile("// atomic64_" #op "_return\n" \ +"1: ldxr %0, %2\n" \ +" " #asm_op " %0, %0, %3\n" \ +" stlxr %w1, %0, %2\n" \ +" cbnz %w1, 1b" \ + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ + : "Ir" (i) \ + : "memory"); \ + \ + smp_mb(); \ + return result; \ +} + +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN(op, asm_op) + +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +ATOMIC64_OP(and, and) +ATOMIC64_OP(andnot, bic) +ATOMIC64_OP(or, orr) +ATOMIC64_OP(xor, eor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +__LL_SC_INLINE long +__LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) +{ + long oldval; + unsigned long res; + + smp_mb(); + + asm volatile("// atomic64_cmpxchg\n" +"1: ldxr %1, %2\n" +" cmp %1, %3\n" +" b.ne 2f\n" +" stxr %w0, %4, %2\n" +" cbnz %w0, 1b\n" +"2:" + : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) + : "Ir" (old), "r" (new) + : "cc"); + + smp_mb(); + return oldval; +} + +__LL_SC_INLINE long +__LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) +{ + long result; + unsigned long tmp; + + asm volatile("// atomic64_dec_if_positive\n" +"1: ldxr %0, %2\n" +" subs %0, %0, #1\n" +" b.mi 2f\n" +" stlxr %w1, %0, %2\n" +" cbnz %w1, 1b\n" +" dmb ish\n" +"2:" + : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) + : + : "cc", "memory"); + + return result; +} + +#endif /* __ASM_ATOMIC_LL_SC_H */ -- cgit v1.2.3 From 40a1db2434a1b62332b1af25cfa14d7b8c0301fe Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 26 Jan 2015 18:46:19 +0000 Subject: arm64: elf: advertise 
8.1 atomic instructions as new hwcap The ARM v8.1 architecture introduces new atomic instructions to the A64 instruction set for things like cmpxchg, so advertise their availability to userspace using a hwcap. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/uapi/asm/hwcap.h | 1 + arch/arm64/kernel/setup.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 73cf0f54d57c..361c8a8ef55f 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -27,5 +27,6 @@ #define HWCAP_SHA1 (1 << 5) #define HWCAP_SHA2 (1 << 6) #define HWCAP_CRC32 (1 << 7) +#define HWCAP_ATOMICS (1 << 8) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index e7a1e719f127..b2f9895ecf7b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -278,6 +278,19 @@ static void __init setup_processor(void) if (block && !(block & 0x8)) elf_hwcap |= HWCAP_CRC32; + block = (features >> 20) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_ATOMICS; + case 1: + /* RESERVED */ + case 0: + break; + } + } + #ifdef CONFIG_COMPAT /* * ID_ISAR5_EL1 carries similar information as above, but pertaining to @@ -457,6 +470,7 @@ static const char *hwcap_str[] = { "sha1", "sha2", "crc32", + "atomics", NULL }; -- cgit v1.2.3 From d964b7229e7f94428a1e8d26999adffbe8a69db2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 4 Feb 2015 12:17:55 +0000 Subject: arm64: alternatives: add cpu feature for lse atomics Add a CPU feature for the LSE atomic instructions, so that they can be patched in at runtime when we detect that they are supported. 
Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 3 ++- arch/arm64/kernel/setup.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 40e106f81f27..d9262d4b4dcd 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -26,8 +26,9 @@ #define ARM64_WORKAROUND_845719 2 #define ARM64_HAS_SYSREG_GIC_CPUIF 3 #define ARM64_HAS_PAN 4 +#define ARM64_CPU_FEAT_LSE_ATOMICS 5 -#define ARM64_NCAPS 5 +#define ARM64_NCAPS 6 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b2f9895ecf7b..f4fbbc884893 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -284,6 +284,7 @@ static void __init setup_processor(void) default: case 2: elf_hwcap |= HWCAP_ATOMICS; + cpus_set_cap(ARM64_CPU_FEAT_LSE_ATOMICS); case 1: /* RESERVED */ case 0: -- cgit v1.2.3 From c0385b24af15020a1e505f2c984db0d7c0d017e1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Feb 2015 12:39:03 +0000 Subject: arm64: introduce CONFIG_ARM64_LSE_ATOMICS as fallback to ll/sc atomics In order to patch in the new atomic instructions at runtime, we need to generate wrappers around the out-of-line exclusive load/store atomics. This patch adds a new Kconfig option, CONFIG_ARM64_LSE_ATOMICS, which causes our atomic functions to branch to the out-of-line ll/sc implementations. To avoid the register spill overhead of the PCS, the out-of-line functions are compiled with specific compiler flags to force out-of-line save/restore of any registers that are usually caller-saved.
Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 12 +++ arch/arm64/include/asm/atomic.h | 9 ++ arch/arm64/include/asm/atomic_ll_sc.h | 19 +++- arch/arm64/include/asm/atomic_lse.h | 170 ++++++++++++++++++++++++++++++++++ arch/arm64/lib/Makefile | 13 +++ arch/arm64/lib/atomic_ll_sc.c | 3 + 6 files changed, 224 insertions(+), 2 deletions(-) create mode 100644 arch/arm64/include/asm/atomic_lse.h create mode 100644 arch/arm64/lib/atomic_ll_sc.c diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5372e1e0c11c..8dabffa82ef8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -618,6 +618,18 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. +config ARM64_LSE_ATOMICS + bool "ARMv8.1 atomic instructions" + help + As part of the Large System Extensions, ARMv8.1 introduces new + atomic instructions that are designed specifically to scale in + very large systems. + + Say Y here to make use of these instructions for the in-kernel + atomic routines. This incurs a small overhead on CPUs that do + not support these instructions and requires the kernel to be + built with binutils >= 2.25. 
+ menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 632c47064722..84635f2d3d0a 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -21,6 +21,7 @@ #define __ASM_ATOMIC_H #include +#include #include #include @@ -30,7 +31,15 @@ #ifdef __KERNEL__ +#define __ARM64_IN_ATOMIC_IMPL + +#ifdef CONFIG_ARM64_LSE_ATOMICS +#include +#else #include +#endif + +#undef __ARM64_IN_ATOMIC_IMPL /* * On ARM, ordinary assignment (str instruction) doesn't clear the local diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 66e992a58f6b..c33fa2cd399e 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -21,6 +21,10 @@ #ifndef __ASM_ATOMIC_LL_SC_H #define __ASM_ATOMIC_LL_SC_H +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + /* * AArch64 UP and SMP safe atomic ops. We use load exclusive and * store exclusive to ensure that these are atomic. 
We may loop @@ -41,6 +45,10 @@ #define __LL_SC_PREFIX(x) x #endif +#ifndef __LL_SC_EXPORT +#define __LL_SC_EXPORT(x) +#endif + #define ATOMIC_OP(op, asm_op) \ __LL_SC_INLINE void \ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ @@ -56,6 +64,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ +__LL_SC_EXPORT(atomic_##op); #define ATOMIC_OP_RETURN(op, asm_op) \ __LL_SC_INLINE int \ @@ -75,7 +84,8 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ \ smp_mb(); \ return result; \ -} +} \ +__LL_SC_EXPORT(atomic_##op##_return); #define ATOMIC_OPS(op, asm_op) \ ATOMIC_OP(op, asm_op) \ @@ -115,6 +125,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) smp_mb(); return oldval; } +__LL_SC_EXPORT(atomic_cmpxchg); #define ATOMIC64_OP(op, asm_op) \ __LL_SC_INLINE void \ @@ -131,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) \ : "Ir" (i)); \ } \ +__LL_SC_EXPORT(atomic64_##op); #define ATOMIC64_OP_RETURN(op, asm_op) \ __LL_SC_INLINE long \ @@ -150,7 +162,8 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ \ smp_mb(); \ return result; \ -} +} \ +__LL_SC_EXPORT(atomic64_##op##_return); #define ATOMIC64_OPS(op, asm_op) \ ATOMIC64_OP(op, asm_op) \ @@ -190,6 +203,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) smp_mb(); return oldval; } +__LL_SC_EXPORT(atomic64_cmpxchg); __LL_SC_INLINE long __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) @@ -211,5 +225,6 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) return result; } +__LL_SC_EXPORT(atomic64_dec_if_positive); #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h new file mode 100644 index 000000000000..dce6ede740e8 --- /dev/null +++ b/arch/arm64/include/asm/atomic_lse.h @@ -0,0 +1,170 @@ +/* + * Based on arch/arm/include/asm/atomic.h + * 
+ * Copyright (C) 1996 Russell King. + * Copyright (C) 2002 Deep Blue Solutions Ltd. + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_ATOMIC_LSE_H +#define __ASM_ATOMIC_LSE_H + +#ifndef __ARM64_IN_ATOMIC_IMPL +#error "please don't include this file directly" +#endif + +/* Move the ll/sc atomics out-of-line */ +#define __LL_SC_INLINE +#define __LL_SC_PREFIX(x) __ll_sc_##x +#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) + +/* Macros for constructing calls to out-of-line ll/sc atomics */ +#define __LL_SC_CALL(op) \ + "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n" +#define __LL_SC_CALL64(op) \ + "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n" + +#define ATOMIC_OP(op, asm_op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL(op) \ + : "+r" (w0), "+Q" (v->counter) \ + : "r" (x1) \ + : "x30"); \ +} \ + +#define ATOMIC_OP_RETURN(op, asm_op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + register int w0 asm ("w0") = i; \ + register atomic_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL(op##_return) \ + : "+r" (w0) \ + : "r" (x1) \ + : "x30", "memory"); \ + \ + return w0; \ +} + +#define ATOMIC_OPS(op, asm_op) \ + ATOMIC_OP(op, asm_op) \ + ATOMIC_OP_RETURN(op, asm_op) + +ATOMIC_OPS(add, add) +ATOMIC_OPS(sub, sub) + +ATOMIC_OP(and, 
and) +ATOMIC_OP(andnot, bic) +ATOMIC_OP(or, orr) +ATOMIC_OP(xor, eor) + +#undef ATOMIC_OPS +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) +{ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; + register int w1 asm ("w1") = old; + register int w2 asm ("w2") = new; + + asm volatile( + __LL_SC_CALL(cmpxchg) + : "+r" (x0) + : "r" (w1), "r" (w2) + : "x30", "cc", "memory"); + + return x0; +} + +#define ATOMIC64_OP(op, asm_op) \ +static inline void atomic64_##op(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL64(op) \ + : "+r" (x0), "+Q" (v->counter) \ + : "r" (x1) \ + : "x30"); \ +} \ + +#define ATOMIC64_OP_RETURN(op, asm_op) \ +static inline long atomic64_##op##_return(long i, atomic64_t *v) \ +{ \ + register long x0 asm ("x0") = i; \ + register atomic64_t *x1 asm ("x1") = v; \ + \ + asm volatile( \ + __LL_SC_CALL64(op##_return) \ + : "+r" (x0) \ + : "r" (x1) \ + : "x30", "memory"); \ + \ + return x0; \ +} + +#define ATOMIC64_OPS(op, asm_op) \ + ATOMIC64_OP(op, asm_op) \ + ATOMIC64_OP_RETURN(op, asm_op) + +ATOMIC64_OPS(add, add) +ATOMIC64_OPS(sub, sub) + +ATOMIC64_OP(and, and) +ATOMIC64_OP(andnot, bic) +ATOMIC64_OP(or, orr) +ATOMIC64_OP(xor, eor) + +#undef ATOMIC64_OPS +#undef ATOMIC64_OP_RETURN +#undef ATOMIC64_OP + +static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) +{ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; + register long x1 asm ("x1") = old; + register long x2 asm ("x2") = new; + + asm volatile( + __LL_SC_CALL64(cmpxchg) + : "+r" (x0) + : "r" (x1), "r" (x2) + : "x30", "cc", "memory"); + + return x0; +} + +static inline long atomic64_dec_if_positive(atomic64_t *v) +{ + register unsigned long x0 asm ("x0") = (unsigned long)v; + + asm volatile( + __LL_SC_CALL64(dec_if_positive) + : "+r" (x0) + : + : "x30", "cc", "memory"); + + return x0; +} + +#endif /* 
__ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index d98d3e39879e..1a811ecf71da 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -3,3 +3,16 @@ lib-y := bitops.o clear_user.o delay.o copy_from_user.o \ clear_page.o memchr.o memcpy.o memmove.o memset.o \ memcmp.o strcmp.o strncmp.o strlen.o strnlen.o \ strchr.o strrchr.o + +# Tell the compiler to treat all general purpose registers as +# callee-saved, which allows for efficient runtime patching of the bl +# instruction in the caller with an atomic instruction when supported by +# the CPU. Result and argument registers are handled correctly, based on +# the function prototype. +lib-$(CONFIG_ARM64_LSE_ATOMICS) += atomic_ll_sc.o +CFLAGS_atomic_ll_sc.o := -fcall-used-x0 -ffixed-x1 -ffixed-x2 \ + -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 \ + -ffixed-x7 -fcall-saved-x8 -fcall-saved-x9 \ + -fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \ + -fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \ + -fcall-saved-x16 -fcall-saved-x17 -fcall-saved-x18 diff --git a/arch/arm64/lib/atomic_ll_sc.c b/arch/arm64/lib/atomic_ll_sc.c new file mode 100644 index 000000000000..b0c538b0da28 --- /dev/null +++ b/arch/arm64/lib/atomic_ll_sc.c @@ -0,0 +1,3 @@ +#include +#define __ARM64_IN_ATOMIC_IMPL +#include -- cgit v1.2.3 From c09d6a04d17d730b0463207a26ece082772b59ee Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Feb 2015 16:14:13 +0000 Subject: arm64: atomics: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, it makes sense to use them in preference to ll/sc sequences. This patch introduces runtime patching of atomic_t and atomic64_t routines so that the call-site for the out-of-line ll/sc sequences is patched with an LSE atomic instruction when we detect that the CPU supports it. 
If binutils is not recent enough to assemble the LSE instructions, then the ll/sc sequences are inlined as though CONFIG_ARM64_LSE_ATOMICS=n. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Makefile | 13 +- arch/arm64/include/asm/atomic.h | 4 +- arch/arm64/include/asm/atomic_ll_sc.h | 12 - arch/arm64/include/asm/atomic_lse.h | 400 +++++++++++++++++++++++++--------- arch/arm64/include/asm/lse.h | 34 +++ arch/arm64/kernel/setup.c | 3 + 6 files changed, 342 insertions(+), 124 deletions(-) create mode 100644 arch/arm64/include/asm/lse.h diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 0953a97b5119..15ff5b4156fd 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -17,7 +17,18 @@ GZFLAGS :=-9 KBUILD_DEFCONFIG := defconfig -KBUILD_CFLAGS += -mgeneral-regs-only +# Check for binutils support for specific extensions +lseinstr := $(call as-instr,.arch_extension lse,-DCONFIG_AS_LSE=1) + +ifeq ($(CONFIG_ARM64_LSE_ATOMICS), y) + ifeq ($(lseinstr),) +$(warning LSE atomics not supported by binutils) + endif +endif + +KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) +KBUILD_AFLAGS += $(lseinstr) + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian AS += -EB diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 84635f2d3d0a..836226d5e12c 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -21,11 +21,11 @@ #define __ASM_ATOMIC_H #include -#include #include #include #include +#include #define ATOMIC_INIT(i) { (i) } @@ -33,7 +33,7 @@ #define __ARM64_IN_ATOMIC_IMPL -#ifdef CONFIG_ARM64_LSE_ATOMICS +#if defined(CONFIG_ARM64_LSE_ATOMICS) && defined(CONFIG_AS_LSE) #include #else #include diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index c33fa2cd399e..4b981ba57e78 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -37,18 +37,6 @@ * (the optimize attribute silently ignores 
these options). */ -#ifndef __LL_SC_INLINE -#define __LL_SC_INLINE static inline -#endif - -#ifndef __LL_SC_PREFIX -#define __LL_SC_PREFIX(x) x -#endif - -#ifndef __LL_SC_EXPORT -#define __LL_SC_EXPORT(x) -#endif - #define ATOMIC_OP(op, asm_op) \ __LL_SC_INLINE void \ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index dce6ede740e8..6e21b5e0c9d6 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -25,60 +25,129 @@ #error "please don't include this file directly" #endif -/* Move the ll/sc atomics out-of-line */ -#define __LL_SC_INLINE -#define __LL_SC_PREFIX(x) __ll_sc_##x -#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) - -/* Macros for constructing calls to out-of-line ll/sc atomics */ -#define __LL_SC_CALL(op) \ - "bl\t" __stringify(__LL_SC_PREFIX(atomic_##op)) "\n" -#define __LL_SC_CALL64(op) \ - "bl\t" __stringify(__LL_SC_PREFIX(atomic64_##op)) "\n" - -#define ATOMIC_OP(op, asm_op) \ -static inline void atomic_##op(int i, atomic_t *v) \ -{ \ - register int w0 asm ("w0") = i; \ - register atomic_t *x1 asm ("x1") = v; \ - \ - asm volatile( \ - __LL_SC_CALL(op) \ - : "+r" (w0), "+Q" (v->counter) \ - : "r" (x1) \ - : "x30"); \ -} \ - -#define ATOMIC_OP_RETURN(op, asm_op) \ -static inline int atomic_##op##_return(int i, atomic_t *v) \ -{ \ - register int w0 asm ("w0") = i; \ - register atomic_t *x1 asm ("x1") = v; \ - \ - asm volatile( \ - __LL_SC_CALL(op##_return) \ - : "+r" (w0) \ - : "r" (x1) \ - : "x30", "memory"); \ - \ - return w0; \ -} - -#define ATOMIC_OPS(op, asm_op) \ - ATOMIC_OP(op, asm_op) \ - ATOMIC_OP_RETURN(op, asm_op) - -ATOMIC_OPS(add, add) -ATOMIC_OPS(sub, sub) - -ATOMIC_OP(and, and) -ATOMIC_OP(andnot, bic) -ATOMIC_OP(or, orr) -ATOMIC_OP(xor, eor) - -#undef ATOMIC_OPS -#undef ATOMIC_OP_RETURN -#undef ATOMIC_OP +#define __LL_SC_ATOMIC(op) __LL_SC_CALL(atomic_##op) + +static inline void atomic_andnot(int i, 
atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot), + " stclr %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_or(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or), + " stset %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_xor(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor), + " steor %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_add(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add), + " stadd %w[i], %[v]\n") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(add_return), + /* LSE atomics */ + " ldaddal %w[i], w30, %[v]\n" + " add %w[i], %w[i], w30") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30", "memory"); + + return w0; +} + +static inline void atomic_and(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(and), + /* LSE atomics */ + " mvn %w[i], %w[i]\n" + " stclr %w[i], %[v]") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm 
volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(sub), + /* LSE atomics */ + " neg %w[i], %w[i]\n" + " stadd %w[i], %[v]") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + register int w0 asm ("w0") = i; + register atomic_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(sub_return) + " nop", + /* LSE atomics */ + " neg %w[i], %w[i]\n" + " ldaddal %w[i], w30, %[v]\n" + " add %w[i], %w[i], w30") + : [i] "+r" (w0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30", "memory"); + + return w0; +} static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) { @@ -86,69 +155,164 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) register int w1 asm ("w1") = old; register int w2 asm ("w2") = new; - asm volatile( - __LL_SC_CALL(cmpxchg) - : "+r" (x0) - : "r" (w1), "r" (w2) + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC(cmpxchg) + " nop", + /* LSE atomics */ + " mov w30, %w[old]\n" + " casal w30, %w[new], %[v]\n" + " mov %w[ret], w30") + : [ret] "+r" (x0), [v] "+Q" (ptr->counter) + : [old] "r" (w1), [new] "r" (w2) : "x30", "cc", "memory"); return x0; } -#define ATOMIC64_OP(op, asm_op) \ -static inline void atomic64_##op(long i, atomic64_t *v) \ -{ \ - register long x0 asm ("x0") = i; \ - register atomic64_t *x1 asm ("x1") = v; \ - \ - asm volatile( \ - __LL_SC_CALL64(op) \ - : "+r" (x0), "+Q" (v->counter) \ - : "r" (x1) \ - : "x30"); \ -} \ - -#define ATOMIC64_OP_RETURN(op, asm_op) \ -static inline long atomic64_##op##_return(long i, atomic64_t *v) \ -{ \ - register long x0 asm ("x0") = i; \ - register atomic64_t *x1 asm ("x1") = v; \ - \ - asm volatile( \ - __LL_SC_CALL64(op##_return) \ - : "+r" (x0) \ - : "r" (x1) \ - : "x30", "memory"); \ - \ - return x0; \ -} - -#define ATOMIC64_OPS(op, asm_op) \ - ATOMIC64_OP(op, asm_op) \ - ATOMIC64_OP_RETURN(op, asm_op) - 
-ATOMIC64_OPS(add, add) -ATOMIC64_OPS(sub, sub) - -ATOMIC64_OP(and, and) -ATOMIC64_OP(andnot, bic) -ATOMIC64_OP(or, orr) -ATOMIC64_OP(xor, eor) - -#undef ATOMIC64_OPS -#undef ATOMIC64_OP_RETURN -#undef ATOMIC64_OP +#undef __LL_SC_ATOMIC +#define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) + +static inline void atomic64_andnot(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot), + " stclr %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_or(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or), + " stset %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_xor(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor), + " steor %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_add(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(add), + " stadd %[i], %[v]\n") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline long atomic64_add_return(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(add_return), + /* LSE atomics */ + " ldaddal %[i], x30, %[v]\n" + " add %[i], %[i], x30") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30", "memory"); + + return x0; +} + +static inline void atomic64_and(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register 
atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(and), + /* LSE atomics */ + " mvn %[i], %[i]\n" + " stclr %[i], %[v]") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline void atomic64_sub(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(sub), + /* LSE atomics */ + " neg %[i], %[i]\n" + " stadd %[i], %[v]") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30"); +} + +static inline long atomic64_sub_return(long i, atomic64_t *v) +{ + register long x0 asm ("x0") = i; + register atomic64_t *x1 asm ("x1") = v; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(sub_return) + " nop", + /* LSE atomics */ + " neg %[i], %[i]\n" + " ldaddal %[i], x30, %[v]\n" + " add %[i], %[i], x30") + : [i] "+r" (x0), [v] "+Q" (v->counter) + : "r" (x1) + : "x30", "memory"); + + return x0; +} static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) { register unsigned long x0 asm ("x0") = (unsigned long)ptr; register long x1 asm ("x1") = old; register long x2 asm ("x2") = new; - asm volatile( - __LL_SC_CALL64(cmpxchg) - : "+r" (x0) - : "r" (x1), "r" (x2) + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(cmpxchg) + " nop", + /* LSE atomics */ + " mov x30, %[old]\n" + " casal x30, %[new], %[v]\n" + " mov %[ret], x30") + : [ret] "+r" (x0), [v] "+Q" (ptr->counter) + : [old] "r" (x1), [new] "r" (x2) : "x30", "cc", "memory"); return x0; @@ -156,15 +320,33 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) static inline long atomic64_dec_if_positive(atomic64_t *v) { - register unsigned long x0 asm ("x0") = (unsigned long)v; + register long x0 asm ("x0") = (long)v; - asm volatile( - __LL_SC_CALL64(dec_if_positive) - : "+r" (x0) + asm 
volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " nop\n" + __LL_SC_ATOMIC64(dec_if_positive) + " nop\n" + " nop\n" + " nop\n" + " nop\n" + " nop", + /* LSE atomics */ + "1: ldr x30, %[v]\n" + " subs %[ret], x30, #1\n" + " b.mi 2f\n" + " casal x30, %[ret], %[v]\n" + " sub x30, x30, #1\n" + " sub x30, x30, %[ret]\n" + " cbnz x30, 1b\n" + "2:") + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : "x30", "cc", "memory"); return x0; } +#undef __LL_SC_ATOMIC64 + #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h new file mode 100644 index 000000000000..d516624a461e --- /dev/null +++ b/arch/arm64/include/asm/lse.h @@ -0,0 +1,34 @@ +#ifndef __ASM_LSE_H +#define __ASM_LSE_H + +#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) + +#include + +#include +#include + +__asm__(".arch_extension lse"); + +/* Move the ll/sc atomics out-of-line */ +#define __LL_SC_INLINE +#define __LL_SC_PREFIX(x) __ll_sc_##x +#define __LL_SC_EXPORT(x) EXPORT_SYMBOL(__LL_SC_PREFIX(x)) + +/* Macro for constructing calls to out-of-line ll/sc atomics */ +#define __LL_SC_CALL(op) "bl\t" __stringify(__LL_SC_PREFIX(op)) "\n" + +/* In-line patching at runtime */ +#define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ + ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS) + +#else + +#define __LL_SC_INLINE static inline +#define __LL_SC_PREFIX(x) x +#define __LL_SC_EXPORT(x) + +#define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc + +#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ +#endif /* __ASM_LSE_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f4fbbc884893..97785c01acbf 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -285,6 +285,9 @@ static void __init setup_processor(void) case 2: elf_hwcap |= HWCAP_ATOMICS; cpus_set_cap(ARM64_CPU_FEAT_LSE_ATOMICS); + if (IS_ENABLED(CONFIG_AS_LSE) && + IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS)) + pr_info("LSE atomics supported\n"); case 1: /* RESERVED */ case 0: -- cgit 
v1.2.3 From 81bb5c6420635dfd058c210bd342c29c95ccd145 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 10 Feb 2015 03:03:15 +0000 Subject: arm64: locks: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, it makes sense to use them in preference to ll/sc sequences. This patch introduces runtime patching of our locking functions so that LSE atomic instructions are used for spinlocks and rwlocks. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 137 ++++++++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 29 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 0f08ba5cfb33..87ae7efa1211 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -16,6 +16,7 @@ #ifndef __ASM_SPINLOCK_H #define __ASM_SPINLOCK_H +#include #include #include @@ -38,11 +39,21 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) asm volatile( /* Atomically increment the next ticket. */ + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ " prfm pstl1strm, %3\n" "1: ldaxr %w0, %3\n" " add %w1, %w0, %w5\n" " stxr %w2, %w1, %3\n" -" cbnz %w2, 1b\n" +" cbnz %w2, 1b\n", + /* LSE atomics */ +" mov %w2, %w5\n" +" ldadda %w2, %w0, %3\n" +" nop\n" +" nop\n" +" nop\n" + ) + /* Did we get the lock? 
*/ " eor %w1, %w0, %w0, ror #16\n" " cbz %w1, 3f\n" @@ -67,15 +78,25 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) unsigned int tmp; arch_spinlock_t lockval; - asm volatile( -" prfm pstl1strm, %2\n" -"1: ldaxr %w0, %2\n" -" eor %w1, %w0, %w0, ror #16\n" -" cbnz %w1, 2f\n" -" add %w0, %w0, %3\n" -" stxr %w1, %w0, %2\n" -" cbnz %w1, 1b\n" -"2:" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " prfm pstl1strm, %2\n" + "1: ldaxr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 2f\n" + " add %w0, %w0, %3\n" + " stxr %w1, %w0, %2\n" + " cbnz %w1, 1b\n" + "2:", + /* LSE atomics */ + " ldr %w0, %2\n" + " eor %w1, %w0, %w0, ror #16\n" + " cbnz %w1, 1f\n" + " add %w1, %w0, %3\n" + " casa %w0, %w1, %2\n" + " and %w1, %w1, #0xffff\n" + " eor %w1, %w1, %w0, lsr #16\n" + "1:") : "=&r" (lockval), "=&r" (tmp), "+Q" (*lock) : "I" (1 << TICKET_SHIFT) : "memory"); @@ -85,10 +106,19 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock) { - asm volatile( -" stlrh %w1, %0\n" - : "=Q" (lock->owner) - : "r" (lock->owner + 1) + unsigned long tmp; + + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " ldr %w1, %0\n" + " add %w1, %w1, #1\n" + " stlrh %w1, %0", + /* LSE atomics */ + " mov %w1, #1\n" + " nop\n" + " staddlh %w1, %0") + : "=Q" (lock->owner), "=&r" (tmp) + : : "memory"); } @@ -123,13 +153,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned int tmp; - asm volatile( + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ " sevl\n" "1: wfe\n" "2: ldaxr %w0, %1\n" " cbnz %w0, 1b\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 2b\n" + " nop", + /* LSE atomics */ + "1: mov %w0, wzr\n" + "2: casa %w0, %w2, %1\n" + " cbz %w0, 3f\n" + " ldxr %w0, %1\n" + " cbz %w0, 2b\n" + " wfe\n" + " b 1b\n" + "3:") : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -139,12 +180,18 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int tmp; - asm volatile( + asm 
volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldaxr %w0, %1\n" " cbnz %w0, 2f\n" " stxr %w0, %w2, %1\n" " cbnz %w0, 1b\n" - "2:\n" + "2:", + /* LSE atomics */ + " mov %w0, wzr\n" + " casa %w0, %w2, %1\n" + " nop\n" + " nop") : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) : "memory"); @@ -154,9 +201,10 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) static inline void arch_write_unlock(arch_rwlock_t *rw) { - asm volatile( - " stlr %w1, %0\n" - : "=Q" (rw->lock) : "r" (0) : "memory"); + asm volatile(ARM64_LSE_ATOMIC_INSN( + " stlr wzr, %0", + " swpl wzr, wzr, %0") + : "=Q" (rw->lock) :: "memory"); } /* write_can_lock - would write_trylock() succeed? */ @@ -173,6 +221,10 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) * * The memory barriers are implicit with the load-acquire and store-release * instructions. + * + * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC + * and LSE implementations may exhibit different behaviour (although this + * will have no effect on lockdep). 
*/ static inline void arch_read_lock(arch_rwlock_t *rw) { @@ -180,26 +232,43 @@ static inline void arch_read_lock(arch_rwlock_t *rw) asm volatile( " sevl\n" + ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: wfe\n" "2: ldaxr %w0, %2\n" " add %w0, %w0, #1\n" " tbnz %w0, #31, 1b\n" " stxr %w1, %w0, %2\n" - " cbnz %w1, 2b\n" + " nop\n" + " cbnz %w1, 2b", + /* LSE atomics */ + "1: wfe\n" + "2: ldxr %w0, %2\n" + " adds %w1, %w0, #1\n" + " tbnz %w1, #31, 1b\n" + " casa %w0, %w1, %2\n" + " sbc %w0, %w1, %w0\n" + " cbnz %w0, 2b") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "memory"); + : "cc", "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) { unsigned int tmp, tmp2; - asm volatile( + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxr %w0, %2\n" " sub %w0, %w0, #1\n" " stlxr %w1, %w0, %2\n" - " cbnz %w1, 1b\n" + " cbnz %w1, 1b", + /* LSE atomics */ + " movn %w0, #0\n" + " nop\n" + " nop\n" + " staddl %w0, %2") : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : : "memory"); @@ -207,18 +276,28 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw) { - unsigned int tmp, tmp2 = 1; + unsigned int tmp, tmp2; - asm volatile( + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ + " mov %w1, #1\n" "1: ldaxr %w0, %2\n" " add %w0, %w0, #1\n" " tbnz %w0, #31, 2f\n" " stxr %w1, %w0, %2\n" " cbnz %w1, 1b\n" - "2:\n" - : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) + "2:", + /* LSE atomics */ + " ldr %w0, %2\n" + " adds %w1, %w0, #1\n" + " tbnz %w1, #31, 1f\n" + " casa %w0, %w1, %2\n" + " sbc %w1, %w1, %w0\n" + " nop\n" + "1:") + : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "memory"); + : "cc", "memory"); return !tmp2; } -- cgit v1.2.3 From 084f903727e1c3a61d6bcdaeeed30bddc6d7f65a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Feb 2015 04:17:37 +0000 Subject: arm64: bitops: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, 
it makes sense to use them in preference to ll/sc sequences. This patch introduces runtime patching of our bitops functions so that LSE atomic instructions are used instead. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/lse.h | 23 +++++++++++++++++++++-- arch/arm64/lib/bitops.S | 43 ++++++++++++++++++++++++------------------- 2 files changed, 45 insertions(+), 21 deletions(-) diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index d516624a461e..fb3ac56a2cc0 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -4,10 +4,19 @@ #if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) #include - #include #include +#ifdef __ASSEMBLER__ + +.arch_extension lse + +.macro alt_lse, llsc, lse + alternative_insn "\llsc", "\lse", ARM64_CPU_FEAT_LSE_ATOMICS +.endm + +#else /* __ASSEMBLER__ */ + __asm__(".arch_extension lse"); /* Move the ll/sc atomics out-of-line */ @@ -22,7 +31,16 @@ __asm__(".arch_extension lse"); #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS) -#else +#endif /* __ASSEMBLER__ */ +#else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ + +#ifdef __ASSEMBLER__ + +.macro alt_lse, llsc, lse + \llsc +.endm + +#else /* __ASSEMBLER__ */ #define __LL_SC_INLINE static inline #define __LL_SC_PREFIX(x) x @@ -30,5 +48,6 @@ __asm__(".arch_extension lse"); #define ARM64_LSE_ATOMIC_INSN(llsc, lse) llsc +#endif /* __ASSEMBLER__ */ #endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ #endif /* __ASM_LSE_H */ diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index 7dac371cc9a2..bc18457c2bba 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -18,52 +18,57 @@ #include #include +#include /* * x0: bits 5:0 bit offset * bits 31:6 word offset * x1: address */ - .macro bitop, name, instr + .macro bitop, name, llsc, lse ENTRY( \name ) and w3, w0, #63 // Get bit offset eor w0, w0, w3 // Clear 
low bits mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x3, x2, x3 // Create mask -1: ldxr x2, [x1] - \instr x2, x2, x3 - stxr w0, x2, [x1] - cbnz w0, 1b + +alt_lse "1: ldxr x2, [x1]", "\lse x3, [x1]" +alt_lse " \llsc x2, x2, x3", "nop" +alt_lse " stxr w0, x2, [x1]", "nop" +alt_lse " cbnz w0, 1b", "nop" + ret ENDPROC(\name ) .endm - .macro testop, name, instr + .macro testop, name, llsc, lse ENTRY( \name ) and w3, w0, #63 // Get bit offset eor w0, w0, w3 // Clear low bits mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x4, x2, x3 // Create mask -1: ldxr x2, [x1] - lsr x0, x2, x3 // Save old value of bit - \instr x2, x2, x4 // toggle bit - stlxr w5, x2, [x1] - cbnz w5, 1b - dmb ish + +alt_lse "1: ldxr x2, [x1]", "\lse x4, x2, [x1]" + lsr x0, x2, x3 +alt_lse " \llsc x2, x2, x4", "nop" +alt_lse " stlxr w5, x2, [x1]", "nop" +alt_lse " cbnz w5, 1b", "nop" +alt_lse " dmb ish", "nop" + and x0, x0, #1 -3: ret + ret ENDPROC(\name ) .endm /* * Atomic bit operations. */ - bitop change_bit, eor - bitop clear_bit, bic - bitop set_bit, orr + bitop change_bit, eor, steor + bitop clear_bit, bic, stclr + bitop set_bit, orr, stset - testop test_and_change_bit, eor - testop test_and_clear_bit, bic - testop test_and_set_bit, orr + testop test_and_change_bit, eor, ldeoral + testop test_and_clear_bit, bic, ldclral + testop test_and_set_bit, orr, ldsetal -- cgit v1.2.3 From c8366ba0fb65063b6b4f69c7af1ea74152435590 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 31 Mar 2015 14:11:24 +0100 Subject: arm64: xchg: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, it makes sense to use them in preference to ll/sc sequences. This patch introduces runtime patching of our xchg primitives so that the LSE swp instruction (yes, you read right!) is used instead. 
Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d8c25b7b18fb..d0cce8068902 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -22,6 +22,7 @@ #include #include +#include static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size) { @@ -29,37 +30,65 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size switch (size) { case 1: - asm volatile("// __xchg1\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" + " dmb ish", + /* LSE atomics */ + " nop\n" + " swpalb %w3, %w0, %2\n" + " nop\n" + " nop") : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) : "memory"); break; case 2: - asm volatile("// __xchg2\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" + " dmb ish", + /* LSE atomics */ + " nop\n" + " swpalh %w3, %w0, %2\n" + " nop\n" + " nop") : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) : "memory"); break; case 4: - asm volatile("// __xchg4\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" + " dmb ish", + /* LSE atomics */ + " nop\n" + " swpal %w3, %w0, %2\n" + " nop\n" + " nop") : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) : "memory"); break; case 8: - asm volatile("// __xchg8\n" + asm volatile(ARM64_LSE_ATOMIC_INSN( + /* LL/SC */ "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" + " dmb ish", + /* LSE atomics */ + " nop\n" + " swpal %3, %0, %2\n" + " nop\n" + " nop") : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) : "memory"); @@ -68,7 +97,6 @@ static inline unsigned long __xchg(unsigned 
long x, volatile void *ptr, int size BUILD_BUG(); } - smp_mb(); return ret; } -- cgit v1.2.3 From c342f78217e822d2178265b0b1de232eeb717149 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 23 Apr 2015 20:08:49 +0100 Subject: arm64: cmpxchg: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, it makes sense to use them in preference to ll/sc sequences. This patch introduces runtime patching of our cmpxchg primitives so that the LSE cas instruction is used instead. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic.h | 3 +- arch/arm64/include/asm/atomic_ll_sc.h | 38 ++++++++++++++++ arch/arm64/include/asm/atomic_lse.h | 39 ++++++++++++++++ arch/arm64/include/asm/cmpxchg.h | 84 ++++++++--------------------------- 4 files changed, 98 insertions(+), 66 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 836226d5e12c..1fe8f209aeb4 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -24,7 +24,6 @@ #include #include -#include #include #define ATOMIC_INIT(i) { (i) } @@ -41,6 +40,8 @@ #undef __ARM64_IN_ATOMIC_IMPL +#include + /* * On ARM, ordinary assignment (str instruction) doesn't clear the local * strex/ldrex monitor on some implementations. 
The reason we can use it for diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 4b981ba57e78..4864158d486e 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -215,4 +215,42 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) } __LL_SC_EXPORT(atomic64_dec_if_positive); +#define __CMPXCHG_CASE(w, sz, name, mb, cl) \ +__LL_SC_INLINE unsigned long \ +__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ + unsigned long old, \ + unsigned long new)) \ +{ \ + unsigned long tmp, oldval; \ + \ + asm volatile( \ + " " #mb "\n" \ + "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ + " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ + " cbnz %" #w "[tmp], 2f\n" \ + " stxr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ + " cbnz %w[tmp], 1b\n" \ + " " #mb "\n" \ + " mov %" #w "[oldval], %" #w "[old]\n" \ + "2:" \ + : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval), \ + [v] "+Q" (*(unsigned long *)ptr) \ + : [old] "Lr" (old), [new] "r" (new) \ + : cl); \ + \ + return oldval; \ +} \ +__LL_SC_EXPORT(__cmpxchg_case_##name); + +__CMPXCHG_CASE(w, b, 1, , ) +__CMPXCHG_CASE(w, h, 2, , ) +__CMPXCHG_CASE(w, , 4, , ) +__CMPXCHG_CASE( , , 8, , ) +__CMPXCHG_CASE(w, b, mb_1, dmb ish, "memory") +__CMPXCHG_CASE(w, h, mb_2, dmb ish, "memory") +__CMPXCHG_CASE(w, , mb_4, dmb ish, "memory") +__CMPXCHG_CASE( , , mb_8, dmb ish, "memory") + +#undef __CMPXCHG_CASE + #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 6e21b5e0c9d6..b39ae4c1451a 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -349,4 +349,43 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) #undef __LL_SC_ATOMIC64 +#define __LL_SC_CMPXCHG(op) __LL_SC_CALL(__cmpxchg_case_##op) + +#define __CMPXCHG_CASE(w, sz, name, mb, cl...) 
\ +static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ + unsigned long old, \ + unsigned long new) \ +{ \ + register unsigned long x0 asm ("x0") = (unsigned long)ptr; \ + register unsigned long x1 asm ("x1") = old; \ + register unsigned long x2 asm ("x2") = new; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + "nop\n" \ + __LL_SC_CMPXCHG(name) \ + "nop", \ + /* LSE atomics */ \ + " mov " #w "30, %" #w "[old]\n" \ + " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ + " mov %" #w "[ret], " #w "30") \ + : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr) \ + : [old] "r" (x1), [new] "r" (x2) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +__CMPXCHG_CASE(w, b, 1, ) +__CMPXCHG_CASE(w, h, 2, ) +__CMPXCHG_CASE(w, , 4, ) +__CMPXCHG_CASE(x, , 8, ) +__CMPXCHG_CASE(w, b, mb_1, al, "memory") +__CMPXCHG_CASE(w, h, mb_2, al, "memory") +__CMPXCHG_CASE(w, , mb_4, al, "memory") +__CMPXCHG_CASE(x, , mb_8, al, "memory") + +#undef __LL_SC_CMPXCHG +#undef __CMPXCHG_CASE + #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index d0cce8068902..60a558127cef 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -111,74 +112,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) { - unsigned long oldval = 0, res; - switch (size) { case 1: - do { - asm volatile("// __cmpxchg1\n" - " ldxrb %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - " stxrb %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - + return __cmpxchg_case_1(ptr, old, new); case 2: - do { - asm volatile("// __cmpxchg2\n" - " ldxrh %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - 
" stxrh %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - + return __cmpxchg_case_2(ptr, old, new); case 4: - do { - asm volatile("// __cmpxchg4\n" - " ldxr %w1, %2\n" - " mov %w0, #0\n" - " cmp %w1, %w3\n" - " b.ne 1f\n" - " stxr %w0, %w4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - + return __cmpxchg_case_4(ptr, old, new); case 8: - do { - asm volatile("// __cmpxchg8\n" - " ldxr %1, %2\n" - " mov %w0, #0\n" - " cmp %1, %3\n" - " b.ne 1f\n" - " stxr %w0, %4, %2\n" - "1:\n" - : "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr) - : "Ir" (old), "r" (new) - : "cc"); - } while (res); - break; - + return __cmpxchg_case_8(ptr, old, new); default: BUILD_BUG(); } - return oldval; + unreachable(); } #define system_has_cmpxchg_double() 1 @@ -229,13 +176,20 @@ static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2, static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, unsigned long new, int size) { - unsigned long ret; - - smp_mb(); - ret = __cmpxchg(ptr, old, new, size); - smp_mb(); + switch (size) { + case 1: + return __cmpxchg_case_mb_1(ptr, old, new); + case 2: + return __cmpxchg_case_mb_2(ptr, old, new); + case 4: + return __cmpxchg_case_mb_4(ptr, old, new); + case 8: + return __cmpxchg_case_mb_8(ptr, old, new); + default: + BUILD_BUG(); + } - return ret; + unreachable(); } #define cmpxchg(ptr, o, n) \ -- cgit v1.2.3 From e9a4b795652f654a7870727e5333c1b709b8736c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 14 May 2015 18:05:50 +0100 Subject: arm64: cmpxchg_dbl: patch in lse instructions when supported by the CPU On CPUs which support the LSE atomic instructions introduced in ARMv8.1, it makes sense to use them in preference to ll/sc sequences. 
This patch introduces runtime patching of our cmpxchg_double primitives so that the LSE casp instruction is used instead. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 34 ++++++++++++++++++ arch/arm64/include/asm/atomic_lse.h | 43 ++++++++++++++++++++++ arch/arm64/include/asm/cmpxchg.h | 68 +++++++++-------------------------- 3 files changed, 94 insertions(+), 51 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 4864158d486e..f89f1e4ba577 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -253,4 +253,38 @@ __CMPXCHG_CASE( , , mb_8, dmb ish, "memory") #undef __CMPXCHG_CASE +#define __CMPXCHG_DBL(name, mb, cl) \ +__LL_SC_INLINE int \ +__LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ + unsigned long old2, \ + unsigned long new1, \ + unsigned long new2, \ + volatile void *ptr)) \ +{ \ + unsigned long tmp, ret; \ + \ + asm volatile("// __cmpxchg_double" #name "\n" \ + " " #mb "\n" \ + "1: ldxp %0, %1, %2\n" \ + " eor %0, %0, %3\n" \ + " eor %1, %1, %4\n" \ + " orr %1, %0, %1\n" \ + " cbnz %1, 2f\n" \ + " stxp %w0, %5, %6, %2\n" \ + " cbnz %w0, 1b\n" \ + " " #mb "\n" \ + "2:" \ + : "=&r" (tmp), "=&r" (ret), "+Q" (*(unsigned long *)ptr) \ + : "r" (old1), "r" (old2), "r" (new1), "r" (new2) \ + : cl); \ + \ + return ret; \ +} \ +__LL_SC_EXPORT(__cmpxchg_double##name); + +__CMPXCHG_DBL( , , ) +__CMPXCHG_DBL(_mb, dmb ish, "memory") + +#undef __CMPXCHG_DBL + #endif /* __ASM_ATOMIC_LL_SC_H */ diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index b39ae4c1451a..f3cb1052ab24 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -388,4 +388,47 @@ __CMPXCHG_CASE(x, , mb_8, al, "memory") #undef __LL_SC_CMPXCHG #undef __CMPXCHG_CASE +#define __LL_SC_CMPXCHG_DBL(op) __LL_SC_CALL(__cmpxchg_double##op) + +#define __CMPXCHG_DBL(name, mb, 
cl...) \ +static inline int __cmpxchg_double##name(unsigned long old1, \ + unsigned long old2, \ + unsigned long new1, \ + unsigned long new2, \ + volatile void *ptr) \ +{ \ + unsigned long oldval1 = old1; \ + unsigned long oldval2 = old2; \ + register unsigned long x0 asm ("x0") = old1; \ + register unsigned long x1 asm ("x1") = old2; \ + register unsigned long x2 asm ("x2") = new1; \ + register unsigned long x3 asm ("x3") = new2; \ + register unsigned long x4 asm ("x4") = (unsigned long)ptr; \ + \ + asm volatile(ARM64_LSE_ATOMIC_INSN( \ + /* LL/SC */ \ + " nop\n" \ + " nop\n" \ + " nop\n" \ + __LL_SC_CMPXCHG_DBL(name), \ + /* LSE atomics */ \ + " casp" #mb "\t%[old1], %[old2], %[new1], %[new2], %[v]\n"\ + " eor %[old1], %[old1], %[oldval1]\n" \ + " eor %[old2], %[old2], %[oldval2]\n" \ + " orr %[old1], %[old1], %[old2]") \ + : [old1] "+r" (x0), [old2] "+r" (x1), \ + [v] "+Q" (*(unsigned long *)ptr) \ + : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ + [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ + : "x30" , ##cl); \ + \ + return x0; \ +} + +__CMPXCHG_DBL( , ) +__CMPXCHG_DBL(_mb, al, "memory") + +#undef __LL_SC_CMPXCHG_DBL +#undef __CMPXCHG_DBL + #endif /* __ASM_ATOMIC_LSE_H */ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 60a558127cef..f70212629d02 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -128,51 +128,6 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, unreachable(); } -#define system_has_cmpxchg_double() 1 - -static inline int __cmpxchg_double(volatile void *ptr1, volatile void *ptr2, - unsigned long old1, unsigned long old2, - unsigned long new1, unsigned long new2, int size) -{ - unsigned long loop, lost; - - switch (size) { - case 8: - VM_BUG_ON((unsigned long *)ptr2 - (unsigned long *)ptr1 != 1); - do { - asm volatile("// __cmpxchg_double8\n" - " ldxp %0, %1, %2\n" - " eor %0, %0, %3\n" - " eor %1, %1, %4\n" - " orr %1, %0, %1\n" 
- " mov %w0, #0\n" - " cbnz %1, 1f\n" - " stxp %w0, %5, %6, %2\n" - "1:\n" - : "=&r"(loop), "=&r"(lost), "+Q" (*(u64 *)ptr1) - : "r" (old1), "r"(old2), "r"(new1), "r"(new2)); - } while (loop); - break; - default: - BUILD_BUG(); - } - - return !lost; -} - -static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2, - unsigned long old1, unsigned long old2, - unsigned long new1, unsigned long new2, int size) -{ - int ret; - - smp_mb(); - ret = __cmpxchg_double(ptr1, ptr2, old1, old2, new1, new2, size); - smp_mb(); - - return ret; -} - static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, unsigned long new, int size) { @@ -210,21 +165,32 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, __ret; \ }) +#define system_has_cmpxchg_double() 1 + +#define __cmpxchg_double_check(ptr1, ptr2) \ +({ \ + if (sizeof(*(ptr1)) != 8) \ + BUILD_BUG(); \ + VM_BUG_ON((unsigned long *)(ptr2) - (unsigned long *)(ptr1) != 1); \ +}) + #define cmpxchg_double(ptr1, ptr2, o1, o2, n1, n2) \ ({\ int __ret;\ - __ret = __cmpxchg_double_mb((ptr1), (ptr2), (unsigned long)(o1), \ - (unsigned long)(o2), (unsigned long)(n1), \ - (unsigned long)(n2), sizeof(*(ptr1)));\ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double_mb((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ __ret; \ }) #define cmpxchg_double_local(ptr1, ptr2, o1, o2, n1, n2) \ ({\ int __ret;\ - __ret = __cmpxchg_double((ptr1), (ptr2), (unsigned long)(o1), \ - (unsigned long)(o2), (unsigned long)(n1), \ - (unsigned long)(n2), sizeof(*(ptr1)));\ + __cmpxchg_double_check(ptr1, ptr2); \ + __ret = !__cmpxchg_double((unsigned long)(o1), (unsigned long)(o2), \ + (unsigned long)(n1), (unsigned long)(n2), \ + ptr1); \ __ret; \ }) -- cgit v1.2.3 From 0bc671d3f4bee9c31110d096ada0de52380e693d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2015 14:26:59 +0100 Subject: arm64: cmpxchg: avoid "cc" 
clobber in ll/sc routines We can perform the cmpxchg comparison using eor and cbnz which avoids the "cc" clobber for the ll/sc case and consequently for the LSE case where we may have to fall-back on the ll/sc code at runtime. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 14 ++++++-------- arch/arm64/include/asm/atomic_lse.h | 4 ++-- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index f89f1e4ba577..c02684d1eab3 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -101,14 +101,13 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) asm volatile("// atomic_cmpxchg\n" "1: ldxr %w1, %2\n" -" cmp %w1, %w3\n" -" b.ne 2f\n" +" eor %w0, %w1, %w3\n" +" cbnz %w0, 2f\n" " stxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); + : "Lr" (old), "r" (new)); smp_mb(); return oldval; @@ -179,14 +178,13 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) asm volatile("// atomic64_cmpxchg\n" "1: ldxr %1, %2\n" -" cmp %1, %3\n" -" b.ne 2f\n" +" eor %0, %1, %3\n" +" cbnz %w0, 2f\n" " stxr %w0, %4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) - : "Ir" (old), "r" (new) - : "cc"); + : "Lr" (old), "r" (new)); smp_mb(); return oldval; diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index f3cb1052ab24..a3d21e7cee4f 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -166,7 +166,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) " mov %w[ret], w30") : [ret] "+r" (x0), [v] "+Q" (ptr->counter) : [old] "r" (w1), [new] "r" (w2) - : "x30", "cc", "memory"); + : "x30", "memory"); return x0; } @@ -313,7 +313,7 @@ static inline long 
atomic64_cmpxchg(atomic64_t *ptr, long old, long new) " mov %[ret], x30") : [ret] "+r" (x0), [v] "+Q" (ptr->counter) : [old] "r" (x1), [new] "r" (x2) - : "x30", "cc", "memory"); + : "x30", "memory"); return x0; } -- cgit v1.2.3 From 4e39715f4b5cb3b44576fedb2d38aca87de3cf48 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2015 14:47:59 +0100 Subject: arm64: cmpxchg: avoid memory barrier on comparison failure cmpxchg doesn't require memory barrier semantics when the value comparison fails, so make the barrier conditional on success. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 48 ++++++++++++++++------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index c02684d1eab3..5a9fb37272d4 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -97,19 +97,18 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) unsigned long tmp; int oldval; - smp_mb(); - asm volatile("// atomic_cmpxchg\n" "1: ldxr %w1, %2\n" " eor %w0, %w1, %w3\n" " cbnz %w0, 2f\n" -" stxr %w0, %w4, %2\n" +" stlxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" +" dmb ish\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) - : "Lr" (old), "r" (new)); + : "Lr" (old), "r" (new) + : "memory"); - smp_mb(); return oldval; } __LL_SC_EXPORT(atomic_cmpxchg); @@ -174,19 +173,18 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) long oldval; unsigned long res; - smp_mb(); - asm volatile("// atomic64_cmpxchg\n" "1: ldxr %1, %2\n" " eor %0, %1, %3\n" " cbnz %w0, 2f\n" -" stxr %w0, %4, %2\n" +" stlxr %w0, %4, %2\n" " cbnz %w0, 1b\n" +" dmb ish\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) - : "Lr" (old), "r" (new)); + : "Lr" (old), "r" (new) + : "memory"); - smp_mb(); return oldval; } __LL_SC_EXPORT(atomic64_cmpxchg); @@ -213,7 +211,7 @@ 
__LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) } __LL_SC_EXPORT(atomic64_dec_if_positive); -#define __CMPXCHG_CASE(w, sz, name, mb, cl) \ +#define __CMPXCHG_CASE(w, sz, name, mb, rel, cl) \ __LL_SC_INLINE unsigned long \ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long old, \ @@ -222,11 +220,10 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long tmp, oldval; \ \ asm volatile( \ - " " #mb "\n" \ "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ - " stxr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ + " st" #rel "xr" #sz "\t%w[tmp], %" #w "[new], %[v]\n" \ " cbnz %w[tmp], 1b\n" \ " " #mb "\n" \ " mov %" #w "[oldval], %" #w "[old]\n" \ @@ -240,18 +237,18 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ } \ __LL_SC_EXPORT(__cmpxchg_case_##name); -__CMPXCHG_CASE(w, b, 1, , ) -__CMPXCHG_CASE(w, h, 2, , ) -__CMPXCHG_CASE(w, , 4, , ) -__CMPXCHG_CASE( , , 8, , ) -__CMPXCHG_CASE(w, b, mb_1, dmb ish, "memory") -__CMPXCHG_CASE(w, h, mb_2, dmb ish, "memory") -__CMPXCHG_CASE(w, , mb_4, dmb ish, "memory") -__CMPXCHG_CASE( , , mb_8, dmb ish, "memory") +__CMPXCHG_CASE(w, b, 1, , , ) +__CMPXCHG_CASE(w, h, 2, , , ) +__CMPXCHG_CASE(w, , 4, , , ) +__CMPXCHG_CASE( , , 8, , , ) +__CMPXCHG_CASE(w, b, mb_1, dmb ish, l, "memory") +__CMPXCHG_CASE(w, h, mb_2, dmb ish, l, "memory") +__CMPXCHG_CASE(w, , mb_4, dmb ish, l, "memory") +__CMPXCHG_CASE( , , mb_8, dmb ish, l, "memory") #undef __CMPXCHG_CASE -#define __CMPXCHG_DBL(name, mb, cl) \ +#define __CMPXCHG_DBL(name, mb, rel, cl) \ __LL_SC_INLINE int \ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ unsigned long old2, \ @@ -262,13 +259,12 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ unsigned long tmp, ret; \ \ asm volatile("// __cmpxchg_double" #name "\n" \ - " " #mb "\n" \ "1: ldxp %0, %1, %2\n" \ " eor %0, %0, %3\n" \ " eor %1, %1, %4\n" \ " orr %1, %0, %1\n" \ " cbnz %1, 
2f\n" \ - " stxp %w0, %5, %6, %2\n" \ + " st" #rel "xp %w0, %5, %6, %2\n" \ " cbnz %w0, 1b\n" \ " " #mb "\n" \ "2:" \ @@ -280,8 +276,8 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ } \ __LL_SC_EXPORT(__cmpxchg_double##name); -__CMPXCHG_DBL( , , ) -__CMPXCHG_DBL(_mb, dmb ish, "memory") +__CMPXCHG_DBL( , , , ) +__CMPXCHG_DBL(_mb, dmb ish, l, "memory") #undef __CMPXCHG_DBL -- cgit v1.2.3 From a82e62382fcbbf5c3348e802af73583e0cac39c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 4 Jun 2015 16:41:36 +0100 Subject: arm64: atomics: tidy up common atomic{,64}_* macros The common (i.e. identical for ll/sc and lse) atomic macros in atomic.h are needlessly different for atomic_t and atomic64_t. This patch tidies up the definitions to make them consistent across the two atomic types and factors out common code such as the add_unless implementation based on cmpxchg. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic.h | 99 +++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 59 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 1fe8f209aeb4..0b26da365f3b 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -26,8 +26,6 @@ #include #include -#define ATOMIC_INIT(i) { (i) } - #ifdef __KERNEL__ #define __ARM64_IN_ATOMIC_IMPL @@ -42,71 +40,54 @@ #include -/* - * On ARM, ordinary assignment (str instruction) doesn't clear the local - * strex/ldrex monitor on some implementations. The reason we can use it for - * atomic_set() is the clrex or dummy strex done on every exception return.
- */ -#define atomic_read(v) ACCESS_ONCE((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) - -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) - -static inline int __atomic_add_unless(atomic_t *v, int a, int u) -{ - int c, old; - - c = atomic_read(v); - while (c != u && (old = atomic_cmpxchg((v), c, c + a)) != c) - c = old; - return c; -} - -#define atomic_inc(v) atomic_add(1, v) -#define atomic_dec(v) atomic_sub(1, v) +#define ___atomic_add_unless(v, a, u, sfx) \ +({ \ + typeof((v)->counter) c, old; \ + \ + c = atomic##sfx##_read(v); \ + while (c != (u) && \ + (old = atomic##sfx##_cmpxchg((v), c, c + (a))) != c) \ + c = old; \ + c; \ + }) -#define atomic_inc_and_test(v) (atomic_add_return(1, v) == 0) -#define atomic_dec_and_test(v) (atomic_sub_return(1, v) == 0) -#define atomic_inc_return(v) (atomic_add_return(1, v)) -#define atomic_dec_return(v) (atomic_sub_return(1, v)) -#define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0) - -#define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0) +#define ATOMIC_INIT(i) { (i) } -#define atomic_andnot atomic_andnot +#define atomic_read(v) READ_ONCE((v)->counter) +#define atomic_set(v, i) (((v)->counter) = (i)) +#define atomic_xchg(v, new) xchg(&((v)->counter), (new)) + +#define atomic_inc(v) atomic_add(1, (v)) +#define atomic_dec(v) atomic_sub(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) +#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_add_negative(i, v) (atomic_add_return((i), (v)) < 0) +#define __atomic_add_unless(v, a, u) ___atomic_add_unless(v, a, u,) +#define atomic_andnot atomic_andnot /* * 64-bit atomic operations. 
*/ -#define ATOMIC64_INIT(i) { (i) } - -#define atomic64_read(v) ACCESS_ONCE((v)->counter) -#define atomic64_set(v,i) (((v)->counter) = (i)) - -#define atomic64_xchg(v, new) (xchg(&((v)->counter), new)) - -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) -{ - long c, old; - - c = atomic64_read(v); - while (c != u && (old = atomic64_cmpxchg((v), c, c + a)) != c) - c = old; - - return c != u; -} - -#define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) -#define atomic64_inc(v) atomic64_add(1LL, (v)) -#define atomic64_inc_return(v) atomic64_add_return(1LL, (v)) +#define ATOMIC64_INIT ATOMIC_INIT +#define atomic64_read atomic_read +#define atomic64_set atomic_set +#define atomic64_xchg atomic_xchg + +#define atomic64_inc(v) atomic64_add(1, (v)) +#define atomic64_dec(v) atomic64_sub(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) #define atomic64_inc_and_test(v) (atomic64_inc_return(v) == 0) -#define atomic64_sub_and_test(a, v) (atomic64_sub_return((a), (v)) == 0) -#define atomic64_dec(v) atomic64_sub(1LL, (v)) -#define atomic64_dec_return(v) atomic64_sub_return(1LL, (v)) -#define atomic64_dec_and_test(v) (atomic64_dec_return((v)) == 0) -#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1LL, 0LL) +#define atomic64_dec_and_test(v) (atomic64_dec_return(v) == 0) +#define atomic64_sub_and_test(i, v) (atomic64_sub_return((i), (v)) == 0) +#define atomic64_add_negative(i, v) (atomic64_add_return((i), (v)) < 0) +#define atomic64_add_unless(v, a, u) (___atomic_add_unless(v, a, u, 64) != u) +#define atomic64_andnot atomic64_andnot -#define atomic64_andnot atomic64_andnot +#define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0) #endif #endif -- cgit v1.2.3 From 0ea366f5e1b6413a6095dce60ea49ae51e468b61 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2015 13:31:10 +0100 Subject: arm64: atomics: prefetch the destination word for write prior 
to stxr The cost of changing a cacheline from shared to exclusive state can be significant, especially when this is triggered by an exclusive store, since it may result in having to retry the transaction. This patch makes use of prfm to prefetch cachelines for write prior to ldxr/stxr loops when using the ll/sc atomic routines. Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 9 +++++++++ arch/arm64/include/asm/cmpxchg.h | 8 ++++++++ arch/arm64/include/asm/futex.h | 2 ++ arch/arm64/lib/bitops.S | 2 ++ 4 files changed, 21 insertions(+) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 5a9fb37272d4..50d6abd3c439 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -45,6 +45,7 @@ __LL_SC_PREFIX(atomic_##op(int i, atomic_t *v)) \ int result; \ \ asm volatile("// atomic_" #op "\n" \ +" prfm pstl1strm, %2\n" \ "1: ldxr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " stxr %w1, %w0, %2\n" \ @@ -62,6 +63,7 @@ __LL_SC_PREFIX(atomic_##op##_return(int i, atomic_t *v)) \ int result; \ \ asm volatile("// atomic_" #op "_return\n" \ +" prfm pstl1strm, %2\n" \ "1: ldxr %w0, %2\n" \ " " #asm_op " %w0, %w0, %w3\n" \ " stlxr %w1, %w0, %2\n" \ @@ -98,6 +100,7 @@ __LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) int oldval; asm volatile("// atomic_cmpxchg\n" +" prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " eor %w0, %w1, %w3\n" " cbnz %w0, 2f\n" @@ -121,6 +124,7 @@ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "\n" \ +" prfm pstl1strm, %2\n" \ "1: ldxr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " stxr %w1, %0, %2\n" \ @@ -138,6 +142,7 @@ __LL_SC_PREFIX(atomic64_##op##_return(long i, atomic64_t *v)) \ unsigned long tmp; \ \ asm volatile("// atomic64_" #op "_return\n" \ +" prfm pstl1strm, %2\n" \ "1: ldxr %0, %2\n" \ " " #asm_op " %0, %0, %3\n" \ " stlxr %w1, %0, %2\n" \ @@ 
-174,6 +179,7 @@ __LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) unsigned long res; asm volatile("// atomic64_cmpxchg\n" +" prfm pstl1strm, %2\n" "1: ldxr %1, %2\n" " eor %0, %1, %3\n" " cbnz %w0, 2f\n" @@ -196,6 +202,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" +" prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " subs %0, %0, #1\n" " b.mi 2f\n" @@ -220,6 +227,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long tmp, oldval; \ \ asm volatile( \ + " prfm pstl1strm, %2\n" \ "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ @@ -259,6 +267,7 @@ __LL_SC_PREFIX(__cmpxchg_double##name(unsigned long old1, \ unsigned long tmp, ret; \ \ asm volatile("// __cmpxchg_double" #name "\n" \ + " prfm pstl1strm, %2\n" \ "1: ldxp %0, %1, %2\n" \ " eor %0, %0, %3\n" \ " eor %1, %1, %4\n" \ diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index f70212629d02..7bfda0944c9b 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -33,12 +33,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size case 1: asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ + " prfm pstl1strm, %2\n" "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" " dmb ish", /* LSE atomics */ " nop\n" + " nop\n" " swpalb %w3, %w0, %2\n" " nop\n" " nop") @@ -49,12 +51,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size case 2: asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ + " prfm pstl1strm, %2\n" "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" " dmb ish", /* LSE atomics */ " nop\n" + " nop\n" " swpalh %w3, %w0, %2\n" " nop\n" " nop") @@ -65,12 +69,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size case 4: asm volatile(ARM64_LSE_ATOMIC_INSN( 
/* LL/SC */ + " prfm pstl1strm, %2\n" "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" " dmb ish", /* LSE atomics */ " nop\n" + " nop\n" " swpal %w3, %w0, %2\n" " nop\n" " nop") @@ -81,12 +87,14 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size case 8: asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ + " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" " dmb ish", /* LSE atomics */ " nop\n" + " nop\n" " swpal %3, %0, %2\n" " nop\n" " nop") diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 775e85b9d1f2..007a69fc4f40 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -30,6 +30,7 @@ asm volatile( \ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ +" prfm pstl1strm, %2\n" \ "1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ @@ -120,6 +121,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" +" prfm pstl1strm, %2\n" "1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index bc18457c2bba..43ac736baa5b 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -31,6 +31,7 @@ ENTRY( \name ) eor w0, w0, w3 // Clear low bits mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset +alt_lse " prfm pstl1strm, [x1]", "nop" lsl x3, x2, x3 // Create mask alt_lse "1: ldxr x2, [x1]", "\lse x3, [x1]" @@ -48,6 +49,7 @@ ENTRY( \name ) eor w0, w0, w3 // Clear low bits mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset +alt_lse " prfm pstl1strm, [x1]", "nop" lsl x4, x2, x3 // Create mask alt_lse "1: ldxr x2, [x1]", "\lse x4, x2, [x1]" -- cgit v1.2.3 From 6059a7b6e818023436a9058170a4fea1c670dc98 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 4 Jun 2015 17:46:37 +0100 Subject: arm64: atomics: implement atomic{,64}_cmpxchg using cmpxchg We don't 
need duplicate cmpxchg implementations, so use cmpxchg to implement atomic{,64}_cmpxchg, like we do for xchg already. Reviewed-by: Steve Capper Reviewed-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic.h | 2 ++ arch/arm64/include/asm/atomic_ll_sc.h | 46 ----------------------------------- arch/arm64/include/asm/atomic_lse.h | 43 -------------------------------- 3 files changed, 2 insertions(+), 89 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 0b26da365f3b..35a67783cfa0 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -56,6 +56,7 @@ #define atomic_read(v) READ_ONCE((v)->counter) #define atomic_set(v, i) (((v)->counter) = (i)) #define atomic_xchg(v, new) xchg(&((v)->counter), (new)) +#define atomic_cmpxchg(v, old, new) cmpxchg(&((v)->counter), (old), (new)) #define atomic_inc(v) atomic_add(1, (v)) #define atomic_dec(v) atomic_sub(1, (v)) @@ -75,6 +76,7 @@ #define atomic64_read atomic_read #define atomic64_set atomic_set #define atomic64_xchg atomic_xchg +#define atomic64_cmpxchg atomic_cmpxchg #define atomic64_inc(v) atomic64_add(1, (v)) #define atomic64_dec(v) atomic64_sub(1, (v)) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 50d6abd3c439..5e2d1db3a1db 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -93,29 +93,6 @@ ATOMIC_OP(xor, eor) #undef ATOMIC_OP_RETURN #undef ATOMIC_OP -__LL_SC_INLINE int -__LL_SC_PREFIX(atomic_cmpxchg(atomic_t *ptr, int old, int new)) -{ - unsigned long tmp; - int oldval; - - asm volatile("// atomic_cmpxchg\n" -" prfm pstl1strm, %2\n" -"1: ldxr %w1, %2\n" -" eor %w0, %w1, %w3\n" -" cbnz %w0, 2f\n" -" stlxr %w0, %w4, %2\n" -" cbnz %w0, 1b\n" -" dmb ish\n" -"2:" - : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) - : "Lr" (old), "r" (new) - : "memory"); - - return oldval; -} -__LL_SC_EXPORT(atomic_cmpxchg); - #define 
ATOMIC64_OP(op, asm_op) \ __LL_SC_INLINE void \ __LL_SC_PREFIX(atomic64_##op(long i, atomic64_t *v)) \ @@ -172,29 +149,6 @@ ATOMIC64_OP(xor, eor) #undef ATOMIC64_OP_RETURN #undef ATOMIC64_OP -__LL_SC_INLINE long -__LL_SC_PREFIX(atomic64_cmpxchg(atomic64_t *ptr, long old, long new)) -{ - long oldval; - unsigned long res; - - asm volatile("// atomic64_cmpxchg\n" -" prfm pstl1strm, %2\n" -"1: ldxr %1, %2\n" -" eor %0, %1, %3\n" -" cbnz %w0, 2f\n" -" stlxr %w0, %4, %2\n" -" cbnz %w0, 1b\n" -" dmb ish\n" -"2:" - : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) - : "Lr" (old), "r" (new) - : "memory"); - - return oldval; -} -__LL_SC_EXPORT(atomic64_cmpxchg); - __LL_SC_INLINE long __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) { diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index a3d21e7cee4f..30e5cbcfc707 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -149,28 +149,6 @@ static inline int atomic_sub_return(int i, atomic_t *v) return w0; } -static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) -{ - register unsigned long x0 asm ("x0") = (unsigned long)ptr; - register int w1 asm ("w1") = old; - register int w2 asm ("w2") = new; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* LL/SC */ - " nop\n" - __LL_SC_ATOMIC(cmpxchg) - " nop", - /* LSE atomics */ - " mov w30, %w[old]\n" - " casal w30, %w[new], %[v]\n" - " mov %w[ret], w30") - : [ret] "+r" (x0), [v] "+Q" (ptr->counter) - : [old] "r" (w1), [new] "r" (w2) - : "x30", "memory"); - - return x0; -} - #undef __LL_SC_ATOMIC #define __LL_SC_ATOMIC64(op) __LL_SC_CALL(atomic64_##op) @@ -296,27 +274,6 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) return x0; } -static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) -{ - register unsigned long x0 asm ("x0") = (unsigned long)ptr; - register long x1 asm ("x1") = old; - register long x2 asm ("x2") = new; - - asm volatile(ARM64_LSE_ATOMIC_INSN( - /* 
LL/SC */ - " nop\n" - __LL_SC_ATOMIC64(cmpxchg) - " nop", - /* LSE atomics */ - " mov x30, %[old]\n" - " casal x30, %[new], %[v]\n" - " mov %[ret], x30") - : [ret] "+r" (x0), [v] "+Q" (ptr->counter) - : [old] "r" (x1), [new] "r" (x2) - : "x30", "memory"); - - return x0; -} static inline long atomic64_dec_if_positive(atomic64_t *v) { -- cgit v1.2.3 From db26217e6f54647b137a0fe1f2ab346de67d6f3e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 29 May 2015 14:44:06 +0100 Subject: arm64: atomic64_dec_if_positive: fix incorrect branch condition If we attempt to atomic64_dec_if_positive on INT_MIN, we will underflow and incorrectly decide that the original parameter was positive. This patch fixes the broken condition code so that we handle this corner case correctly. Reviewed-by: Steve Capper Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 2 +- arch/arm64/include/asm/atomic_lse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 5e2d1db3a1db..6671978e60fe 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -159,7 +159,7 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v)) " prfm pstl1strm, %2\n" "1: ldxr %0, %2\n" " subs %0, %0, #1\n" -" b.mi 2f\n" +" b.lt 2f\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" " dmb ish\n" diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 30e5cbcfc707..f873bf61e17b 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -291,7 +291,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) /* LSE atomics */ "1: ldr x30, %[v]\n" " subs %[ret], x30, #1\n" - " b.mi 2f\n" + " b.lt 2f\n" " casal x30, %[ret], %[v]\n" " sub x30, x30, #1\n" " sub x30, x30, %[ret]\n" -- cgit v1.2.3 From 95eff6b27c40c7205f72f354712c3687d808e7f6 Mon Sep 17 00:00:00 2001 From: Will
Deacon Date: Fri, 29 May 2015 14:57:47 +0100 Subject: arm64: kconfig: select HAVE_CMPXCHG_LOCAL We implement an optimised cmpxchg_local macro, so let the kernel know. Reviewed-by: Steve Capper Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8dabffa82ef8..e82b98dfc028 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -53,6 +53,7 @@ config ARM64 select HAVE_C_RECORDMCOUNT select HAVE_CC_STACKPROTECTOR select HAVE_CMPXCHG_DOUBLE + select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG -- cgit v1.2.3 From c739dc83a0b6db01abfbcc5246a30c7a575e4272 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 14:11:55 +0100 Subject: arm64: lse: rename ARM64_CPU_FEAT_LSE_ATOMICS for consistency Other CPU features follow an 'ARM64_HAS_*' naming scheme, so do the same for the LSE atomics. Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpufeature.h | 2 +- arch/arm64/include/asm/lse.h | 4 ++-- arch/arm64/kernel/setup.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index d9262d4b4dcd..171570702bb8 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -26,7 +26,7 @@ #define ARM64_WORKAROUND_845719 2 #define ARM64_HAS_SYSREG_GIC_CPUIF 3 #define ARM64_HAS_PAN 4 -#define ARM64_CPU_FEAT_LSE_ATOMICS 5 +#define ARM64_HAS_LSE_ATOMICS 5 #define ARM64_NCAPS 6 diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index fb3ac56a2cc0..3de42d68611d 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -12,7 +12,7 @@ .arch_extension lse .macro alt_lse, llsc, lse - alternative_insn "\llsc", "\lse", ARM64_CPU_FEAT_LSE_ATOMICS + alternative_insn "\llsc", "\lse", ARM64_HAS_LSE_ATOMICS .endm #else /* __ASSEMBLER__ */ @@ 
-29,7 +29,7 @@ __asm__(".arch_extension lse"); /* In-line patching at runtime */ #define ARM64_LSE_ATOMIC_INSN(llsc, lse) \ - ALTERNATIVE(llsc, lse, ARM64_CPU_FEAT_LSE_ATOMICS) + ALTERNATIVE(llsc, lse, ARM64_HAS_LSE_ATOMICS) #endif /* __ASSEMBLER__ */ #else /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 97785c01acbf..82ae8429baf2 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -284,7 +284,7 @@ static void __init setup_processor(void) default: case 2: elf_hwcap |= HWCAP_ATOMICS; - cpus_set_cap(ARM64_CPU_FEAT_LSE_ATOMICS); + cpus_set_cap(ARM64_HAS_LSE_ATOMICS); if (IS_ENABLED(CONFIG_AS_LSE) && IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS)) pr_info("LSE atomics supported\n"); -- cgit v1.2.3 From 0e4a07092fc833dc3d972a3394398aef68217c13 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 15:54:13 +0100 Subject: arm64: kconfig: group the v8.1 features together ARMv8 CPUs do not support any of the v8.1 features, so group them together in Kconfig to make it clear that they're part of 8.1 and not relevant to older cores. Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 90 ++++++++++++++++++++++++++++-------------------------- 1 file changed, 47 insertions(+), 43 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index e82b98dfc028..371a987b3c11 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -481,23 +481,6 @@ config ARM64_VA_BITS default 42 if ARM64_VA_BITS_42 default 48 if ARM64_VA_BITS_48 -config ARM64_HW_AFDBM - bool "Support for hardware updates of the Access and Dirty page flags" - default y - help - The ARMv8.1 architecture extensions introduce support for - hardware updates of the access and dirty information in page - table entries. When enabled in TCR_EL1 (HA and HD bits) on - capable processors, accesses to pages with PTE_AF cleared will - set this bit instead of raising an access flag fault. 
- Similarly, writes to read-only pages with the DBM bit set will - clear the read-only bit (AP[2]) instead of raising a - permission fault. - - Kernels built with this configuration option enabled continue - to work on pre-ARMv8.1 hardware and the performance impact is - minimal. If unsure, say Y. - config CPU_BIG_ENDIAN bool "Build big-endian kernel" help @@ -605,32 +588,6 @@ config FORCE_MAX_ZONEORDER default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE) default "11" -config ARM64_PAN - bool "Enable support for Privileged Access Never (PAN)" - default y - help - Privileged Access Never (PAN; part of the ARMv8.1 Extensions) - prevents the kernel or hypervisor from accessing user-space (EL0) - memory directly. - - Choosing this option will cause any unprotected (not using - copy_to_user et al) memory access to fail with a permission fault. - - The feature is detected at runtime, and will remain as a 'nop' - instruction if the cpu does not implement the feature. - -config ARM64_LSE_ATOMICS - bool "ARMv8.1 atomic instructions" - help - As part of the Large System Extensions, ARMv8.1 introduces new - atomic instructions that are designed specifically to scale in - very large systems. - - Say Y here to make use of these instructions for the in-kernel - atomic routines. This incurs a small overhead on CPUs that do - not support these instructions and requires the kernel to be - built with binutils >= 2.25. - menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT @@ -698,6 +655,53 @@ config SETEND_EMULATION If unsure, say Y endif +menu "ARMv8.1 architectural features" + +config ARM64_HW_AFDBM + bool "Support for hardware updates of the Access and Dirty page flags" + default y + help + The ARMv8.1 architecture extensions introduce support for + hardware updates of the access and dirty information in page + table entries. 
When enabled in TCR_EL1 (HA and HD bits) on + capable processors, accesses to pages with PTE_AF cleared will + set this bit instead of raising an access flag fault. + Similarly, writes to read-only pages with the DBM bit set will + clear the read-only bit (AP[2]) instead of raising a + permission fault. + + Kernels built with this configuration option enabled continue + to work on pre-ARMv8.1 hardware and the performance impact is + minimal. If unsure, say Y. + +config ARM64_PAN + bool "Enable support for Privileged Access Never (PAN)" + default y + help + Privileged Access Never (PAN; part of the ARMv8.1 Extensions) + prevents the kernel or hypervisor from accessing user-space (EL0) + memory directly. + + Choosing this option will cause any unprotected (not using + copy_to_user et al) memory access to fail with a permission fault. + + The feature is detected at runtime, and will remain as a 'nop' + instruction if the cpu does not implement the feature. + +config ARM64_LSE_ATOMICS + bool "Atomic instructions" + help + As part of the Large System Extensions, ARMv8.1 introduces new + atomic instructions that are designed specifically to scale in + very large systems. + + Say Y here to make use of these instructions for the in-kernel + atomic routines. This incurs a small overhead on CPUs that do + not support these instructions and requires the kernel to be + built with binutils >= 2.25. + +endmenu + endmenu menu "Boot options" -- cgit v1.2.3 From 2e94da13790336eb3fd00fb5e97610dd9aebe213 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 16:23:58 +0100 Subject: arm64: lse: use generic cpufeature detection for LSE atomics Rework the cpufeature detection to support ISAR0 and use that for detecting the presence of LSE atomics. 
Signed-off-by: Will Deacon --- arch/arm64/kernel/cpufeature.c | 37 +++++++++++++++++++++---------------- arch/arm64/kernel/setup.c | 4 ---- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 978fa169d3c3..3c9aed32f70b 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -31,23 +31,19 @@ feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) return val >= entry->min_field_value; } -static bool -has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry) -{ - u64 val; - - val = read_cpuid(id_aa64pfr0_el1); - return feature_matches(val, entry); +#define __ID_FEAT_CHK(reg) \ +static bool __maybe_unused \ +has_##reg##_feature(const struct arm64_cpu_capabilities *entry) \ +{ \ + u64 val; \ + \ + val = read_cpuid(reg##_el1); \ + return feature_matches(val, entry); \ } -static bool __maybe_unused -has_id_aa64mmfr1_feature(const struct arm64_cpu_capabilities *entry) -{ - u64 val; - - val = read_cpuid(id_aa64mmfr1_el1); - return feature_matches(val, entry); -} +__ID_FEAT_CHK(id_aa64pfr0); +__ID_FEAT_CHK(id_aa64mmfr1); +__ID_FEAT_CHK(id_aa64isar0); static const struct arm64_cpu_capabilities arm64_features[] = { { @@ -67,6 +63,15 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .enable = cpu_enable_pan, }, #endif /* CONFIG_ARM64_PAN */ +#if defined(CONFIG_AS_LSE) && defined(CONFIG_ARM64_LSE_ATOMICS) + { + .desc = "LSE atomic instructions", + .capability = ARM64_HAS_LSE_ATOMICS, + .matches = has_id_aa64isar0_feature, + .field_pos = 20, + .min_field_value = 2, + }, +#endif /* CONFIG_AS_LSE && CONFIG_ARM64_LSE_ATOMICS */ {}, }; @@ -93,5 +98,5 @@ void check_cpu_capabilities(const struct arm64_cpu_capabilities *caps, void check_local_cpu_features(void) { - check_cpu_capabilities(arm64_features, "detected feature"); + check_cpu_capabilities(arm64_features, "detected feature:"); } diff --git a/arch/arm64/kernel/setup.c 
b/arch/arm64/kernel/setup.c index 82ae8429baf2..b2f9895ecf7b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -284,10 +284,6 @@ static void __init setup_processor(void) default: case 2: elf_hwcap |= HWCAP_ATOMICS; - cpus_set_cap(ARM64_HAS_LSE_ATOMICS); - if (IS_ENABLED(CONFIG_AS_LSE) && - IS_ENABLED(CONFIG_ARM64_LSE_ATOMICS)) - pr_info("LSE atomics supported\n"); case 1: /* RESERVED */ case 0: -- cgit v1.2.3 From 309585b0b931b291d0525b2830161ee76a2f23ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 16:55:32 +0100 Subject: arm64: elf: use cpuid_feature_extract_field for hwcap detection cpuid_feature_extract_field takes care of the fiddly ID register field sign-extension, so use that instead of rolling our own version. Signed-off-by: Will Deacon --- arch/arm64/kernel/setup.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b2f9895ecf7b..be65ecc89e82 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -223,7 +223,8 @@ void __init up_late_init(void) static void __init setup_processor(void) { - u64 features, block; + u64 features; + s64 block; u32 cwg; int cls; @@ -253,8 +254,8 @@ static void __init setup_processor(void) * for non-negative values. Negative values are reserved. 
*/ features = read_cpuid(ID_AA64ISAR0_EL1); - block = (features >> 4) & 0xf; - if (!(block & 0x8)) { + block = cpuid_feature_extract_field(features, 4); + if (block > 0) { switch (block) { default: case 2: @@ -266,20 +267,17 @@ static void __init setup_processor(void) } } - block = (features >> 8) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 8) > 0) elf_hwcap |= HWCAP_SHA1; - block = (features >> 12) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 12) > 0) elf_hwcap |= HWCAP_SHA2; - block = (features >> 16) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 16) > 0) elf_hwcap |= HWCAP_CRC32; - block = (features >> 20) & 0xf; - if (!(block & 0x8)) { + block = cpuid_feature_extract_field(features, 20); + if (block > 0) { switch (block) { default: case 2: @@ -294,11 +292,11 @@ static void __init setup_processor(void) #ifdef CONFIG_COMPAT /* * ID_ISAR5_EL1 carries similar information as above, but pertaining to - * the Aarch32 32-bit execution state. + * the AArch32 32-bit execution state. 
*/ features = read_cpuid(ID_ISAR5_EL1); - block = (features >> 4) & 0xf; - if (!(block & 0x8)) { + block = cpuid_feature_extract_field(features, 4); + if (block > 0) { switch (block) { default: case 2: @@ -310,16 +308,13 @@ static void __init setup_processor(void) } } - block = (features >> 8) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 8) > 0) compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - block = (features >> 12) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 12) > 0) compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; - block = (features >> 16) & 0xf; - if (block && !(block & 0x8)) + if (cpuid_feature_extract_field(features, 16) > 0) compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; #endif } -- cgit v1.2.3 From 662ba3dbceca3ca284885a464ecb8c936f417003 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Mon, 27 Jul 2015 03:32:53 +0100 Subject: arm64: mm: add __init section marker to free_initrd_mem It is not needed after booting, this patch moves the free_initrd_mem() function to the __init section. This patch also makes keep_initrd __initdata, to reduce kernel size.
Signed-off-by: Wang Long Signed-off-by: Will Deacon --- arch/arm64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index ad87ce826cce..f5c0680d17d9 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -358,9 +358,9 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD -static int keep_initrd; +static int keep_initrd __initdata; -void free_initrd_mem(unsigned long start, unsigned long end) +void __init free_initrd_mem(unsigned long start, unsigned long end) { if (!keep_initrd) free_reserved_area((void *)start, (void *)end, 0, "initrd"); -- cgit v1.2.3 From 6f883d10a18b971a3646303d12ef75138dfd31f9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 27 Jul 2015 18:36:54 +0100 Subject: arm64: debug: rename enum debug_el to avoid symbol collision lib/list_sort.c defines a 'struct debug_el', where "el" is assumedly a contraction of "element". This conflicts with 'enum debug_el' in our asm/debug-monitors.h header file, where "el" stands for Exception Level. The result is build failure when targeting allmodconfig, so rename our enum to 'dbg_active_el' to be slightly more explicit about what it is.
Signed-off-by: Will Deacon --- arch/arm64/include/asm/debug-monitors.h | 6 +++--- arch/arm64/kernel/debug-monitors.c | 4 ++-- arch/arm64/kernel/hw_breakpoint.c | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index e3f2bad788c9..279c85b5ec09 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -115,13 +115,13 @@ void unregister_break_hook(struct break_hook *hook); u8 debug_monitors_arch(void); -enum debug_el { +enum dbg_active_el { DBG_ACTIVE_EL0 = 0, DBG_ACTIVE_EL1, }; -void enable_debug_monitors(enum debug_el el); -void disable_debug_monitors(enum debug_el el); +void enable_debug_monitors(enum dbg_active_el el); +void disable_debug_monitors(enum dbg_active_el el); void user_rewind_single_step(struct task_struct *task); void user_fastforward_single_step(struct task_struct *task); diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index b056369fd47d..9b3b62ac9c24 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -82,7 +82,7 @@ early_param("nodebugmon", early_debug_disable); static DEFINE_PER_CPU(int, mde_ref_count); static DEFINE_PER_CPU(int, kde_ref_count); -void enable_debug_monitors(enum debug_el el) +void enable_debug_monitors(enum dbg_active_el el) { u32 mdscr, enable = 0; @@ -102,7 +102,7 @@ void enable_debug_monitors(enum debug_el el) } } -void disable_debug_monitors(enum debug_el el) +void disable_debug_monitors(enum dbg_active_el el) { u32 mdscr, disable = 0; diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 7a1a5da6c8c1..003bc3d50636 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -156,7 +156,7 @@ static void write_wb_reg(int reg, int n, u64 val) * Convert a breakpoint privilege level to the corresponding exception * level. 
*/ -static enum debug_el debug_exception_level(int privilege) +static enum dbg_active_el debug_exception_level(int privilege) { switch (privilege) { case AARCH64_BREAKPOINT_EL0: @@ -230,7 +230,7 @@ static int hw_breakpoint_control(struct perf_event *bp, struct perf_event **slots; struct debug_info *debug_info = &current->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; - enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); + enum dbg_active_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { @@ -537,7 +537,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) * exception level at the register level. * This is used when single-stepping after a breakpoint exception. */ -static void toggle_bp_registers(int reg, enum debug_el el, int enable) +static void toggle_bp_registers(int reg, enum dbg_active_el el, int enable) { int i, max_slots, privilege; u32 ctrl; -- cgit v1.2.3 From c53e0baa6f34b7051790e0fba9d782ec4efe58bd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 28 Jul 2015 10:31:06 +0100 Subject: arm64: mm: mark create_mapping as __init Currently create_mapping is marked with __ref, apparently because it refers to early_alloc. However, create_mapping has no logic to prevent erroneous use of early_alloc after it has been freed, and is only ever called by __init functions anyway. Thus the __ref marker is misleading and unnecessary. Instead, this patch marks create_mapping as __init, resulting in warnings if it is used from a non-__init function, and allowing its memory to be reclaimed.
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/mm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 63012fed46fc..9211b8527f25 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -267,7 +267,7 @@ static void *late_alloc(unsigned long size) return ptr; } -static void __ref create_mapping(phys_addr_t phys, unsigned long virt, +static void __init create_mapping(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot) { if (virt < VMALLOC_START) { -- cgit v1.2.3 From da4e73303e448aa23b36249a85e239ca118ce941 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 24 Jul 2015 09:59:55 +0100 Subject: arm64: Clean up __flush_tlb(_kernel)_range functions This patch moves the MAX_TLB_RANGE check into the flush_tlb(_kernel)_range functions directly to avoid the underscore-prefixed definitions (and for consistency with a subsequent patch). Signed-off-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlbflush.h | 47 +++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 7fedfa787a64..e972bf456558 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -91,11 +91,23 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -static inline void __flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement.
+ */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_mm(vma->vm_mm); + return; + } + start = asid | (start >> 12); end = asid | (end >> 12); @@ -105,9 +117,15 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } -static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; + + if ((end - start) > MAX_TLB_RANGE) { + flush_tlb_all(); + return; + } + start >>= 12; end >>= 12; @@ -118,29 +136,6 @@ static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long e isb(); } -/* - * This is meant to avoid soft lock-ups on large TLB flushing ranges and not - * necessarily a performance improvement. - */ -#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_range(vma, start, end); - else - flush_tlb_mm(vma->vm_mm); -} - -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if ((end - start) <= MAX_TLB_RANGE) - __flush_tlb_kernel_range(start, end); - else - flush_tlb_all(); -} - /* * Used to invalidate the TLB (walk caches) corresponding to intermediate page * table levels (pgd/pud/pmd). -- cgit v1.2.3 From 4150e50bf5f2171fbe7dfdbc7f2cdf44676b79a4 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 18 Mar 2015 11:28:06 +0000 Subject: arm64: Use last level TLBI for user pte changes The flush_tlb_page() function is used on user address ranges when PTEs (or PMDs/PUDs for huge pages) were changed (attributes or clearing). 
For such cases, it is more efficient to invalidate only the last level of the TLB with the "tlbi vale1is" instruction. In the TLB shoot-down case, the TLB caching of the intermediate page table levels (pmd, pud, pgd) is handled by __flush_tlb_pgtable() via the __(pte|pmd|pud)_free_tlb() functions and it is not deferred to tlb_finish_mmu() (as of commit 285994a62c80 - "arm64: Invalidate the TLB corresponding to intermediate page table levels"). The tlb_flush() function only needs to invalidate the TLB for the last level of page tables; the __flush_tlb_range() function gains a fourth argument for last level TLBI. Signed-off-by: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/tlb.h | 7 ++++++- arch/arm64/include/asm/tlbflush.h | 21 ++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 3a0242c7eb8d..d6e6b6660380 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -41,7 +41,12 @@ static inline void tlb_flush(struct mmu_gather *tlb) flush_tlb_mm(tlb->mm); } else { struct vm_area_struct vma = { .vm_mm = tlb->mm, }; - flush_tlb_range(&vma, tlb->start, tlb->end); + /* + * The intermediate page table levels are already handled by + * the __(pte|pmd|pud)_free_tlb() functions, so last level + * TLBI is sufficient here. 
+ */ + __flush_tlb_range(&vma, tlb->start, tlb->end, true); } } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index e972bf456558..7bd2da021658 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -87,7 +87,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, ((unsigned long)ASID(vma->vm_mm) << 48); dsb(ishst); - asm("tlbi vae1is, %0" : : "r" (addr)); + asm("tlbi vale1is, %0" : : "r" (addr)); dsb(ish); } @@ -97,8 +97,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, */ #define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + bool last_level) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; @@ -112,11 +113,21 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, end = asid | (end >> 12); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) - asm("tlbi vae1is, %0" : : "r"(addr)); + for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) { + if (last_level) + asm("tlbi vale1is, %0" : : "r"(addr)); + else + asm("tlbi vae1is, %0" : : "r"(addr)); + } dsb(ish); } +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + __flush_tlb_range(vma, start, end, false); +} + static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; -- cgit v1.2.3 From c1d7cd228b4b46eca1dbd9bb2c6053f477a1a6ff Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Jul 2015 14:48:00 +0100 Subject: arm64: spinlock: fix ll/sc unlock on big-endian systems When unlocking a spinlock, we perform a read-modify-write on the owner ticket in order to increment it and store it back with release semantics. 
In the LL/SC case, we load the 16-bit ticket using a 32-bit load and therefore store back the wrong halfword on a big-endian system, corrupting the lock after the first unlock and killing the system dead. This patch fixes the unlock code to use 16-bit accessors consistently. Signed-off-by: Will Deacon --- arch/arm64/include/asm/spinlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 87ae7efa1211..c85e96d174a5 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -110,7 +110,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) asm volatile(ARM64_LSE_ATOMIC_INSN( /* LL/SC */ - " ldr %w1, %0\n" + " ldrh %w1, %0\n" " add %w1, %w1, #1\n" " stlrh %w1, %0", /* LSE atomics */ -- cgit v1.2.3 From 766ffb69803943c2b580a44ac14a189b875d21f6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Jul 2015 16:14:03 +0100 Subject: arm64: pgtable: fix definition of pte_valid pte_valid should check if the PTE_VALID bit (1 << 0) is set in the pte, so fix the macro definition to use bitwise & instead of logical &&. 
Signed-off-by: Will Deacon --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d001846c13ac..6900b2d95371 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -153,7 +153,7 @@ extern struct page *empty_zero_page; #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) -#define pte_valid(pte) (!!(pte_val(pte) && PTE_VALID)) +#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID)) #define pte_valid_user(pte) \ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER)) #define pte_valid_not_user(pte) \ -- cgit v1.2.3 From 377bcff9a38a78083d7fff8e8a41cc894cf7813b Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Wed, 29 Jul 2015 12:07:57 +0100 Subject: arm64: remove dead-code depending on CONFIG_UP_LATE_INIT Commit 4b3dc9679cf7 ("arm64: force CONFIG_SMP=y and remove redundant #ifdefs") forces SMP on arm64; CONFIG_UP_LATE_INIT therefore can not be selected anymore.
Remove dead #ifdef-block depending on UP_LATE_INIT in arch/arm64/kernel/setup.c Signed-off-by: Jonas Rabenstein [will: kill do_post_cpus_up_work altogether] Signed-off-by: Will Deacon --- arch/arm64/include/asm/smp_plat.h | 2 -- arch/arm64/kernel/setup.c | 25 ------------------------- arch/arm64/kernel/smp.c | 15 ++++++++++++++- 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/smp_plat.h b/arch/arm64/include/asm/smp_plat.h index 7abf7570c00f..af58dcdefb21 100644 --- a/arch/arm64/include/asm/smp_plat.h +++ b/arch/arm64/include/asm/smp_plat.h @@ -56,6 +56,4 @@ static inline int get_logical_index(u64 mpidr) return -EINVAL; } -void __init do_post_cpus_up_work(void); - #endif /* __ASM_SMP_PLAT_H */ diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index be65ecc89e82..0c8fd975306b 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -63,7 +63,6 @@ #include #include #include -#include #include unsigned long elf_hwcap __read_mostly; @@ -197,30 +196,6 @@ static void __init smp_build_mpidr_hash(void) __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); } -static void __init hyp_mode_check(void) -{ - if (is_hyp_mode_available()) - pr_info("CPU: All CPU(s) started at EL2\n"); - else if (is_hyp_mode_mismatched()) - WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC, - "CPU: CPUs started in inconsistent modes"); - else - pr_info("CPU: All CPU(s) started at EL1\n"); -} - -void __init do_post_cpus_up_work(void) -{ - hyp_mode_check(); - apply_alternatives_all(); -} - -#ifdef CONFIG_UP_LATE_INIT -void __init up_late_init(void) -{ - do_post_cpus_up_work(); -} -#endif /* CONFIG_UP_LATE_INIT */ - static void __init setup_processor(void) { u64 features; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 50fb4696654e..dbdaacddd9a5 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -52,6 +52,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include @@ -310,10 
+311,22 @@ void cpu_die(void) } #endif +static void __init hyp_mode_check(void) +{ + if (is_hyp_mode_available()) + pr_info("CPU: All CPU(s) started at EL2\n"); + else if (is_hyp_mode_mismatched()) + WARN_TAINT(1, TAINT_CPU_OUT_OF_SPEC, + "CPU: CPUs started in inconsistent modes"); + else + pr_info("CPU: All CPU(s) started at EL1\n"); +} + void __init smp_cpus_done(unsigned int max_cpus) { pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); - do_post_cpus_up_work(); + hyp_mode_check(); + apply_alternatives_all(); } void __init smp_prepare_boot_cpu(void) -- cgit v1.2.3 From 63a581865e9ee7b1277f1e4941d0765fdbde032a Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Wed, 29 Jul 2015 12:13:20 +0100 Subject: arm64: remove redundant object file list Commit 4b3dc9679cf7 ("arm64: force CONFIG_SMP=y and remove redundant #ifdefs") forces SMP on arm64. To build the necessary objects for SMP, they were added to the arm64-obj-y rule in arch/arm64/kernel/Makefile, without removing the arm64-obj-$(CONFIG_SMP) rule. Remove redundant object file list depending on always-yes CONFIG_SMP in arch/arm64/kernel/Makefile. 
Signed-off-by: Jonas Rabenstein Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index f126cfe99003..c662197ee57c 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -25,7 +25,6 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_callchain.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o -- cgit v1.2.3 From 484c96dbb26965d712a808ab9e8b00090455bdf6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 29 Jul 2015 15:16:22 +0100 Subject: arm64: lse: fix lse cmpxchg code indentation For some reason, the ll/sc cmpxchg asm is all off to the left and awkward to read in conjunction with the following (correctly indented) LSE version. This patch shifts the ll/sc code back to where it should be. 
Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_lse.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index f873bf61e17b..55d740e63459 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -319,9 +319,9 @@ static inline unsigned long __cmpxchg_case_##name(volatile void *ptr, \ \ asm volatile(ARM64_LSE_ATOMIC_INSN( \ /* LL/SC */ \ - "nop\n" \ - __LL_SC_CMPXCHG(name) \ - "nop", \ + " nop\n" \ + __LL_SC_CMPXCHG(name) \ + " nop", \ /* LSE atomics */ \ " mov " #w "30, %" #w "[old]\n" \ " cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n" \ -- cgit v1.2.3 From 6c020ea8dc3a8adee81b6f141428a7a75249706e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 29 Jul 2015 12:30:39 +0100 Subject: arm64/Documentation: clarify wording regarding memory below the Image Clarify that the memory below the start of the image but inside the region covered by the linear mapping has no special significance to the kernel, and may be used by the firmware provided that it is marked as reserved. Also, fix up some whitespace errors. 
Acked-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- Documentation/arm64/booting.txt | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 1690350f16e7..7d9d3c2286b2 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -81,7 +81,7 @@ The decompressed kernel image contains a 64-byte header as follows: u64 res3 = 0; /* reserved */ u64 res4 = 0; /* reserved */ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */ - u32 res5; /* reserved (used for PE COFF offset) */ + u32 res5; /* reserved (used for PE COFF offset) */ Header notes: @@ -103,7 +103,7 @@ Header notes: - The flags field (introduced in v3.17) is a little-endian 64-bit field composed as follows: - Bit 0: Kernel endianness. 1 if BE, 0 if LE. + Bit 0: Kernel endianness. 1 if BE, 0 if LE. Bits 1-63: Reserved. - When image_size is zero, a bootloader should attempt to keep as much @@ -115,11 +115,14 @@ The Image must be placed text_offset bytes from a 2MB aligned base address near the start of usable system RAM and called there. Memory below that base address is currently unusable by Linux, and therefore it is strongly recommended that this location is the start of system RAM. +The region between the 2 MB aligned base address and the start of the +image has no special significance to the kernel, and may be used for +other purposes. At least image_size bytes from the start of the image must be free for use by the kernel. -Any memory described to the kernel (even that below the 2MB aligned base -address) which is not marked as reserved from the kernel e.g. with a +Any memory described to the kernel (even that below the start of the +image) which is not marked as reserved from the kernel (e.g., with a memreserve region in the device tree) will be considered as available to the kernel. 
-- cgit v1.2.3 From ef5e724b25c9f90b7683bb2d45833ebac0989dcb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Jul 2015 19:07:28 +0100 Subject: arm64: alternative: put secondary CPUs into polling loop during patch When patching the kernel text with alternatives, we may end up patching parts of the stop_machine state machine (e.g. atomic_dec_and_test in ack_state) and consequently corrupt the instruction stream of any secondary CPUs. This patch passes the cpu_online_mask to stop_machine, forcing all of the CPUs into our own callback which can place the secondary cores into a dumb (but safe!) polling loop whilst the patching is carried out. Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 3 ++- arch/arm64/kernel/alternative.c | 29 ++++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index b474e9106bc2..d56ec0715157 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -3,6 +3,7 @@ #ifndef __ASSEMBLY__ +#include #include #include #include @@ -16,7 +17,7 @@ struct alt_instr { u8 alt_len; /* size of new instruction(s), <= orig_len */ }; -void apply_alternatives_all(void); +void __init apply_alternatives_all(void); void apply_alternatives(void *start, size_t length); void free_alternatives_memory(void); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 221b98312f0c..fa1d575ab2c2 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -85,7 +85,7 @@ static u32 get_alt_insn(struct alt_instr *alt, u32 *insnptr, u32 *altinsnptr) return insn; } -static int __apply_alternatives(void *alt_region) +static void __apply_alternatives(void *alt_region) { struct alt_instr *alt; struct alt_region *region = alt_region; @@ -114,19 +114,38 @@ static int __apply_alternatives(void *alt_region) flush_icache_range((uintptr_t)origptr, 
(uintptr_t)(origptr + nr_inst)); } - - return 0; } -void apply_alternatives_all(void) +/* + * We might be patching the stop_machine state machine, so implement a + * really simple polling protocol here. + */ +static int __apply_alternatives_multi_stop(void *unused) { + static int patched = 0; struct alt_region region = { .begin = __alt_instructions, .end = __alt_instructions_end, }; + /* We always have a CPU 0 at this point (__init) */ + if (smp_processor_id()) { + while (!READ_ONCE(patched)) + cpu_relax(); + } else { + BUG_ON(patched); + __apply_alternatives(&region); + /* Barriers provided by the cache flushing */ + WRITE_ONCE(patched, 1); + } + + return 0; +} + +void __init apply_alternatives_all(void) +{ /* better not try code patching on a live SMP system */ - stop_machine(__apply_alternatives, &region, NULL); + stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask); } void apply_alternatives(void *start, size_t length) -- cgit v1.2.3 From a14949e09a228dcd4cc5088c90c375429c7d102c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 30 Jul 2015 19:19:43 +0100 Subject: arm64: cmpxchg: truncate sub-word signed types before comparison When performing a cmpxchg operation on a signed sub-word type (e.g. s8), we need to ensure that the upper register bits of the "old" value used for comparison are zeroed, otherwise we may erroneously fail the cmpxchg which may even be interpreted as success by the caller (if the compiler performs the truncation as part of its check). This has been observed in mod_state, where negative values were causing problems with this_cpu_cmpxchg. This patch fixes the issue by explicitly casting 8-bit and 16-bit "old" values using unsigned types in our cmpxchg wrappers. 32-bit types can be left alone, since the underlying asm makes use of W registers in this case. 
Reported-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/include/asm/cmpxchg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 7bfda0944c9b..899e9f1d19e4 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -122,9 +122,9 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old, { switch (size) { case 1: - return __cmpxchg_case_1(ptr, old, new); + return __cmpxchg_case_1(ptr, (u8)old, new); case 2: - return __cmpxchg_case_2(ptr, old, new); + return __cmpxchg_case_2(ptr, (u16)old, new); case 4: return __cmpxchg_case_4(ptr, old, new); case 8: @@ -141,9 +141,9 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old, { switch (size) { case 1: - return __cmpxchg_case_mb_1(ptr, old, new); + return __cmpxchg_case_mb_1(ptr, (u8)old, new); case 2: - return __cmpxchg_case_mb_2(ptr, old, new); + return __cmpxchg_case_mb_2(ptr, (u16)old, new); case 4: return __cmpxchg_case_mb_4(ptr, old, new); case 8: -- cgit v1.2.3 From 72407514c92c4e4b3584cba5961e63fb10c1a04e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 30 Jul 2015 14:13:59 +0100 Subject: ARM64: PCI: do not enable resources on PROBE_ONLY systems On ARM64 PROBE_ONLY PCI systems resources are not currently claimed, therefore they can't be enabled since they do not have a valid parent pointer; this in turn prevents enabling PCI devices on ARM64 PROBE_ONLY systems, causing PCI devices initialization to fail. To solve this issue, resources must be claimed when devices are added on PROBE_ONLY systems, which ensures that the resource hierarchy is validated and the resource tree is sane, but this requires changes in the ARM64 resource management that can affect adversely existing PCI set-ups (claiming resources on !PROBE_ONLY systems might break existing ARM64 PCI platform implementations). 
As a temporary solution in preparation for a proper resources claiming implementation in ARM64 core, to enable PCI PROBE_ONLY systems on ARM64, this patch adds a pcibios_enable_device() arch implementation that simply prevents enabling resources on PROBE_ONLY systems (mirroring ARM behaviour). This is always a safe thing to do because on PROBE_ONLY systems the configuration space set-up can be considered immutable, and it is in preparation of proper resource claiming that would finally validate the PCI resources tree in the ARM64 arch implementation on PROBE_ONLY systems. For !PROBE_ONLY systems resources enablement in pcibios_enable_device() on ARM64 is implemented as in current PCI core, leaving the behaviour unchanged. Signed-off-by: Lorenzo Pieralisi Cc: Will Deacon Cc: Bjorn Helgaas Cc: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/kernel/pci.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 4095379dc069..b3d098bd34aa 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -38,6 +38,19 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, return res->start; } +/** + * pcibios_enable_device - Enable I/O and memory. + * @dev: PCI device to be enabled + * @mask: bitmask of BARs to enable + */ +int pcibios_enable_device(struct pci_dev *dev, int mask) +{ + if (pci_has_flag(PCI_PROBE_ONLY)) + return 0; + + return pci_enable_resources(dev, mask); +} + /* * Try to assign the IRQ number from DT when adding a new device */ -- cgit v1.2.3 From b511a6592860f24725f34909392885c4e3e9fe95 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Fri, 31 Jul 2015 16:41:36 +0100 Subject: arm64: restore cpu suspend/resume functionality Commit 4b3dc9679cf7 ("arm64: force CONFIG_SMP=y and remove redundant #ifdefs") accidentally retained code for !CONFIG_SMP in cpu_resume function. 
This resulted in the hash index being zeroed in x7 after proper computation, which is then used to get the cpu context pointer while resuming. This patch removes the remnant code and restores the cpu suspend/resume functionality. Fixes: 4b3dc9679cf7 ("arm64: force CONFIG_SMP=y and remove redundant #ifdefs") Signed-off-by: Sudeep Holla Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/kernel/sleep.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 5686a3ae3940..fb3128ea3a4f 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -158,7 +158,6 @@ ENTRY(cpu_resume) ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 /* x7 contains hash index, let's use it to grab context pointer */ - mov x7, xzr ldr_l x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS ldr x0, [x0, x7, lsl #3] /* load sp from context */ -- cgit v1.2.3 From 97942c2862d74689b6241802f2aa43972042389f Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 31 Jul 2015 18:28:34 +0100 Subject: arm64: dma-mapping: Simplify pgprot handling Since __get_dma_pgprot() does The Right Thing(TM) in the non-coherent case, and the non-cacheable alias for DMA buffers is private to the kernel anyway, we can simplify things slightly and make the code more readable by just using PAGE_KERNEL as the base pgprot. 
Suggested-by: Catalin Marinas Reviewed-by: Catalin Marinas Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- arch/arm64/mm/dma-mapping.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e5d74cdfdb71..0bcc4bc94b4a 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -144,6 +144,7 @@ static void *__dma_alloc(struct device *dev, size_t size, struct page *page; void *ptr, *coherent_ptr; bool coherent = is_device_dma_coherent(dev); + pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); size = PAGE_ALIGN(size); @@ -171,9 +172,7 @@ static void *__dma_alloc(struct device *dev, size_t size, /* create a coherent mapping */ page = virt_to_page(ptr); coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP, - __get_dma_pgprot(attrs, - __pgprot(PROT_NORMAL_NC), false), - NULL); + prot, NULL); if (!coherent_ptr) goto no_map; -- cgit v1.2.3 From 7f08a414f29e7daea661d03231998625257ed3f1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 4 Aug 2015 17:27:34 +0100 Subject: arm64: make ll/sc __cmpxchg_case_##name asm consistent The ll/sc __cmpxchg_case_##name assembly mostly uses symbolic names for operands, but in a single case uses %2 to refer to what is otherwise known as %[v]. This makes the code more painful to read than is necessary. Use %[v] instead. 
Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm64/include/asm/atomic_ll_sc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/atomic_ll_sc.h b/arch/arm64/include/asm/atomic_ll_sc.h index 6671978e60fe..b3b5c4ae3800 100644 --- a/arch/arm64/include/asm/atomic_ll_sc.h +++ b/arch/arm64/include/asm/atomic_ll_sc.h @@ -181,7 +181,7 @@ __LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr, \ unsigned long tmp, oldval; \ \ asm volatile( \ - " prfm pstl1strm, %2\n" \ + " prfm pstl1strm, %[v]\n" \ "1: ldxr" #sz "\t%" #w "[oldval], %[v]\n" \ " eor %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n" \ " cbnz %" #w "[tmp], 2f\n" \ -- cgit v1.2.3 From 04b8637be92f284409651088f3856f4290a931d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Aug 2015 18:52:09 +0100 Subject: arm64: alternatives: ensure secondary CPUs execute ISB after patching In order to guarantee that the patched instruction stream is visible to a CPU, that CPU must execute an isb instruction after any related cache maintenance has completed. The instruction patching routines in kernel/insn.c get this right for things like jump labels and ftrace, but the alternatives patching omits it entirely leaving secondary cores in a potential limbo between the old and the new code. This patch adds an isb following the secondary polling loop in the alternatives patching. 
Signed-off-by: Will Deacon --- arch/arm64/kernel/alternative.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index fa1d575ab2c2..ab9db0e9818c 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -132,6 +132,7 @@ static int __apply_alternatives_multi_stop(void *unused) if (smp_processor_id()) { while (!READ_ONCE(patched)) cpu_relax(); + isb(); } else { BUG_ON(patched); __apply_alternatives(&region); -- cgit v1.2.3 From 8ec41987436d566f7c4559c6871738b869f7ef07 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Aug 2015 17:49:36 +0100 Subject: arm64: mm: ensure patched kernel text is fetched from PoU The arm64 booting document requires that the bootloader has cleaned the kernel image to the PoC. However, when a CPU re-enters the kernel due to either a CPU hotplug "on" event or resuming from a low-power state (e.g. cpuidle), the kernel text may in fact be dirty at the PoU due to things like alternative patching or even module loading. Thanks to I-cache speculation with the MMU off, stale instructions could be fetched prior to enabling the MMU, potentially leading to crashes when executing regions of code that have been modified at runtime. This patch addresses the issue by ensuring that the local I-cache is invalidated immediately after a CPU has enabled its MMU but before jumping out of the identity mapping. Any stale instructions fetched from the PoC will then be discarded and refetched correctly from the PoU. Patching kernel text executed prior to the MMU being enabled is prohibited, so the early entry code will always be clean. 
Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/head.S | 8 ++++++++ arch/arm64/kernel/sleep.S | 8 ++++++++ arch/arm64/mm/proc.S | 1 - 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3a0654173997..a055be6125cf 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -634,5 +634,13 @@ __enable_mmu: isb msr sctlr_el1, x0 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x27 ENDPROC(__enable_mmu) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index fb3128ea3a4f..f586f7c875e2 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -133,6 +133,14 @@ ENTRY(cpu_resume_mmu) ldr x3, =cpu_resume_after_mmu msr sctlr_el1, x0 // restore sctlr_el1 isb + /* + * Invalidate the local I-cache so that any instructions fetched + * speculatively from the PoC are discarded, since they may have + * been dynamically patched at the PoU. + */ + ic iallu + dsb nsh + isb br x3 // global jump to virtual address ENDPROC(cpu_resume_mmu) .popsection diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 34da270f9e34..6e8765a2bddd 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -146,7 +146,6 @@ ENDPROC(cpu_do_switch_mm) * value of the SCTLR_EL1 register. */ ENTRY(__cpu_setup) - ic iallu // I+BTB cache invalidate tlbi vmalle1is // invalidate I + D TLBs dsb ish -- cgit v1.2.3 From 9a5ad7d0e3e1c6c0c11df89fbc5376f8aaf7a90f Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Wed, 12 Aug 2015 15:16:19 +0100 Subject: arm64: Add __exception_irq_entry definition for function graph The gic_handle_irq() is defined with __exception_irq_entry attribute. A single remaining work is to add its definition as ARM did. 
Below shows how function graph data is changed with these hunks. A prologue of an interrupt handler is drawn as follows. - current status 0) 0.208 us | cpuidle_not_available(); 0) | default_idle_call() { 0) | arch_cpu_idle() { 0) | __handle_domain_irq() { 0) | irq_enter() { 0) 0.313 us | rcu_irq_enter(); 0) 0.261 us | __local_bh_disable_ip(); - with this change 0) 0.625 us | cpuidle_not_available(); 0) | default_idle_call() { 0) | arch_cpu_idle() { 0) ==========> | 0) | gic_handle_irq() { 0) | __handle_domain_irq() { 0) | irq_enter() { 0) 0.885 us | rcu_irq_enter(); 0) 0.781 us | __local_bh_disable_ip(); An epilogue of an interrupt handler is recorded as follows. - current status 0) 0.261 us | idle_cpu(); 0) | rcu_irq_exit() { 0) 0.521 us | rcu_eqs_enter_common.isra.46(); 0) 2.552 us | } 0) ! 322.448 us | } 0) ! 583.437 us | } 0) # 1656.041 us | } 0) # 1658.073 us | } - with this change 0) 0.677 us | idle_cpu(); 0) | rcu_irq_exit() { 0) 1.770 us | rcu_eqs_enter_common.isra.46(); 0) 7.968 us | } 0) # 1803.541 us | } 0) # 2626.667 us | } 0) # 2632.969 us | } 0) <========== | 0) # 14425.00 us | } 0) # 14430.98 us | } Cc: AKASHI Takahiro Cc: Marc Zyngier Cc: Rabin Vincent Cc: Steven Rostedt Signed-off-by: Jungseok Lee Signed-off-by: Will Deacon --- arch/arm64/include/asm/exception.h | 6 ++++++ arch/arm64/include/asm/traps.h | 23 +++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 0303705fcad6..6cb7e1a6bc02 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -18,7 +18,13 @@ #ifndef __ASM_EXCEPTION_H #define __ASM_EXCEPTION_H +#include + #define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else #define __exception_irq_entry __exception +#endif #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/include/asm/traps.h 
b/arch/arm64/include/asm/traps.h index 232e4ba5d314..0cc2f29bf9da 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -34,13 +34,32 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static inline int __in_irqentry_text(unsigned long ptr) +{ + extern char __irqentry_text_start[]; + extern char __irqentry_text_end[]; + + return ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end; +} +#else +static inline int __in_irqentry_text(unsigned long ptr) +{ + return 0; +} +#endif + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; + int in; + + in = ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; - return ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; + return in ? : __in_irqentry_text(ptr); } #endif -- cgit v1.2.3 From bf0c4e04732479f650ff59d1ee82de761c0071f0 Mon Sep 17 00:00:00 2001 From: Jeff Vander Stoep Date: Tue, 18 Aug 2015 20:50:10 +0100 Subject: arm64: kconfig: Move LIST_POISON to a safe value Move the poison pointer offset to 0xdead000000000000, a recognized value that is not mappable by user-space exploits. 
Cc: Acked-by: Catalin Marinas Signed-off-by: Thierry Strudel Signed-off-by: Jeff Vander Stoep Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 78b89fa337ee..f3d751a342f0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -106,6 +106,10 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y -- cgit v1.2.3 From d8d23fa0f27f3b2942a7bbc7378c7735324ed519 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Aug 2015 11:47:13 +0100 Subject: arm64: mdscr_el1: avoid exposing DCC to userspace We don't want to expose the DCC to userspace, particularly as there is a kernel console driver for it. This patch resets mdscr_el1 to disable userspace access to the DCC registers on the cold boot path. Signed-off-by: Will Deacon --- arch/arm64/mm/proc.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 6e8765a2bddd..e4ee7bd8830a 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -151,7 +151,8 @@ ENTRY(__cpu_setup) mov x0, #3 << 20 msr cpacr_el1, x0 // Enable FP/ASIMD - msr mdscr_el1, xzr // Reset mdscr_el1 + mov x0, #1 << 12 // Reset mdscr_el1 and disable + msr mdscr_el1, x0 // access to the DCC from EL0 /* * Memory region attributes for LPAE: * -- cgit v1.2.3 From 412fcb6cebd758d080cacd5a41a0cbc656ea5fce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 19 Aug 2015 15:57:09 +0100 Subject: arm64: entry: always restore x0 from the stack on syscall return We have a micro-optimisation on the fast syscall return path where we take care to keep x0 live with the return value from the syscall so that we can avoid restoring it from the stack. 
The benefit of doing this is fairly suspect, since we will be restoring x1 from the stack anyway (which lives adjacent in the pt_regs structure) and the only additional cost is saving x0 back to pt_regs after the syscall handler, which could be seen as a poor man's prefetch. More importantly, this causes issues with the context tracking code. The ct_user_enter macro ends up branching into C code, which is free to use x0 as a scratch register and consequently leads to us returning junk back to userspace as the syscall return value. Rather than special case the context-tracking code, this patch removes the questionable optimisation entirely. Cc: Cc: Larry Bassel Cc: Kevin Hilman Reviewed-by: Catalin Marinas Reported-by: Hanjun Guo Tested-by: Hanjun Guo Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d8a523600a4c..4306c937b1ff 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -116,7 +116,7 @@ */ .endm - .macro kernel_exit, el, ret = 0 + .macro kernel_exit, el ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 ct_user_enter @@ -143,11 +143,7 @@ alternative_endif .endif msr elr_el1, x21 // set up the return data msr spsr_el1, x22 - .if \ret - ldr x1, [sp, #S_X1] // preserve x0 (syscall return) - .else ldp x0, x1, [sp, #16 * 0] - .endif ldp x2, x3, [sp, #16 * 1] ldp x4, x5, [sp, #16 * 2] ldp x6, x7, [sp, #16 * 3] @@ -610,22 +606,21 @@ ENDPROC(cpu_switch_to) */ ret_fast_syscall: disable_irq // disable interrupts + str x0, [sp, #S_X0] // returned x0 ldr x1, [tsk, #TI_FLAGS] // re-check for syscall tracing and x2, x1, #_TIF_SYSCALL_WORK cbnz x2, ret_fast_syscall_trace and x2, x1, #_TIF_WORK_MASK - cbnz x2, fast_work_pending + cbnz x2, work_pending enable_step_tsk x1, x2 - kernel_exit 0, ret = 1 + kernel_exit 0 ret_fast_syscall_trace: enable_irq // enable interrupts - b __sys_trace_return + b 
__sys_trace_return_skipped // we already saved x0 /* * Ok, we need to do extra processing, enter the slow path. */ -fast_work_pending: - str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ @@ -649,7 +644,7 @@ ret_to_user: cbnz x2, work_pending enable_step_tsk x1, x2 no_work_pending: - kernel_exit 0, ret = 0 + kernel_exit 0 ENDPROC(ret_to_user) /* -- cgit v1.2.3 From 5d3c2c352998fdefdc62795249cfc7311cf36df9 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Sun, 23 Aug 2015 14:24:44 +0100 Subject: arm64: Fix source code file path in comments Architecture-specific code for i386 and x86_64 was unified and merged into arch/x86. This patch fixes the old x86 architecture path in a comment in arch/arm64/include/asm/fixmap.h. Signed-off-by: Alexander Kuleshov Signed-off-by: Will Deacon --- arch/arm64/include/asm/fixmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index c0739187a920..8b9884c726ad 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -8,7 +8,7 @@ * Copyright (C) 1998 Ingo Molnar * Copyright (C) 2013 Mark Salter * - * Adapted from arch/x86_64 version. + * Adapted from arch/x86 version. * */ -- cgit v1.2.3 From 8eafeb48022816513abc4f440bdad4c350fe81a3 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Aug 2015 10:34:41 +0100 Subject: of/fdt: make memblock maximum physical address arch configurable When parsing the memory nodes to populate the memblock memory table, we check against high and low limits and clip any memory that exceeds either one of them. However, for arm64, the high limit of (phys_addr_t)~0 is not very meaningful, since phys_addr_t is 64 bits (i.e., no limit) but there may be other constraints that limit the memory ranges that we can support. 
So rename MAX_PHYS_ADDR to MAX_MEMBLOCK_ADDR (for clarity) and only define it if the arch does not supply a definition of its own. Acked-by: Rob Herring Reviewed-by: Catalin Marinas Tested-by: Stuart Yoder Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- drivers/of/fdt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 07496560e5b9..6e82bc42373b 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -967,7 +967,9 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname, } #ifdef CONFIG_HAVE_MEMBLOCK -#define MAX_PHYS_ADDR ((phys_addr_t)~0) +#ifndef MAX_MEMBLOCK_ADDR +#define MAX_MEMBLOCK_ADDR ((phys_addr_t)~0) +#endif void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) { @@ -984,16 +986,16 @@ void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) } size &= PAGE_MASK; - if (base > MAX_PHYS_ADDR) { + if (base > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", base, base + size); return; } - if (base + size - 1 > MAX_PHYS_ADDR) { + if (base + size - 1 > MAX_MEMBLOCK_ADDR) { pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", - ((u64)MAX_PHYS_ADDR) + 1, base + size); - size = MAX_PHYS_ADDR - base + 1; + ((u64)MAX_MEMBLOCK_ADDR) + 1, base + size); + size = MAX_MEMBLOCK_ADDR - base + 1; } if (base + size < phys_offset) { -- cgit v1.2.3 From 34ba2c4247e5c4b1542b1106e156af324660c4f0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 18 Aug 2015 10:34:42 +0100 Subject: arm64: set MAX_MEMBLOCK_ADDR according to linear region size The linear region size of a 39-bit VA kernel is only 256 GB, which may be insufficient to cover all of system RAM, even on platforms that have much less than 256 GB of memory but which is laid out very sparsely. So make sure we clip the memory we will not be able to map before installing it into the memblock memory table, by setting MAX_MEMBLOCK_ADDR accordingly. 
Reviewed-by: Catalin Marinas Tested-by: Stuart Yoder Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index f800d45ea226..44a59c20e773 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -113,6 +113,14 @@ extern phys_addr_t memstart_addr; /* PHYS_OFFSET - the physical address of the start of memory. */ #define PHYS_OFFSET ({ memstart_addr; }) +/* + * The maximum physical address that the linear direct mapping + * of system RAM can cover. (PAGE_OFFSET can be interpreted as + * a 2's complement signed quantity and negated to derive the + * maximum size of the linear mapping.) + */ +#define MAX_MEMBLOCK_ADDR ({ memstart_addr - PAGE_OFFSET - 1; }) + /* * PFNs are used to describe any physical page; this means * PFN 0 == physical address 0. -- cgit v1.2.3 From 5166c20ef95be89d10ffe0140e74df5cf26e9786 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 24 Aug 2015 13:35:51 +0100 Subject: arm64: makefile: fix perf_callchain.o kconfig dependency Commit 4b3dc9679cf7 ("arm64: force CONFIG_SMP=y and remove redundant #ifdefs") incorrectly resolved a conflict on arch/arm64/kernel/Makefile which resulted in a partial revert of 52da443ec4d0 ("arm64: perf: factor out callchain code"), leading to perf_callchain.o depending on CONFIG_HW_PERF_EVENTS instead of CONFIG_PERF_EVENTS. This patch restores the kconfig dependency for perf_callchain.o. 
Reported-by: Mark Rutland Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index c662197ee57c..22dc9bc781be 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -25,8 +25,8 @@ arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ ../../arm/kernel/opcodes.o arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o -arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_callchain.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o +arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o arm64-obj-$(CONFIG_CPU_PM) += sleep.o suspend.o arm64-obj-$(CONFIG_CPU_IDLE) += cpuidle.o -- cgit v1.2.3 From 674c242c9323d3c293fc4f9a3a3a619fe3063290 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Aug 2015 07:12:33 +0100 Subject: arm64: flush FP/SIMD state correctly after execve() When a task calls execve(), its FP/SIMD state is flushed so that none of the original program state is observable by the incoming program. However, since this flushing consists of setting the in-memory copy of the FP/SIMD state to all zeroes, the CPU field is set to CPU 0 as well, which indicates to the lazy FP/SIMD preserve/restore code that the FP/SIMD state does not need to be reread from memory if the task is scheduled again on CPU 0 without any other tasks having entered userland (or used the FP/SIMD in kernel mode) on the same CPU in the meantime. If this happens, the FP/SIMD state of the old program will still be present in the registers when the new program starts. So set the CPU field to the invalid value of NR_CPUS when performing the flush, by calling fpsimd_flush_task_state(). 
Cc: Reported-by: Chunyan Zhang Reported-by: Janet Liu Signed-off-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 44d6f7545505..c56956a16d3f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -158,6 +158,7 @@ void fpsimd_thread_switch(struct task_struct *next) void fpsimd_flush_thread(void) { memset(&current->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); + fpsimd_flush_task_state(current); set_thread_flag(TIF_FOREIGN_FPSTATE); } -- cgit v1.2.3