diff options
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/dump_hashpagetable.c | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/dump_linuxpagetables.c | 106 | ||||
-rw-r--r-- | arch/powerpc/mm/fault.c | 84 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_low_32.S | 2 | ||||
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 26 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-book3e.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage-radix.c | 11 | ||||
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 18 | ||||
-rw-r--r-- | arch/powerpc/mm/init_64.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/mmap.c | 53 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_book3s64.c | 116 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_iommu.c | 43 | ||||
-rw-r--r-- | arch/powerpc/mm/mmu_context_nohash.c | 5 | ||||
-rw-r--r-- | arch/powerpc/mm/numa.c | 7 | ||||
-rw-r--r-- | arch/powerpc/mm/slb.c | 4 | ||||
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 82 | ||||
-rw-r--r-- | arch/powerpc/mm/slice.c | 258 | ||||
-rw-r--r-- | arch/powerpc/mm/subpage-prot.c | 3 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb-radix.c | 93 | ||||
-rw-r--r-- | arch/powerpc/mm/tlb_nohash.c | 2 |
20 files changed, 604 insertions, 322 deletions
diff --git a/arch/powerpc/mm/dump_hashpagetable.c b/arch/powerpc/mm/dump_hashpagetable.c index d979709a0239..c6b900f54c07 100644 --- a/arch/powerpc/mm/dump_hashpagetable.c +++ b/arch/powerpc/mm/dump_hashpagetable.c @@ -468,7 +468,7 @@ static void walk_linearmapping(struct pg_state *st) unsigned long psize = 1 << mmu_psize_defs[mmu_linear_psize].shift; for (addr = PAGE_OFFSET; addr < PAGE_OFFSET + - memblock_phys_mem_size(); addr += psize) + memblock_end_of_DRAM(); addr += psize) hpte_find(st, addr, mmu_linear_psize); } diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 49abaf4dc8e3..d659345a98d6 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -26,6 +26,10 @@ #include <asm/page.h> #include <asm/pgalloc.h> +#ifdef CONFIG_PPC32 +#define KERN_VIRT_START 0 +#endif + /* * To visualise what is happening, * @@ -56,6 +60,8 @@ struct pg_state { struct seq_file *seq; const struct addr_marker *marker; unsigned long start_address; + unsigned long start_pa; + unsigned long last_pa; unsigned int level; u64 current_flags; }; @@ -69,6 +75,7 @@ static struct addr_marker address_markers[] = { { 0, "Start of kernel VM" }, { 0, "vmalloc() Area" }, { 0, "vmalloc() End" }, +#ifdef CONFIG_PPC64 { 0, "isa I/O start" }, { 0, "isa I/O end" }, { 0, "phb I/O start" }, @@ -76,6 +83,20 @@ static struct addr_marker address_markers[] = { { 0, "I/O remap start" }, { 0, "I/O remap end" }, { 0, "vmemmap start" }, +#else + { 0, "Early I/O remap start" }, + { 0, "Early I/O remap end" }, +#ifdef CONFIG_NOT_COHERENT_CACHE + { 0, "Consistent mem start" }, + { 0, "Consistent mem end" }, +#endif +#ifdef CONFIG_HIGHMEM + { 0, "Highmem PTEs start" }, + { 0, "Highmem PTEs end" }, +#endif + { 0, "Fixmap start" }, + { 0, "Fixmap end" }, +#endif { -1, NULL }, }; @@ -100,8 +121,13 @@ static const struct flag_info flag_array[] = { .set = "user", .clear = " ", }, { +#if _PAGE_RO == 0 .mask = _PAGE_RW, .val = _PAGE_RW, +#else + .mask = _PAGE_RO, + .val = 0, +#endif .set = "rw", .clear = "ro", }, { @@ -154,11 +180,24 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { #endif +#ifndef CONFIG_PPC_BOOK3S_64 .mask = _PAGE_NO_CACHE, .val = _PAGE_NO_CACHE, .set = "no cache", .clear = " ", }, { +#else + .mask = _PAGE_NON_IDEMPOTENT, + .val = _PAGE_NON_IDEMPOTENT, + .set = "non-idempotent", + .clear = " ", + }, { + .mask = _PAGE_TOLERANT, + .val = _PAGE_TOLERANT, + .set = "tolerant", + .clear = " ", + }, { +#endif #ifdef CONFIG_PPC_BOOK3S_64 .mask = H_PAGE_BUSY, .val = H_PAGE_BUSY, @@ -188,6 +227,10 @@ static const struct flag_info flag_array[] = { .mask = _PAGE_SPECIAL, .val = _PAGE_SPECIAL, .set = "special", + }, { + .mask = _PAGE_SHARED, + .val = _PAGE_SHARED, + .set = "shared", } }; @@ -252,7 +295,14 @@ static void dump_addr(struct pg_state *st, unsigned long addr) const char *unit = units; unsigned long delta; - seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1); +#ifdef CONFIG_PPC64 + seq_printf(st->seq, "0x%016lx-0x%016lx ", st->start_address, addr-1); + seq_printf(st->seq, "0x%016lx ", st->start_pa); +#else + seq_printf(st->seq, "0x%08lx-0x%08lx ", st->start_address, addr - 1); + seq_printf(st->seq, "0x%08lx ", st->start_pa); +#endif + delta = (addr - st->start_address) >> 10; /* Work out what appropriate unit to use */ while (!(delta & 1023) && unit[1]) { @@ -267,11 +317,15 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned int level, u64 val) { u64 flag = val & pg_level[level].mask; + u64 pa = val & PTE_RPN_MASK; + /* At first no level is set */ if (!st->level) { st->level = level; st->current_flags = flag; st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); /* * Dump the section of virtual memory when: @@ -279,9 +333,11 @@ static void note_page(struct pg_state *st, unsigned long addr, * - we change levels in the tree. * - the address is in a different section of memory and is thus * used for a different purpose, regardless of the flags. + * - the pa of this page is not adjacent to the last inspected page */ } else if (flag != st->current_flags || level != st->level || - addr >= st->marker[1].start_address) { + addr >= st->marker[1].start_address || + pa != st->last_pa + PAGE_SIZE) { /* Check the PTE flags */ if (st->current_flags) { @@ -305,8 +361,12 @@ static void note_page(struct pg_state *st, unsigned long addr, seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); } st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; st->current_flags = flag; st->level = level; + } else { + st->last_pa = pa; } } @@ -377,20 +437,38 @@ static void walk_pagetables(struct pg_state *st) static void populate_markers(void) { - address_markers[0].start_address = PAGE_OFFSET; - address_markers[1].start_address = VMALLOC_START; - address_markers[2].start_address = VMALLOC_END; - address_markers[3].start_address = ISA_IO_BASE; - address_markers[4].start_address = ISA_IO_END; - address_markers[5].start_address = PHB_IO_BASE; - address_markers[6].start_address = PHB_IO_END; - address_markers[7].start_address = IOREMAP_BASE; - address_markers[8].start_address = IOREMAP_END; + int i = 0; + + address_markers[i++].start_address = PAGE_OFFSET; + address_markers[i++].start_address = VMALLOC_START; + address_markers[i++].start_address = VMALLOC_END; +#ifdef CONFIG_PPC64 + address_markers[i++].start_address = ISA_IO_BASE; + address_markers[i++].start_address = ISA_IO_END; + address_markers[i++].start_address = PHB_IO_BASE; + address_markers[i++].start_address = PHB_IO_END; + address_markers[i++].start_address = IOREMAP_BASE; + address_markers[i++].start_address = IOREMAP_END; #ifdef CONFIG_PPC_STD_MMU_64 - address_markers[9].start_address = H_VMEMMAP_BASE; + address_markers[i++].start_address = H_VMEMMAP_BASE; #else - address_markers[9].start_address = VMEMMAP_BASE; + address_markers[i++].start_address = VMEMMAP_BASE; +#endif +#else /* !CONFIG_PPC64 */ + address_markers[i++].start_address = ioremap_bot; + address_markers[i++].start_address = IOREMAP_TOP; +#ifdef CONFIG_NOT_COHERENT_CACHE + address_markers[i++].start_address = IOREMAP_TOP; + address_markers[i++].start_address = IOREMAP_TOP + + CONFIG_CONSISTENT_SIZE; +#endif +#ifdef CONFIG_HIGHMEM + address_markers[i++].start_address = PKMAP_BASE; + address_markers[i++].start_address = PKMAP_ADDR(LAST_PKMAP); #endif + address_markers[i++].start_address = FIXADDR_START; + address_markers[i++].start_address = FIXADDR_TOP; +#endif /* CONFIG_PPC64 */ } static int ptdump_show(struct seq_file *m, void *v) @@ -435,7 +513,7 @@ static int ptdump_init(void) populate_markers(); build_pgtable_complete_mask(); - debugfs_file = debugfs_create_file("kernel_pagetables", 0400, NULL, + debugfs_file = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); return debugfs_file ? 0 : -ENOMEM; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 51def8a515be..3a7d580fdc59 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -120,8 +120,6 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, siginfo_t info; unsigned int lsb = 0; - up_read(¤t->mm->mmap_sem); - if (!user_mode(regs)) return MM_FAULT_ERR(SIGBUS); @@ -154,13 +152,6 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) * continue the pagefault. */ if (fatal_signal_pending(current)) { - /* - * If we have retry set, the mmap semaphore will have - * alrady been released in __lock_page_or_retry(). Else - * we release it now. - */ - if (!(fault & VM_FAULT_RETRY)) - up_read(¤t->mm->mmap_sem); /* Coming from kernel, we need to deal with uaccess fixups */ if (user_mode(regs)) return MM_FAULT_RETURN; @@ -173,8 +164,6 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) /* Out of memory */ if (fault & VM_FAULT_OOM) { - up_read(¤t->mm->mmap_sem); - /* * We ran out of memory, or some other thing happened to us that * made us unable to handle the page fault gracefully. @@ -298,7 +287,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * can result in fault, which will cause a deadlock when called with * mmap_sem held */ - if (user_mode(regs)) + if (!is_exec && user_mode(regs)) store_update_sp = store_updates_sp(regs); if (user_mode(regs)) @@ -458,9 +447,30 @@ good_area: * the fault. */ fault = handle_mm_fault(vma, address, flags); + + /* + * Handle the retry right now, the mmap_sem has been released in that + * case. + */ + if (unlikely(fault & VM_FAULT_RETRY)) { + /* We retry only once */ + if (flags & FAULT_FLAG_ALLOW_RETRY) { + /* + * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk + * of starvation. + */ + flags &= ~FAULT_FLAG_ALLOW_RETRY; + flags |= FAULT_FLAG_TRIED; + if (!fatal_signal_pending(current)) + goto retry; + } + /* We will enter mm_fault_error() below */ + } else + up_read(¤t->mm->mmap_sem); + if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) { if (fault & VM_FAULT_SIGSEGV) - goto bad_area; + goto bad_area_nosemaphore; rc = mm_fault_error(regs, address, fault); if (rc >= MM_FAULT_RETURN) goto bail; @@ -469,41 +479,29 @@ good_area: } /* - * Major/minor page fault accounting is only done on the - * initial attempt. If we go through a retry, it is extremely - * likely that the page will be found in page cache at that point. + * Major/minor page fault accounting. */ - if (flags & FAULT_FLAG_ALLOW_RETRY) { - if (fault & VM_FAULT_MAJOR) { - current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); + if (fault & VM_FAULT_MAJOR) { + current->maj_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, + regs, address); #ifdef CONFIG_PPC_SMLPAR - if (firmware_has_feature(FW_FEATURE_CMO)) { - u32 page_ins; - - preempt_disable(); - page_ins = be32_to_cpu(get_lppaca()->page_ins); - page_ins += 1 << PAGE_FACTOR; - get_lppaca()->page_ins = cpu_to_be32(page_ins); - preempt_enable(); - } -#endif /* CONFIG_PPC_SMLPAR */ - } else { - current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - if (fault & VM_FAULT_RETRY) { - /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. */ - flags &= ~FAULT_FLAG_ALLOW_RETRY; - flags |= FAULT_FLAG_TRIED; - goto retry; + if (firmware_has_feature(FW_FEATURE_CMO)) { + u32 page_ins; + + preempt_disable(); + page_ins = be32_to_cpu(get_lppaca()->page_ins); + page_ins += 1 << PAGE_FACTOR; + get_lppaca()->page_ins = cpu_to_be32(page_ins); + preempt_enable(); } +#endif /* CONFIG_PPC_SMLPAR */ + } else { + current->min_flt++; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, + regs, address); } - up_read(&mm->mmap_sem); goto bail; bad_area: diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 09cc50c8dace..6f962e5cb5e1 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -31,10 +31,8 @@ #ifdef CONFIG_SMP .section .bss .align 2 - .globl mmu_hash_lock mmu_hash_lock: .space 4 -EXPORT_SYMBOL(mmu_hash_lock) #endif /* CONFIG_SMP */ /* diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index c554768b1fa2..f2095ce9d4b0 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -35,9 +35,8 @@ #include <linux/memblock.h> #include <linux/context_tracking.h> #include <linux/libfdt.h> -#include <linux/debugfs.h> -#include <asm/debug.h> +#include <asm/debugfs.h> #include <asm/processor.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -927,11 +926,6 @@ static void __init htab_initialize(void) } #endif /* CONFIG_DEBUG_PAGEALLOC */ - /* On U3 based machines, we need to reserve the DART area and - * _NOT_ map it to avoid cache paradoxes as it's remapped non - * cacheable later on - */ - /* create bolted the linear mapping in the hash table */ for_each_memblock(memory, reg) { base = (unsigned long)__va(reg->base); @@ -981,6 +975,19 @@ void __init hash__early_init_devtree(void) void __init hash__early_init_mmu(void) { + /* + * We have code in __hash_page_64K() and elsewhere, which assumes it can + * do the following: + * new_pte |= (slot << H_PAGE_F_GIX_SHIFT) & (H_PAGE_F_SECOND | H_PAGE_F_GIX); + * + * Where the slot number is between 0-15, and values of 8-15 indicate + * the secondary bucket. For that code to work H_PAGE_F_SECOND and + * H_PAGE_F_GIX must occupy four contiguous bits in the PTE, and + * H_PAGE_F_SECOND must be placed above H_PAGE_F_GIX. Assert that here + * with a BUILD_BUG_ON(). + */ + BUILD_BUG_ON(H_PAGE_F_SECOND != (1ul << (H_PAGE_F_GIX_SHIFT + 3))); + htab_init_page_sizes(); /* @@ -1120,7 +1127,7 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr) copro_flush_all_slbs(mm); if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) { - copy_mm_to_paca(&mm->context); + copy_mm_to_paca(mm); slb_flush_and_rebolt(); } } @@ -1192,7 +1199,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, { if (user_region) { if (psize != get_paca_psize(ea)) { - copy_mm_to_paca(&mm->context); + copy_mm_to_paca(mm); slb_flush_and_rebolt(); } } else if (get_paca()->vmalloc_sllp != @@ -1855,5 +1862,4 @@ static int __init hash64_debugfs(void) return 0; } machine_device_initcall(pseries, hash64_debugfs); - #endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c index 83a8be791e06..bfe4e8526b2d 100644 --- a/arch/powerpc/mm/hugetlbpage-book3e.c +++ b/arch/powerpc/mm/hugetlbpage-book3e.c @@ -148,16 +148,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea, mm = vma->vm_mm; -#ifdef CONFIG_PPC_MM_SLICES - psize = get_slice_psize(mm, ea); - tsize = mmu_get_tsize(psize); - shift = mmu_psize_defs[psize].shift; -#else psize = vma_mmu_pagesize(vma); shift = __ilog2(psize); tsize = shift - 10; -#endif - /* * We can't be interrupted while we're setting up the MAS * regusters or after we've confirmed that no tlb exists. diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/hugetlbpage-radix.c index 35254a678456..6575b9aabef4 100644 --- a/arch/powerpc/mm/hugetlbpage-radix.c +++ b/arch/powerpc/mm/hugetlbpage-radix.c @@ -50,9 +50,12 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, struct hstate *h = hstate_file(file); struct vm_unmapped_area_info info; + if (unlikely(addr > mm->context.addr_limit && addr < TASK_SIZE)) + mm->context.addr_limit = TASK_SIZE; + if (len & ~huge_page_mask(h)) return -EINVAL; - if (len > TASK_SIZE) + if (len > mm->task_size) return -ENOMEM; if (flags & MAP_FIXED) { @@ -64,7 +67,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (addr) { addr = ALIGN(addr, huge_page_size(h)); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && + if (mm->task_size - len >= addr && (!vma || addr + len <= vma->vm_start)) return addr; } @@ -78,5 +81,9 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr, info.high_limit = current->mm->mmap_base; info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; + + if (addr > DEFAULT_MAP_WINDOW) + info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW; + return vm_unmapped_area(&info); } diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8c3389cbcd12..a4f33de4008e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -753,6 +753,24 @@ static int __init add_huge_page_size(unsigned long long size) if ((mmu_psize = shift_to_mmu_psize(shift)) < 0) return -EINVAL; +#ifdef CONFIG_PPC_BOOK3S_64 + /* + * We need to make sure that for different page sizes reported by + * firmware we only add hugetlb support for page sizes that can be + * supported by linux page table layout. + * For now we have + * Radix: 2M + * Hash: 16M and 16G + */ + if (radix_enabled()) { + if (mmu_psize != MMU_PAGE_2M) + return -EINVAL; + } else { + if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G) + return -EINVAL; + } +#endif + BUG_ON(mmu_psize_defs[mmu_psize].shift != shift); /* Return if huge page size has already been setup */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index c22f207aa656..ec84b31c6c86 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -71,10 +71,6 @@ #if H_PGTABLE_RANGE > USER_VSID_RANGE #warning Limited user VSID range means pagetable space is wasted #endif - -#if (TASK_SIZE_USER64 < H_PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif #endif /* CONFIG_PPC_STD_MMU_64 */ phys_addr_t memstart_addr = ~0; diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index a5d9ef59debe..9dbd2a733d6b 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -59,13 +59,14 @@ static inline int mmap_is_legacy(void) unsigned long arch_mmap_rnd(void) { - unsigned long rnd; + unsigned long shift, rnd; - /* 8MB for 32bit, 1GB for 64bit */ + shift = mmap_rnd_bits; +#ifdef CONFIG_COMPAT if (is_32bit_task()) - rnd = get_random_long() % (1<<(23-PAGE_SHIFT)); - else - rnd = get_random_long() % (1UL<<(30-PAGE_SHIFT)); + shift = mmap_rnd_compat_bits; +#endif + rnd = get_random_long() % (1ul << shift); return rnd << PAGE_SHIFT; } @@ -79,7 +80,7 @@ static inline unsigned long mmap_base(unsigned long rnd) else if (gap > MAX_GAP) gap = MAX_GAP; - return PAGE_ALIGN(TASK_SIZE - gap - rnd); + return PAGE_ALIGN(DEFAULT_MAP_WINDOW - gap - rnd); } #ifdef CONFIG_PPC_RADIX_MMU @@ -97,7 +98,11 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, struct vm_area_struct *vma; struct vm_unmapped_area_info info; - if (len > TASK_SIZE - mmap_min_addr) + if (unlikely(addr > mm->context.addr_limit && + mm->context.addr_limit != TASK_SIZE)) + mm->context.addr_limit = TASK_SIZE; + + if (len > mm->task_size - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) @@ -106,7 +111,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + if (mm->task_size - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) return addr; } @@ -114,8 +119,13 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr, info.flags = 0; info.length = len; info.low_limit = mm->mmap_base; - info.high_limit = TASK_SIZE; info.align_mask = 0; + + if (unlikely(addr > DEFAULT_MAP_WINDOW)) + info.high_limit = mm->context.addr_limit; + else + info.high_limit = DEFAULT_MAP_WINDOW; + return vm_unmapped_area(&info); } @@ -131,8 +141,12 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr = addr0; struct vm_unmapped_area_info info; + if (unlikely(addr > mm->context.addr_limit && + mm->context.addr_limit != TASK_SIZE)) + mm->context.addr_limit = TASK_SIZE; + /* requested length too big for entire address space */ - if (len > TASK_SIZE - mmap_min_addr) + if (len > mm->task_size - mmap_min_addr) return -ENOMEM; if (flags & MAP_FIXED) @@ -142,7 +156,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, if (addr) { addr = PAGE_ALIGN(addr); vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && + if (mm->task_size - len >= addr && addr >= mmap_min_addr && (!vma || addr + len <= vma->vm_start)) return addr; } @@ -152,7 +166,14 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, info.low_limit = max(PAGE_SIZE, mmap_min_addr); info.high_limit = mm->mmap_base; info.align_mask = 0; + + if (addr > DEFAULT_MAP_WINDOW) + info.high_limit += mm->context.addr_limit - DEFAULT_MAP_WINDOW; + addr = vm_unmapped_area(&info); + if (!(addr & ~PAGE_MASK)) + return addr; + VM_BUG_ON(addr != -ENOMEM); /* * A failed mmap() very likely causes application failure, @@ -160,15 +181,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp, * can happen with large stack limits and large mmap() * allocations. */ - if (addr & ~PAGE_MASK) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = TASK_SIZE; - addr = vm_unmapped_area(&info); - } - - return addr; + return radix__arch_get_unmapped_area(filp, addr0, len, pgoff, flags); } static void radix__arch_pick_mmap_layout(struct mm_struct *mm, diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 73bf6e14c3aa..c6dca2ae78ef 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -30,17 +30,16 @@ static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); -int __init_new_context(void) +static int alloc_context_id(int min_id, int max_id) { - int index; - int err; + int index, err; again: if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) return -ENOMEM; spin_lock(&mmu_context_lock); - err = ida_get_new_above(&mmu_context_ida, 1, &index); + err = ida_get_new_above(&mmu_context_ida, min_id, &index); spin_unlock(&mmu_context_lock); if (err == -EAGAIN) @@ -48,7 +47,7 @@ again: else if (err) return err; - if (index > MAX_USER_CONTEXT) { + if (index > max_id) { spin_lock(&mmu_context_lock); ida_remove(&mmu_context_ida, index); spin_unlock(&mmu_context_lock); @@ -57,48 +56,105 @@ again: return index; } -EXPORT_SYMBOL_GPL(__init_new_context); -static int radix__init_new_context(struct mm_struct *mm, int index) + +void hash__reserve_context_id(int id) +{ + int rc, result = 0; + + do { + if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL)) + break; + + spin_lock(&mmu_context_lock); + rc = ida_get_new_above(&mmu_context_ida, id, &result); + spin_unlock(&mmu_context_lock); + } while (rc == -EAGAIN); + + WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result); +} + +int hash__alloc_context_id(void) +{ + unsigned long max; + + if (mmu_has_feature(MMU_FTR_68_BIT_VA)) + max = MAX_USER_CONTEXT; + else + max = MAX_USER_CONTEXT_65BIT_VA; + + return alloc_context_id(MIN_USER_CONTEXT, max); +} +EXPORT_SYMBOL_GPL(hash__alloc_context_id); + +static int hash__init_new_context(struct mm_struct *mm) +{ + int index; + + index = hash__alloc_context_id(); + if (index < 0) + return index; + + /* + * We do switch_slb() early in fork, even before we setup the + * mm->context.addr_limit. Default to max task size so that we copy the + * default values to paca which will help us to handle slb miss early. + */ + mm->context.addr_limit = TASK_SIZE_128TB; + + /* + * The old code would re-promote on fork, we don't do that when using + * slices as it could cause problem promoting slices that have been + * forced down to 4K. + * + * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check + * explicitly against context.id == 0. This ensures that we properly + * initialize context slice details for newly allocated mm's (which will + * have id == 0) and don't alter context slice inherited via fork (which + * will have id != 0). + * + * We should not be calling init_new_context() on init_mm. Hence a + * check against 0 is OK. + */ + if (mm->context.id == 0) + slice_set_user_psize(mm, mmu_virtual_psize); + + subpage_prot_init_new_context(mm); + + return index; +} + +static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; + int index; + + index = alloc_context_id(1, PRTB_ENTRIES - 1); + if (index < 0) + return index; /* * set the process table entry, */ rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); - return 0; + + mm->context.npu_context = NULL; + + return index; } int init_new_context(struct task_struct *tsk, struct mm_struct *mm) { int index; - index = __init_new_context(); + if (radix_enabled()) + index = radix__init_new_context(mm); + else + index = hash__init_new_context(mm); + if (index < 0) return index; - if (radix_enabled()) { - radix__init_new_context(mm, index); - } else { - - /* The old code would re-promote on fork, we don't do that - * when using slices as it could cause problem promoting slices - * that have been forced down to 4K - * - * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check - * explicitly against context.id == 0. This ensures that we - * properly initialize context slice details for newly allocated - * mm's (which will have id == 0) and don't alter context slice - * inherited via fork (which will have id != 0). - * - * We should not be calling init_new_context() on init_mm. Hence a - * check against 0 is ok. - */ - if (mm->context.id == 0) - slice_set_user_psize(mm, mmu_virtual_psize); - subpage_prot_init_new_context(mm); - } mm->context.id = index; #ifdef CONFIG_PPC_ICSWX mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 497130c5c742..e0a2d8e806ed 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -81,7 +81,7 @@ struct page *new_iommu_non_cma_page(struct page *page, unsigned long private, gfp_t gfp_mask = GFP_USER; struct page *new_page; - if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + if (PageCompound(page)) return NULL; if (PageHighMem(page)) @@ -100,7 +100,7 @@ static int mm_iommu_move_page_from_cma(struct page *page) LIST_HEAD(cma_migrate_pages); /* Ignore huge pages for now */ - if (PageHuge(page) || PageTransHuge(page) || PageCompound(page)) + if (PageCompound(page)) return -EBUSY; lru_add_drain(); @@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); +struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm, + unsigned long ua, unsigned long size) +{ + struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + + list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list, + next) { + if ((mem->ua <= ua) && + (ua + size <= mem->ua + + (mem->entries << PAGE_SHIFT))) { + ret = mem; + break; + } + } + + return ret; +} +EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm); + struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, unsigned long ua, unsigned long entries) { @@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, } EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa); +long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem, + unsigned long ua, unsigned long *hpa) +{ + const long entry = (ua - mem->ua) >> PAGE_SHIFT; + void *va = &mem->hpas[entry]; + unsigned long *pa; + + if (entry >= mem->entries) + return -EFAULT; + + pa = (void *) vmalloc_to_phys(va); + if (!pa) + return -EFAULT; + + *hpa = *pa | (ua & ~PAGE_MASK); + + return 0; +} +EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm); + long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem) { if (atomic64_inc_not_zero(&mem->mapped)) diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index c491f2c8f2b9..4554d6527682 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -333,11 +333,6 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.id = MMU_NO_CONTEXT; mm->context.active = 0; - -#ifdef CONFIG_PPC_MM_SLICES - slice_set_user_psize(mm, mmu_virtual_psize); -#endif - return 0; } diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9befaee237d6..371792e4418f 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -875,13 +875,6 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - if (spanned_pages) - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, - (end_pfn << PAGE_SHIFT) - 1); - else - pr_info("Initmem setup node %d\n", nid); - nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 5e01b2ece1d0..654a0d7ba0e7 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -131,7 +131,7 @@ static void __slb_flush_and_rebolt(void) "slbmte %2,%3\n" "isync" :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)), - "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, 1)), + "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)), "r"(ksp_vsid_data), "r"(ksp_esid_data) : "memory"); @@ -229,7 +229,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) asm volatile("slbie %0" : : "r" (slbie_data)); get_paca()->slb_cache_ptr = 0; - copy_mm_to_paca(&mm->context); + copy_mm_to_paca(mm); /* * preload some userspace segments into the SLB. diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index a85e06ea6c20..1519617aab36 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -23,6 +23,48 @@ #include <asm/pgtable.h> #include <asm/firmware.h> +/* + * This macro generates asm code to compute the VSID scramble + * function. Used in slb_allocate() and do_stab_bolted. The function + * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS + * + * rt = register containing the proto-VSID and into which the + * VSID will be stored + * rx = scratch register (clobbered) + * rf = flags + * + * - rt and rx must be different registers + * - The answer will end up in the low VSID_BITS bits of rt. The higher + * bits may contain other garbage, so you may need to mask the + * result. + */ +#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \ + lis rx,VSID_MULTIPLIER_##size@h; \ + ori rx,rx,VSID_MULTIPLIER_##size@l; \ + mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \ +/* \ + * powermac get slb fault before feature fixup, so make 65 bit part \ + * the default part of feature fixup \ + */ \ +BEGIN_MMU_FTR_SECTION \ + srdi rx,rt,VSID_BITS_65_##size; \ + clrldi rt,rt,(64-VSID_BITS_65_##size); \ + add rt,rt,rx; \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_65_##size; \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \ +MMU_FTR_SECTION_ELSE \ + srdi rx,rt,VSID_BITS_##size; \ + clrldi rt,rt,(64-VSID_BITS_##size); \ + add rt,rt,rx; /* add high and low bits */ \ + addi rx,rt,1; \ + srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \ + add rt,rt,rx; \ + rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \ +ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA) + + /* void slb_allocate_realmode(unsigned long ea); * * Create an SLB entry for the given EA (user or kernel). @@ -45,13 +87,6 @@ _GLOBAL(slb_allocate_realmode) /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */ blt cr7,0f /* user or kernel? */ - /* kernel address: proto-VSID = ESID */ - /* WARNING - MAGIC: we don't use the VSID 0xfffffffff, but - * this code will generate the protoVSID 0xfffffffff for the - * top segment. That's ok, the scramble below will translate - * it to VSID 0, which is reserved as a bad VSID - one which - * will never have any pages in it. */ - /* Check if hitting the linear mapping or some other kernel space */ bne cr7,1f @@ -63,12 +98,10 @@ _GLOBAL(slb_allocate_realmode) slb_miss_kernel_load_linear: li r11,0 /* - * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * context = (ea >> 60) - (0xc - 1) * r9 = region id. */ - addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha - addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l - + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET BEGIN_FTR_SECTION b .Lslb_finish_load @@ -77,9 +110,9 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) 1: #ifdef CONFIG_SPARSEMEM_VMEMMAP - /* Check virtual memmap region. To be patches at kernel boot */ cmpldi cr0,r9,0xf bne 1f +/* Check virtual memmap region. To be patched at kernel boot */ .globl slb_miss_kernel_load_vmemmap slb_miss_kernel_load_vmemmap: li r11,0 @@ -102,11 +135,10 @@ slb_miss_kernel_load_io: li r11,0 6: /* - * context = (MAX_USER_CONTEXT) + ((ea >> 60) - 0xc) + 1 + * context = (ea >> 60) - (0xc - 1) * r9 = region id. */ - addis r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@ha - addi r9,r9,(MAX_USER_CONTEXT - 0xc + 1)@l + subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET BEGIN_FTR_SECTION b .Lslb_finish_load @@ -117,7 +149,13 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT) * For userspace addresses, make sure this is region 0. */ cmpdi r9, 0 - bne 8f + bne- 8f + /* + * user space make sure we are within the allowed limit + */ + ld r11,PACA_ADDR_LIMIT(r13) + cmpld r3,r11 + bge- 8f /* when using slices, we extract the psize off the slice bitmaps * and then we need to get the sllp encoding off the mmu_psize_defs @@ -189,13 +227,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT) */ .Lslb_finish_load: rldimi r10,r9,ESID_BITS,0 - ASM_VSID_SCRAMBLE(r10,r9,256M) - /* - * bits above VSID_BITS_256M need to be ignored from r10 - * also combine VSID and flags - */ - rldimi r11,r10,SLB_VSID_SHIFT,(64 - (SLB_VSID_SHIFT + VSID_BITS_256M)) - + ASM_VSID_SCRAMBLE(r10,r9,r11,256M) /* r3 = EA, r11 = VSID data */ /* * Find a slot, round robin. Previously we tried to find a @@ -259,12 +291,12 @@ slb_compare_rr_to_size: .Lslb_finish_load_1T: srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */ rldimi r10,r9,ESID_BITS_1T,0 - ASM_VSID_SCRAMBLE(r10,r9,1T) + ASM_VSID_SCRAMBLE(r10,r9,r11,1T) /* * bits above VSID_BITS_1T need to be ignored from r10 * also combine VSID and flags */ - rldimi r11,r10,SLB_VSID_SHIFT_1T,(64 - (SLB_VSID_SHIFT_1T + VSID_BITS_1T)) + li r10,MMU_SEGSIZE_1T rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */ diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 2b27458902ee..966b9fccfa66 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -36,38 +36,29 @@ #include <asm/copro.h> #include <asm/hugetlb.h> -/* some sanity checks */ -#if (H_PGTABLE_RANGE >> 43) > SLICE_MASK_SIZE -#error H_PGTABLE_RANGE exceeds slice_mask high_slices size -#endif - static DEFINE_SPINLOCK(slice_convert_lock); - +/* + * One bit per slice. We have lower slices which cover 256MB segments + * upto 4G range. That gets us 16 low slices. For the rest we track slices + * in 1TB size. + */ +struct slice_mask { + u64 low_slices; + DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH); +}; #ifdef DEBUG int _slice_debug = 1; static void slice_print_mask(const char *label, struct slice_mask mask) { - char *p, buf[16 + 3 + 64 + 1]; - int i; - if (!_slice_debug) return; - p = buf; - for (i = 0; i < SLICE_NUM_LOW; i++) - *(p++) = (mask.low_slices & (1 << i)) ? '1' : '0'; - *(p++) = ' '; - *(p++) = '-'; - *(p++) = ' '; - for (i = 0; i < SLICE_NUM_HIGH; i++) - *(p++) = (mask.high_slices & (1ul << i)) ? '1' : '0'; - *(p++) = 0; - - printk(KERN_DEBUG "%s:%s\n", label, buf); + pr_devel("%s low_slice: %*pbl\n", label, (int)SLICE_NUM_LOW, &mask.low_slices); + pr_devel("%s high_slice: %*pbl\n", label, (int)SLICE_NUM_HIGH, mask.high_slices); } -#define slice_dbg(fmt...) do { if (_slice_debug) pr_debug(fmt); } while(0) +#define slice_dbg(fmt...) do { if (_slice_debug) pr_devel(fmt); } while (0) #else @@ -76,25 +67,28 @@ static void slice_print_mask(const char *label, struct slice_mask mask) {} #endif -static struct slice_mask slice_range_to_mask(unsigned long start, - unsigned long len) +static void slice_range_to_mask(unsigned long start, unsigned long len, + struct slice_mask *ret) { unsigned long end = start + len - 1; - struct slice_mask ret = { 0, 0 }; + + ret->low_slices = 0; + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); if (start < SLICE_LOW_TOP) { - unsigned long mend = min(end, SLICE_LOW_TOP); - unsigned long mstart = min(start, SLICE_LOW_TOP); + unsigned long mend = min(end, (SLICE_LOW_TOP - 1)); - ret.low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) - - (1u << GET_LOW_SLICE_INDEX(mstart)); + ret->low_slices = (1u << (GET_LOW_SLICE_INDEX(mend) + 1)) + - (1u << GET_LOW_SLICE_INDEX(start)); } - if ((start + len) > SLICE_LOW_TOP) - ret.high_slices = (1ul << (GET_HIGH_SLICE_INDEX(end) + 1)) - - (1ul << GET_HIGH_SLICE_INDEX(start)); + if ((start + len) > SLICE_LOW_TOP) { + unsigned long start_index = GET_HIGH_SLICE_INDEX(start); + unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT)); + unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index; - return ret; + bitmap_set(ret->high_slices, start_index, count); + } } static int slice_area_is_free(struct mm_struct *mm, unsigned long addr, @@ -128,53 +122,60 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) return !slice_area_is_free(mm, start, end - start); } -static struct slice_mask slice_mask_for_free(struct mm_struct *mm) +static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret) { - struct slice_mask ret = { 0, 0 }; unsigned long i; + ret->low_slices = 0; + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + for (i = 0; i < SLICE_NUM_LOW; i++) if (!slice_low_has_vma(mm, i)) - ret.low_slices |= 1u << i; + ret->low_slices |= 1u << i; if (mm->task_size <= SLICE_LOW_TOP) - return ret; + return; - for (i = 0; i < SLICE_NUM_HIGH; i++) + for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) if (!slice_high_has_vma(mm, i)) - ret.high_slices |= 1ul << i; - - return ret; + __set_bit(i, ret->high_slices); } -static struct slice_mask slice_mask_for_size(struct mm_struct *mm, int psize) +static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret) { unsigned char *hpsizes; int index, mask_index; - struct slice_mask ret = { 0, 0 }; unsigned long i; u64 lpsizes; + ret->low_slices = 0; + bitmap_zero(ret->high_slices, SLICE_NUM_HIGH); + lpsizes = mm->context.low_slices_psize; for (i = 0; i < SLICE_NUM_LOW; i++) if (((lpsizes >> (i * 4)) & 0xf) == psize) - ret.low_slices |= 1u << i; + ret->low_slices |= 1u << i; hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) { + for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) { mask_index = i & 0x1; index = i >> 1; if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) - ret.high_slices |= 1ul << i; + __set_bit(i, ret->high_slices); } - - return ret; } -static int slice_check_fit(struct slice_mask mask, struct slice_mask available) +static int slice_check_fit(struct mm_struct *mm, + struct slice_mask mask, struct slice_mask available) { + DECLARE_BITMAP(result, SLICE_NUM_HIGH); + unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.addr_limit); + + bitmap_and(result, mask.high_slices, + available.high_slices, slice_count); + return (mask.low_slices & available.low_slices) == mask.low_slices && - (mask.high_slices & available.high_slices) == mask.high_slices; + bitmap_equal(result, mask.high_slices, slice_count); } static void slice_flush_segments(void *parm) @@ -185,7 +186,7 @@ static void slice_flush_segments(void *parm) if (mm != current->active_mm) return; - copy_mm_to_paca(¤t->active_mm->context); + copy_mm_to_paca(current->active_mm); local_irq_save(flags); slb_flush_and_rebolt(); @@ -218,18 +219,18 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz mm->context.low_slices_psize = lpsizes; hpsizes = mm->context.high_slices_psize; - for (i = 0; i < SLICE_NUM_HIGH; i++) { + for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.addr_limit); i++) { mask_index = i & 0x1; index = i >> 1; - if (mask.high_slices & (1ul << i)) + if (test_bit(i, mask.high_slices)) hpsizes[index] = (hpsizes[index] & ~(0xf << (mask_index * 4))) | (((unsigned long)psize) << (mask_index * 4)); } slice_dbg(" lsps=%lx, hsps=%lx\n", - mm->context.low_slices_psize, - mm->context.high_slices_psize); + (unsigned long)mm->context.low_slices_psize, + (unsigned long)mm->context.high_slices_psize); spin_unlock_irqrestore(&slice_convert_lock, flags); @@ -257,14 +258,14 @@ static bool slice_scan_available(unsigned long addr, slice = GET_HIGH_SLICE_INDEX(addr); *boundary_addr = (slice + end) ? ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP; - return !!(available.high_slices & (1ul << slice)); + return !!test_bit(slice, available.high_slices); } } static unsigned long slice_find_area_bottomup(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize) + int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long addr, found, next_end; @@ -276,7 +277,10 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, info.align_offset = 0; addr = TASK_UNMAPPED_BASE; - while (addr < TASK_SIZE) { + /* + * Check till the allow max value for this mmap request + */ + while (addr < high_limit) { info.low_limit = addr; if (!slice_scan_available(addr, available, 1, &addr)) continue; @@ -288,8 +292,8 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, * Check if we need to reduce the range, or if we can * extend it to cover the next available slice. */ - if (addr >= TASK_SIZE) - addr = TASK_SIZE; + if (addr >= high_limit) + addr = high_limit; else if (slice_scan_available(addr, available, 1, &next_end)) { addr = next_end; goto next_slice; @@ -307,7 +311,7 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm, static unsigned long slice_find_area_topdown(struct mm_struct *mm, unsigned long len, struct slice_mask available, - int psize) + int psize, unsigned long high_limit) { int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); unsigned long addr, found, prev; @@ -319,6 +323,15 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, info.align_offset = 0; addr = mm->mmap_base; + /* + * If we are trying to allocate above DEFAULT_MAP_WINDOW + * Add the different to the mmap_base. + * Only for that request for which high_limit is above + * DEFAULT_MAP_WINDOW we should apply this. + */ + if (high_limit > DEFAULT_MAP_WINDOW) + addr += mm->context.addr_limit - DEFAULT_MAP_WINDOW; + while (addr > PAGE_SIZE) { info.high_limit = addr; if (!slice_scan_available(addr - 1, available, 0, &addr)) @@ -350,29 +363,38 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm, * can happen with large stack limits and large mmap() * allocations. */ - return slice_find_area_bottomup(mm, len, available, psize); + return slice_find_area_bottomup(mm, len, available, psize, high_limit); } static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len, struct slice_mask mask, int psize, - int topdown) + int topdown, unsigned long high_limit) { if (topdown) - return slice_find_area_topdown(mm, len, mask, psize); + return slice_find_area_topdown(mm, len, mask, psize, high_limit); else - return slice_find_area_bottomup(mm, len, mask, psize); + return slice_find_area_bottomup(mm, len, mask, psize, high_limit); } -#define or_mask(dst, src) do { \ - (dst).low_slices |= (src).low_slices; \ - (dst).high_slices |= (src).high_slices; \ -} while (0) +static inline void slice_or_mask(struct slice_mask *dst, struct slice_mask *src) +{ + DECLARE_BITMAP(result, SLICE_NUM_HIGH); + + dst->low_slices |= src->low_slices; + bitmap_or(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); + bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); +} -#define andnot_mask(dst, src) do { \ - (dst).low_slices &= ~(src).low_slices; \ - (dst).high_slices &= ~(src).high_slices; \ -} while (0) +static inline void slice_andnot_mask(struct slice_mask *dst, struct slice_mask *src) +{ + DECLARE_BITMAP(result, SLICE_NUM_HIGH); + + dst->low_slices &= ~src->low_slices; + + bitmap_andnot(result, dst->high_slices, src->high_slices, SLICE_NUM_HIGH); + bitmap_copy(dst->high_slices, result, SLICE_NUM_HIGH); +} #ifdef CONFIG_PPC_64K_PAGES #define MMU_PAGE_BASE MMU_PAGE_64K @@ -384,14 +406,43 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, unsigned long flags, unsigned int psize, int topdown) { - struct slice_mask mask = {0, 0}; + struct slice_mask mask; struct slice_mask good_mask; - struct slice_mask potential_mask = {0,0} /* silence stupid warning */; - struct slice_mask compat_mask = {0, 0}; + struct slice_mask potential_mask; + struct slice_mask compat_mask; int fixed = (flags & MAP_FIXED); int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT); struct mm_struct *mm = current->mm; unsigned long newaddr; + unsigned long high_limit; + + /* + * Check if we need to expland slice area. + */ + if (unlikely(addr > mm->context.addr_limit && + mm->context.addr_limit != TASK_SIZE)) { + mm->context.addr_limit = TASK_SIZE; + on_each_cpu(slice_flush_segments, mm, 1); + } + /* + * This mmap request can allocate upt to 512TB + */ + if (addr > DEFAULT_MAP_WINDOW) + high_limit = mm->context.addr_limit; + else + high_limit = DEFAULT_MAP_WINDOW; + /* + * init different masks + */ + mask.low_slices = 0; + bitmap_zero(mask.high_slices, SLICE_NUM_HIGH); + + /* silence stupid warning */; + potential_mask.low_slices = 0; + bitmap_zero(potential_mask.high_slices, SLICE_NUM_HIGH); + + compat_mask.low_slices = 0; + bitmap_zero(compat_mask.high_slices, SLICE_NUM_HIGH); /* Sanity checks */ BUG_ON(mm->task_size == 0); @@ -423,7 +474,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - good_mask = slice_mask_for_size(mm, psize); + slice_mask_for_size(mm, psize, &good_mask); slice_print_mask(" good_mask", good_mask); /* @@ -448,22 +499,22 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, #ifdef CONFIG_PPC_64K_PAGES /* If we support combo pages, we can allow 64k pages in 4k slices */ if (psize == MMU_PAGE_64K) { - compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K); + slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask); if (fixed) - or_mask(good_mask, compat_mask); + slice_or_mask(&good_mask, &compat_mask); } #endif /* First check hint if it's valid or if we have MAP_FIXED */ if (addr != 0 || fixed) { /* Build a mask for the requested range */ - mask = slice_range_to_mask(addr, len); + slice_range_to_mask(addr, len, &mask); slice_print_mask(" mask", mask); /* Check if we fit in the good mask. If we do, we just return, * nothing else to do */ - if (slice_check_fit(mask, good_mask)) { + if (slice_check_fit(mm, mask, good_mask)) { slice_dbg(" fits good !\n"); return addr; } @@ -471,7 +522,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing * slices for that size */ - newaddr = slice_find_area(mm, len, good_mask, psize, topdown); + newaddr = slice_find_area(mm, len, good_mask, + psize, topdown, high_limit); if (newaddr != -ENOMEM) { /* Found within the good mask, we don't have to setup, * we thus return directly @@ -484,11 +536,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* We don't fit in the good mask, check what other slices are * empty and thus can be converted */ - potential_mask = slice_mask_for_free(mm); - or_mask(potential_mask, good_mask); + slice_mask_for_free(mm, &potential_mask); + slice_or_mask(&potential_mask, &good_mask); slice_print_mask(" potential", potential_mask); - if ((addr != 0 || fixed) && slice_check_fit(mask, potential_mask)) { + if ((addr != 0 || fixed) && slice_check_fit(mm, mask, potential_mask)) { slice_dbg(" fits potential !\n"); goto convert; } @@ -503,7 +555,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, * anywhere in the good area. */ if (addr) { - addr = slice_find_area(mm, len, good_mask, psize, topdown); + addr = slice_find_area(mm, len, good_mask, + psize, topdown, high_limit); if (addr != -ENOMEM) { slice_dbg(" found area at 0x%lx\n", addr); return addr; @@ -513,28 +566,29 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* Now let's see if we can find something in the existing slices * for that size plus free slices */ - addr = slice_find_area(mm, len, potential_mask, psize, topdown); + addr = slice_find_area(mm, len, potential_mask, + psize, topdown, high_limit); #ifdef CONFIG_PPC_64K_PAGES if (addr == -ENOMEM && psize == MMU_PAGE_64K) { /* retry the search with 4k-page slices included */ - or_mask(potential_mask, compat_mask); - addr = slice_find_area(mm, len, potential_mask, psize, - topdown); + slice_or_mask(&potential_mask, &compat_mask); + addr = slice_find_area(mm, len, potential_mask, + psize, topdown, high_limit); } #endif if (addr == -ENOMEM) return -ENOMEM; - mask = slice_range_to_mask(addr, len); + slice_range_to_mask(addr, len, &mask); slice_dbg(" found potential area at 0x%lx\n", addr); slice_print_mask(" mask", mask); convert: - andnot_mask(mask, good_mask); - andnot_mask(mask, compat_mask); - if (mask.low_slices || mask.high_slices) { + slice_andnot_mask(&mask, &good_mask); + slice_andnot_mask(&mask, &compat_mask); + if (mask.low_slices || !bitmap_empty(mask.high_slices, SLICE_NUM_HIGH)) { slice_convert(mm, mask, psize); if (psize > MMU_PAGE_BASE) on_each_cpu(slice_flush_segments, mm, 1); @@ -649,8 +703,8 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) slice_dbg(" lsps=%lx, hsps=%lx\n", - mm->context.low_slices_psize, - mm->context.high_slices_psize); + (unsigned long)mm->context.low_slices_psize, + (unsigned long)mm->context.high_slices_psize); bail: spin_unlock_irqrestore(&slice_convert_lock, flags); @@ -659,9 +713,11 @@ void slice_set_user_psize(struct mm_struct *mm, unsigned int psize) void slice_set_range_psize(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned int psize) { - struct slice_mask mask = slice_range_to_mask(start, len); + struct slice_mask mask; VM_BUG_ON(radix_enabled()); + + slice_range_to_mask(start, len, &mask); slice_convert(mm, mask, psize); } @@ -694,14 +750,14 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, if (radix_enabled()) return 0; - mask = slice_range_to_mask(addr, len); - available = slice_mask_for_size(mm, psize); + slice_range_to_mask(addr, len, &mask); + slice_mask_for_size(mm, psize, &available); #ifdef CONFIG_PPC_64K_PAGES /* We need to account for 4k slices too */ if (psize == MMU_PAGE_64K) { struct slice_mask compat_mask; - compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K); - or_mask(available, compat_mask); + slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask); + slice_or_mask(&available, &compat_mask); } #endif @@ -711,6 +767,6 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, slice_print_mask(" mask", mask); slice_print_mask(" available", available); #endif - return !slice_check_fit(mask, available); + return !slice_check_fit(mm, mask, available); } #endif diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/subpage-prot.c index 94210940112f..e94fbd4c8845 100644 --- a/arch/powerpc/mm/subpage-prot.c +++ b/arch/powerpc/mm/subpage-prot.c @@ -197,7 +197,8 @@ long sys_subpage_prot(unsigned long addr, unsigned long len, u32 __user *map) /* Check parameters */ if ((addr & ~PAGE_MASK) || (len & ~PAGE_MASK) || - addr >= TASK_SIZE || len >= TASK_SIZE || addr + len > TASK_SIZE) + addr >= mm->task_size || len >= mm->task_size || + addr + len > mm->task_size) return -EINVAL; if (is_hugepage_only_range(mm, addr, len)) diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 952713d6cf04..02e71402fdd3 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -17,7 +17,6 @@ #include <asm/tlb.h> #include <asm/tlbflush.h> -static DEFINE_RAW_SPINLOCK(native_tlbie_lock); #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -34,10 +33,8 @@ static inline void __tlbiel_pid(unsigned long pid, int set, prs = 1; /* process scoped */ r = 1; /* raidx format */ - asm volatile("ptesync": : :"memory"); asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1) : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - asm volatile("ptesync": : :"memory"); } /* @@ -47,9 +44,33 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) { int set; - for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) { + asm volatile("ptesync": : :"memory"); + + /* + * Flush the first set of the TLB, and if we're doing a RIC_FLUSH_ALL, + * also flush the entire Page Walk Cache. + */ + __tlbiel_pid(pid, 0, ric); + + if (ric == RIC_FLUSH_ALL) + /* For the remaining sets, just flush the TLB */ + ric = RIC_FLUSH_TLB; + + for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) __tlbiel_pid(pid, set, ric); - } + + asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); +} + +static inline void tlbiel_pwc(unsigned long pid) +{ + asm volatile("ptesync": : :"memory"); + + /* For PWC flush, we don't look at set number */ + __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); + + asm volatile("ptesync": : :"memory"); asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); } @@ -129,12 +150,18 @@ void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) { unsigned long pid; struct mm_struct *mm = tlb->mm; + /* + * If we are doing a full mm flush, we will do a tlb flush + * with RIC_FLUSH_ALL later. + */ + if (tlb->fullmm) + return; preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_PWC); + tlbiel_pwc(pid); preempt_enable(); } @@ -175,15 +202,9 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_pid(pid, RIC_FLUSH_ALL); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else + else _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); @@ -195,22 +216,22 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) unsigned long pid; struct mm_struct *mm = tlb->mm; + /* + * If we are doing a full mm flush, we will do a tlb flush + * with RIC_FLUSH_ALL later. + */ + if (tlb->fullmm) + return; preempt_disable(); pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_pid(pid, RIC_FLUSH_PWC); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else - _tlbiel_pid(pid, RIC_FLUSH_PWC); + else + tlbiel_pwc(pid); no_context: preempt_enable(); } @@ -226,15 +247,9 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_thread_local(mm)) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + if (!mm_is_thread_local(mm)) _tlbie_va(vmaddr, pid, ap, RIC_FLUSH_TLB); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } else + else _tlbiel_va(vmaddr, pid, ap, RIC_FLUSH_TLB); bail: preempt_enable(); @@ -255,13 +270,7 @@ EXPORT_SYMBOL(radix__flush_tlb_page); void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); - - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); _tlbie_pid(0, RIC_FLUSH_ALL); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); } EXPORT_SYMBOL(radix__flush_tlb_kernel_range); @@ -323,7 +332,6 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, unsigned long addr; int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); - int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; @@ -344,13 +352,8 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, if (local) _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); - else { - if (lock_tlbie) - raw_spin_lock(&native_tlbie_lock); + else _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); - if (lock_tlbie) - raw_spin_unlock(&native_tlbie_lock); - } } err_out: preempt_enable(); @@ -437,7 +440,7 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, return; } - if (old_pte & _PAGE_LARGE) + if (old_pte & R_PAGE_LARGE) radix__flush_tlb_page_psize(mm, address, MMU_PAGE_2M); else radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index ba28fcb98597..bfc4a0869609 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -770,7 +770,7 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, * avoid going over total available memory just in case... */ #ifdef CONFIG_PPC_FSL_BOOK3E - if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { + if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { unsigned long linear_sz; unsigned int num_cams; |