From 55052eeca6d71d76f7c3f156c0501814d8e5e6d3 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt
Date: Wed, 7 Apr 2010 14:39:36 +1000
Subject: powerpc: Fix ioremap_flags() with book3e pte definition

We can't just clear the user read permission in the book3e pte, because
that will also clear the supervisor read permission. This surely isn't
desired. Fix the problem by adding the supervisor read back.

BenH: Slightly simplified the ifdef and applied to ppc64 too

Signed-off-by: Li Yang
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/pgtable_32.c | 8 ++++++++
 arch/powerpc/mm/pgtable_64.c | 8 ++++++++
 2 files changed, 16 insertions(+)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index b9243e7557ae..767b0cf17d33 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -146,6 +146,14 @@ ioremap_flags(phys_addr_t addr, unsigned long size, unsigned long flags)
 	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
 	flags &= ~(_PAGE_USER | _PAGE_EXEC);
 
+#ifdef _PAGE_BAP_SR
+	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
+	 * which means that we just cleared supervisor access... oops ;-) This
+	 * restores it
+	 */
+	flags |= _PAGE_BAP_SR;
+#endif
+
 	return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(ioremap_flags);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d95679a5fb29..d050fc8d9714 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -265,6 +265,14 @@ void __iomem * ioremap_flags(phys_addr_t addr, unsigned long size,
 	/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
 	flags &= ~(_PAGE_USER | _PAGE_EXEC);
 
+#ifdef _PAGE_BAP_SR
+	/* _PAGE_USER contains _PAGE_BAP_SR on BookE using the new PTE format
+	 * which means that we just cleared supervisor access... oops ;-) This
+	 * restores it
+	 */
+	flags |= _PAGE_BAP_SR;
+#endif
+
 	if (ppc_md.ioremap)
 		return ppc_md.ioremap(addr, size, flags, caller);
 	return __ioremap_caller(addr, size, flags, caller);
--
cgit v1.2.3


From 9c7cc234dc5edf5379fbbab4973f6704f59bc57b Mon Sep 17 00:00:00 2001
From: "K.Prasad"
Date: Mon, 29 Mar 2010 23:59:25 +0000
Subject: powerpc: Disable interrupts for data breakpoint exceptions

Data address breakpoint exceptions are currently handled along with
page faults, which require interrupts to remain enabled. Since exception
handling for data breakpoints isn't pre-empt safe, we handle them
separately.
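Conceptually, the breakpoint work now has to run while interrupts are still
disabled, before the normal page-fault path (which needs interrupts enabled)
is entered. A rough sketch of that shape, reusing the DSISR_DABRMATCH and
do_dabr() symbols from the hunk below; the exact placement of the separate
handler is outside arch/powerpc/mm and is assumed here, not shown by this
listing:

	/* sketch only: handle a DABR match with interrupts still off,
	 * instead of falling through into the page-fault code which
	 * re-enables them */
	if (error_code & DSISR_DABRMATCH) {
		do_dabr(regs, address, error_code);	/* not pre-empt safe, so IRQs stay disabled */
		return 0;
	}
	/* otherwise continue into normal page-fault handling */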
Signed-off-by: K.Prasad Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/fault.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 26fb6b990b0a..83ac4935eb10 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -151,13 +151,14 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, if (!user_mode(regs) && (address >= TASK_SIZE)) return SIGSEGV; -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ + defined(CONFIG_PPC_BOOK3S_64)) if (error_code & DSISR_DABRMATCH) { /* DABR match */ do_dabr(regs, address, error_code); return 0; } -#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ +#endif if (in_atomic() || mm == NULL) { if (!user_mode(regs)) -- cgit v1.2.3 From e7f75ad01d590243904c2d95ab47e6b2e9ef6dad Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Fri, 5 Mar 2010 10:43:12 +0000 Subject: powerpc/47x: Base ppc476 support This patch adds the base support for the 476 processor. The code was primarily written by Ben Herrenschmidt and Torez Smith, but I've been maintaining it for a while. The goal is to have a single binary that will run on 44x and 47x, but we still have some details to work out. The biggest is that the L1 cache line size differs on the two platforms, but it's currently a compile-time option. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Torez Smith Signed-off-by: Dave Kleikamp Signed-off-by: Josh Boyer --- arch/powerpc/mm/44x_mmu.c | 144 ++++++++++++++++++++++++++++++++++- arch/powerpc/mm/mmu_context_nohash.c | 8 ++ arch/powerpc/mm/mmu_decl.h | 7 +- arch/powerpc/mm/tlb_nohash_low.S | 118 +++++++++++++++++++++++++--- 4 files changed, 255 insertions(+), 22 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c index 3986264b0993..d8c6efb32bc6 100644 --- a/arch/powerpc/mm/44x_mmu.c +++ b/arch/powerpc/mm/44x_mmu.c @@ -38,7 +38,9 @@ unsigned int tlb_44x_index; /* = 0 */ unsigned int tlb_44x_hwater = PPC44x_TLB_SIZE - 1 - PPC44x_EARLY_TLBS; int icache_44x_need_flush; -static void __init ppc44x_update_tlb_hwater(void) +unsigned long tlb_47x_boltmap[1024/8]; + +static void __cpuinit ppc44x_update_tlb_hwater(void) { extern unsigned int tlb_44x_patch_hwater_D[]; extern unsigned int tlb_44x_patch_hwater_I[]; @@ -59,7 +61,7 @@ static void __init ppc44x_update_tlb_hwater(void) } /* - * "Pins" a 256MB TLB entry in AS0 for kernel lowmem + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 44x type MMU */ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) { @@ -67,12 +69,18 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) ppc44x_update_tlb_hwater(); + mtspr(SPRN_MMUCR, 0); + __asm__ __volatile__( "tlbwe %2,%3,%4\n" "tlbwe %1,%3,%5\n" "tlbwe %0,%3,%6\n" : +#ifdef CONFIG_PPC47x + : "r" (PPC47x_TLB2_S_RWX), +#else : "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), +#endif "r" (phys), "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M), "r" (entry), @@ -81,8 +89,93 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys) "i" (PPC44x_TLB_ATTRIB)); } +static int __init ppc47x_find_free_bolted(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (!(mmube0 & MMUBE0_VBE0)) + return 0; + if (!(mmube0 & MMUBE0_VBE1)) + return 1; + if (!(mmube0 & MMUBE0_VBE2)) + return 2; + if (!(mmube1 & MMUBE1_VBE3)) + 
return 3; + if (!(mmube1 & MMUBE1_VBE4)) + return 4; + if (!(mmube1 & MMUBE1_VBE5)) + return 5; + return -1; +} + +static void __init ppc47x_update_boltmap(void) +{ + unsigned int mmube0 = mfspr(SPRN_MMUBE0); + unsigned int mmube1 = mfspr(SPRN_MMUBE1); + + if (mmube0 & MMUBE0_VBE0) + __set_bit((mmube0 >> MMUBE0_IBE0_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE1) + __set_bit((mmube0 >> MMUBE0_IBE1_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube0 & MMUBE0_VBE2) + __set_bit((mmube0 >> MMUBE0_IBE2_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE3) + __set_bit((mmube1 >> MMUBE1_IBE3_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE4) + __set_bit((mmube1 >> MMUBE1_IBE4_SHIFT) & 0xff, + tlb_47x_boltmap); + if (mmube1 & MMUBE1_VBE5) + __set_bit((mmube1 >> MMUBE1_IBE5_SHIFT) & 0xff, + tlb_47x_boltmap); +} + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem for 47x type MMU + */ +static void __cpuinit ppc47x_pin_tlb(unsigned int virt, unsigned int phys) +{ + unsigned int rA; + int bolted; + + /* Base rA is HW way select, way 0, bolted bit set */ + rA = 0x88000000; + + /* Look for a bolted entry slot */ + bolted = ppc47x_find_free_bolted(); + BUG_ON(bolted < 0); + + /* Insert bolted slot number */ + rA |= bolted << 24; + + pr_debug("256M TLB entry for 0x%08x->0x%08x in bolt slot %d\n", + virt, phys, bolted); + + mtspr(SPRN_MMUCR, 0); + + __asm__ __volatile__( + "tlbwe %2,%3,0\n" + "tlbwe %1,%3,1\n" + "tlbwe %0,%3,2\n" + : + : "r" (PPC47x_TLB2_SW | PPC47x_TLB2_SR | + PPC47x_TLB2_SX +#ifdef CONFIG_SMP + | PPC47x_TLB2_M +#endif + ), + "r" (phys), + "r" (virt | PPC47x_TLB0_VALID | PPC47x_TLB0_256M), + "r" (rA)); +} + void __init MMU_init_hw(void) { + /* This is not useful on 47x but won't hurt either */ ppc44x_update_tlb_hwater(); flush_instruction_cache(); @@ -95,8 +188,51 @@ unsigned long __init mmu_mapin_ram(unsigned long top) /* Pin in enough TLBs to cover any lowmem not covered by the * initial 256M mapping established in head_44x.S */ for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; - addr += PPC_PIN_SIZE) - ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } + if (mmu_has_feature(MMU_FTR_TYPE_47x)) { + ppc47x_update_boltmap(); +#ifdef DEBUG + { + int i; + + printk(KERN_DEBUG "bolted entries: "); + for (i = 0; i < 255; i++) { + if (test_bit(i, tlb_47x_boltmap)) + printk("%d ", i); + } + printk("\n"); + } +#endif /* DEBUG */ + } return total_lowmem; } + +#ifdef CONFIG_SMP +void __cpuinit mmu_init_secondary(int cpu) +{ + unsigned long addr; + + /* Pin in enough TLBs to cover any lowmem not covered by the + * initial 256M mapping established in head_44x.S + * + * WARNING: This is called with only the first 256M of the + * linear mapping in the TLB and we can't take faults yet + * so beware of what this code uses. It runs off a temporary + * stack. current (r2) isn't initialized, smp_processor_id() + * will not work, current thread info isn't accessible, ... 
+ */ + for (addr = PPC_PIN_SIZE; addr < lowmem_end_addr; + addr += PPC_PIN_SIZE) { + if (mmu_has_feature(MMU_FTR_TYPE_47x)) + ppc47x_pin_tlb(addr + PAGE_OFFSET, addr); + else + ppc44x_pin_tlb(addr + PAGE_OFFSET, addr); + } +} +#endif /* CONFIG_SMP */ diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/mmu_context_nohash.c index 1f2d9ff09895..ddfd7ad4e1d6 100644 --- a/arch/powerpc/mm/mmu_context_nohash.c +++ b/arch/powerpc/mm/mmu_context_nohash.c @@ -395,10 +395,18 @@ void __init mmu_context_init(void) * the PID/TID comparison is disabled, so we can use a TID of zero * to represent all kernel pages as shared among all contexts. * -- Dan + * + * The IBM 47x core supports 16-bit PIDs, thus 65535 contexts. We + * should normally never have to steal though the facility is + * present if needed. + * -- BenH */ if (mmu_has_feature(MMU_FTR_TYPE_8xx)) { first_context = 0; last_context = 15; + } else if (mmu_has_feature(MMU_FTR_TYPE_47x)) { + first_context = 1; + last_context = 65535; } else { first_context = 1; last_context = 255; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d49a77503e19..eb11d5d2aa94 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -69,12 +69,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, } #endif /* CONIFG_8xx */ -/* - * As of today, we don't support tlbivax broadcast on any - * implementation. When that becomes the case, this will be - * an extern. - */ -#ifdef CONFIG_PPC_BOOK3E +#if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind); #else diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index bbdc5b577b85..e925cb58afd9 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -10,7 +10,7 @@ * - tlbil_va * - tlbil_pid * - tlbil_all - * - tlbivax_bcast (not yet) + * - tlbivax_bcast * * Code mostly moved over from misc_32.S * @@ -33,6 +33,7 @@ #include #include #include +#include #if defined(CONFIG_40x) @@ -65,7 +66,7 @@ _GLOBAL(__tlbil_va) * Nothing to do for 8xx, everything is inline */ -#elif defined(CONFIG_44x) +#elif defined(CONFIG_44x) /* Includes 47x */ /* * 440 implementation uses tlbsx/we for tlbil_va and a full sweep @@ -73,7 +74,13 @@ _GLOBAL(__tlbil_va) */ _GLOBAL(__tlbil_va) mfspr r5,SPRN_MMUCR - rlwimi r5,r4,0,24,31 /* Set TID */ + mfmsr r10 + + /* + * We write 16 bits of STID since 47x supports that much, we + * will never be passed out of bounds values on 440 (hopefully) + */ + rlwimi r5,r4,0,16,31 /* We have to run the search with interrupts disabled, otherwise * an interrupt which causes a TLB miss can clobber the MMUCR @@ -83,24 +90,41 @@ _GLOBAL(__tlbil_va) * and restoring MMUCR, so only normal interrupts have to be * taken care of. */ - mfmsr r4 wrteei 0 mtspr SPRN_MMUCR,r5 - tlbsx. r3, 0, r3 - wrtee r4 - bne 1f + tlbsx. r6,0,r3 + bne 10f sync - /* There are only 64 TLB entries, so r3 < 64, - * which means bit 22, is clear. Since 22 is - * the V bit in the TLB_PAGEID, loading this +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) + /* On 440 There are only 64 TLB entries, so r3 < 64, which means bit + * 22, is clear. Since 22 is the V bit in the TLB_PAGEID, loading this * value will invalidate the TLB entry. 
*/ - tlbwe r3, r3, PPC44x_TLB_PAGEID + tlbwe r6,r6,PPC44x_TLB_PAGEID isync -1: blr +10: wrtee r10 + blr +2: +#ifdef CONFIG_PPC_47x + oris r7,r6,0x8000 /* specify way explicitely */ + clrrwi r4,r3,12 /* get an EPN for the hashing with V = 0 */ + ori r4,r4,PPC47x_TLBE_SIZE + tlbwe r4,r7,0 /* write it */ + isync + wrtee r10 + blr +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ _GLOBAL(_tlbil_all) _GLOBAL(_tlbil_pid) +BEGIN_MMU_FTR_SECTION + b 2f +END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x) li r3,0 sync @@ -115,6 +139,76 @@ _GLOBAL(_tlbil_pid) isync blr +2: +#ifdef CONFIG_PPC_47x + /* 476 variant. There's not simple way to do this, hopefully we'll + * try to limit the amount of such full invalidates + */ + mfmsr r11 /* Interrupts off */ + wrteei 0 + li r3,-1 /* Current set */ + lis r10,tlb_47x_boltmap@h + ori r10,r10,tlb_47x_boltmap@l + lis r7,0x8000 /* Specify way explicitely */ + + b 9f /* For each set */ + +1: li r9,4 /* Number of ways */ + li r4,0 /* Current way */ + li r6,0 /* Default entry value 0 */ + andi. r0,r8,1 /* Check if way 0 is bolted */ + mtctr r9 /* Load way counter */ + bne- 3f /* Bolted, skip loading it */ + +2: /* For each way */ + or r5,r3,r4 /* Make way|index for tlbre */ + rlwimi r5,r5,16,8,15 /* Copy index into position */ + tlbre r6,r5,0 /* Read entry */ +3: addis r4,r4,0x2000 /* Next way */ + andi. r0,r6,PPC47x_TLB0_VALID /* Valid entry ? */ + beq 4f /* Nope, skip it */ + rlwimi r7,r5,0,1,2 /* Insert way number */ + rlwinm r6,r6,0,21,19 /* Clear V */ + tlbwe r6,r7,0 /* Write it */ +4: bdnz 2b /* Loop for each way */ + srwi r8,r8,1 /* Next boltmap bit */ +9: cmpwi cr1,r3,255 /* Last set done ? */ + addi r3,r3,1 /* Next set */ + beq cr1,1f /* End of loop */ + andi. r0,r3,0x1f /* Need to load a new boltmap word ? */ + bne 1b /* No, loop */ + lwz r8,0(r10) /* Load boltmap entry */ + addi r10,r10,4 /* Next word */ + b 1b /* Then loop */ +1: isync /* Sync shadows */ + wrtee r11 +#else /* CONFIG_PPC_47x */ +1: trap + EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; +#endif /* !CONFIG_PPC_47x */ + blr + +#ifdef CONFIG_PPC_47x +/* + * _tlbivax_bcast is only on 47x. We don't bother doing a runtime + * check though, it will blow up soon enough if we mistakenly try + * to use it on a 440. + */ +_GLOBAL(_tlbivax_bcast) + mfspr r5,SPRN_MMUCR + mfmsr r10 + rlwimi r5,r4,0,16,31 + wrteei 0 + mtspr SPRN_MMUCR,r5 +/* tlbivax 0,r3 - use .long to avoid binutils deps */ + .long 0x7c000624 | (r3 << 11) + isync + eieio + tlbsync + sync + wrtee r10 + blr +#endif /* CONFIG_PPC_47x */ #elif defined(CONFIG_FSL_BOOKE) /* -- cgit v1.2.3 From 75c1d539ea13117cbe95e2c343e52af67d735145 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 27 Apr 2010 21:22:55 +0000 Subject: powerpc: Fix CONFIG_DEBUG_PAGEALLOC on 603/e300 So we tried to speed things up a bit using flush_hash_pages() directly but that falls over on 603 of course meaning we fail to flush the TLB properly and we may even end up having it corrupt memory randomly by accessing a hash table that doesn't exist. This removes the "optimization" by always going through flush_tlb_page() for now at least. 
Signed-off-by: Benjamin Herrenschmidt
---
 arch/powerpc/mm/pgtable_32.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/powerpc/mm')

diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 767b0cf17d33..9fc02dc72ce9 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -393,11 +393,7 @@ static int __change_page_attr(struct page *page, pgprot_t prot)
 		return -EINVAL;
 	__set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0);
 	wmb();
-#ifdef CONFIG_PPC_STD_MMU
-	flush_hash_pages(0, address, pmd_val(*kpmd), 1);
-#else
 	flush_tlb_page(NULL, address);
-#endif
 	pte_unmap(kpte);
 
 	return 0;
--
cgit v1.2.3


From 91eea67c6d8704396a98226508c56a8501e141e3 Mon Sep 17 00:00:00 2001
From: Mark Nelson
Date: Wed, 21 Apr 2010 16:21:03 +0000
Subject: powerpc/mm: Track backing pages allocated by vmemmap_populate()

We need to keep track of the backing pages that get allocated by
vmemmap_populate() so that when we use kdump, the dump-capture kernel
knows where these pages are.

We use a simple linked list of structures that contain the physical
address of the backing page and the corresponding virtual address to
track the backing pages. To save space, we just use a pointer to the
next struct vmemmap_backing. We can also do this because we never
remove nodes. We call the pointer "list" to be compatible with changes
made to the crash utility.

vmemmap_populate() is called either at boot time or on a memory hotplug
operation. We don't have to worry about the boot-time calls because
they will be inherently single-threaded, and for a memory hotplug
operation vmemmap_populate() is called through:

sparse_add_one_section()
            |
            V
kmalloc_section_memmap()
            |
            V
sparse_mem_map_populate()
            |
            V
vmemmap_populate()

and in sparse_add_one_section() we're protected by pgdat_resize_lock().
So, we don't need a spinlock to protect the vmemmap_list.

We allocate space for the vmemmap_backing structs by allocating whole
pages in vmemmap_list_alloc() and then handing out chunks of this to
vmemmap_list_populate(). This means that we waste at most just under
one page, but it keeps the code simple.
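The tracking structure itself is tiny; a minimal sketch of it, inferred from
the fields the code below assigns (the real definition lives in a header
outside arch/powerpc/mm, so exact placement and field order are assumptions):

	struct vmemmap_backing {
		struct vmemmap_backing *list;	/* next tracked backing page ("list" for crash-utility compatibility) */
		unsigned long phys;		/* physical address of the backing page */
		unsigned long virt_addr;	/* virtual address the backing page maps */
	};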
Signed-off-by: Mark Nelson Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/init_64.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d7fa50b09b4a..e267f223fdff 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -252,6 +252,47 @@ static void __meminit vmemmap_create_mapping(unsigned long start, } #endif /* CONFIG_PPC_BOOK3E */ +struct vmemmap_backing *vmemmap_list; + +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) +{ + static struct vmemmap_backing *next; + static int num_left; + + /* allocate a page when required and hand out chunks */ + if (!next || !num_left) { + next = vmemmap_alloc_block(PAGE_SIZE, node); + if (unlikely(!next)) { + WARN_ON(1); + return NULL; + } + num_left = PAGE_SIZE / sizeof(struct vmemmap_backing); + } + + num_left--; + + return next++; +} + +static __meminit void vmemmap_list_populate(unsigned long phys, + unsigned long start, + int node) +{ + struct vmemmap_backing *vmem_back; + + vmem_back = vmemmap_list_alloc(node); + if (unlikely(!vmem_back)) { + WARN_ON(1); + return; + } + + vmem_back->phys = phys; + vmem_back->virt_addr = start; + vmem_back->list = vmemmap_list; + + vmemmap_list = vmem_back; +} + int __meminit vmemmap_populate(struct page *start_page, unsigned long nr_pages, int node) { @@ -276,6 +317,8 @@ int __meminit vmemmap_populate(struct page *start_page, if (!p) return -ENOMEM; + vmemmap_list_populate(__pa(p), start, node); + pr_debug(" * %016lx..%016lx allocated at %p\n", start, start + page_size, p); -- cgit v1.2.3 From e460c2c91af44374cbfa3f1c70f5ca9bbf099aa9 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Thu, 6 May 2010 17:15:58 +1000 Subject: powerpc: Invoke oom-killer from page fault As explained in commit 1c0fe6e3bd, we want to call the architecture independent oom killer when getting an unexplained OOM from handle_mm_fault, rather than simply killing current. Cc: linuxppc-dev@ozlabs.org Cc: Benjamin Herrenschmidt Cc: linux-arch@vger.kernel.org Signed-off-by: Nick Piggin Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/fault.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 83ac4935eb10..1bd712c33ce2 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -308,7 +308,6 @@ good_area: * make sure we exit gracefully rather than endlessly redo * the fault. */ - survive: ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0); if (unlikely(ret & VM_FAULT_ERROR)) { if (ret & VM_FAULT_OOM) @@ -360,15 +359,10 @@ bad_area_nosemaphore: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_global_init(current)) { - yield(); - down_read(&mm->mmap_sem); - goto survive; - } - printk("VM: killing process %s\n", current->comm); - if (user_mode(regs)) - do_group_exit(SIGKILL); - return SIGKILL; + if (!user_mode(regs)) + return SIGKILL; + pagefault_out_of_memory(); + return 0; do_sigbus: up_read(&mm->mmap_sem); -- cgit v1.2.3 From 25863de07af9cb90e6365cc8216bdd17f2394515 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 26 Apr 2010 15:32:43 +0000 Subject: powerpc/cpumask: Convert NUMA code to new cpumask API Convert NUMA code to new cpumask API. We shift the node to cpumask setup code until after we complete bootmem allocation so we can dynamically allocate the cpumasks. 
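The mechanical part of the conversion follows the usual old-to-new cpumask
mapping; roughly, and only as an illustration of the pattern used in the
hunks below (dbg() is the local debug macro defined in numa.c):

	/* old API: fixed-size cpumask_t plus cpu_set()/cpu_isset() */
	cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES];
	cpu_set(cpu, numa_cpumask_lookup_table[node]);
	if (cpu_isset(cpu, numa_cpumask_lookup_table[node]))
		dbg("cpu %d is in node %d\n", cpu, node);

	/* new API: cpumask_var_t allocated once bootmem is available,
	 * then manipulated through the cpumask_* accessors */
	cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
	alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
	cpumask_set_cpu(cpu, node_to_cpumask_map[node]);
	if (cpumask_test_cpu(cpu, node_to_cpumask_map[node]))
		dbg("cpu %d is in node %d\n", cpu, node);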
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 58 +++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 64c00227b997..d68491b31e38 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -33,16 +33,41 @@ static int numa_debug; #define dbg(args...) if (numa_debug) { printk(KERN_INFO args); } int numa_cpu_lookup_table[NR_CPUS]; -cpumask_t numa_cpumask_lookup_table[MAX_NUMNODES]; +cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; struct pglist_data *node_data[MAX_NUMNODES]; EXPORT_SYMBOL(numa_cpu_lookup_table); -EXPORT_SYMBOL(numa_cpumask_lookup_table); +EXPORT_SYMBOL(node_to_cpumask_map); EXPORT_SYMBOL(node_data); static int min_common_depth; static int n_mem_addr_cells, n_mem_size_cells; +/* + * Allocate node_to_cpumask_map based on number of available nodes + * Requires node_possible_map to be valid. + * + * Note: node_to_cpumask() is not valid until after this is done. + */ +static void __init setup_node_to_cpumask_map(void) +{ + unsigned int node, num = 0; + + /* setup nr_node_ids if not done yet */ + if (nr_node_ids == MAX_NUMNODES) { + for_each_node_mask(node, node_possible_map) + num = node; + nr_node_ids = num + 1; + } + + /* allocate the map */ + for (node = 0; node < nr_node_ids; node++) + alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); + + /* cpumask_of_node() will now work */ + dbg("Node to cpumask map for %d nodes\n", nr_node_ids); +} + static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn, unsigned int *nid) { @@ -138,8 +163,8 @@ static void __cpuinit map_cpu_to_node(int cpu, int node) dbg("adding cpu %d to node %d\n", cpu, node); - if (!(cpu_isset(cpu, numa_cpumask_lookup_table[node]))) - cpu_set(cpu, numa_cpumask_lookup_table[node]); + if (!(cpumask_test_cpu(cpu, node_to_cpumask_map[node]))) + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); } #ifdef CONFIG_HOTPLUG_CPU @@ -149,8 +174,8 @@ static void unmap_cpu_from_node(unsigned long cpu) dbg("removing cpu %lu from node %d\n", cpu, node); - if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { - cpu_clear(cpu, numa_cpumask_lookup_table[node]); + if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { + cpumask_set_cpu(cpu, node_to_cpumask_map[node]); } else { printk(KERN_ERR "WARNING: cpu %lu not found in node %d\n", cpu, node); @@ -737,8 +762,9 @@ void __init dump_numa_cpu_topology(void) * If we used a CPU iterator here we would miss printing * the holes in the cpumap. */ - for (cpu = 0; cpu < NR_CPUS; cpu++) { - if (cpu_isset(cpu, numa_cpumask_lookup_table[node])) { + for (cpu = 0; cpu < nr_cpu_ids; cpu++) { + if (cpumask_test_cpu(cpu, + node_to_cpumask_map[node])) { if (count == 0) printk(" %u", cpu); ++count; @@ -750,7 +776,7 @@ void __init dump_numa_cpu_topology(void) } if (count > 1) - printk("-%u", NR_CPUS - 1); + printk("-%u", nr_cpu_ids - 1); printk("\n"); } } @@ -926,10 +952,6 @@ void __init do_init_bootmem(void) else dump_numa_memory_topology(); - register_cpu_notifier(&ppc64_numa_nb); - cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, - (void *)(unsigned long)boot_cpuid); - for_each_online_node(nid) { unsigned long start_pfn, end_pfn; void *bootmem_vaddr; @@ -983,6 +1005,16 @@ void __init do_init_bootmem(void) } init_bootmem_done = 1; + + /* + * Now bootmem is initialised we can create the node to cpumask + * lookup tables and setup the cpu callback to populate them. 
+ */ + setup_node_to_cpumask_map(); + + register_cpu_notifier(&ppc64_numa_nb); + cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, + (void *)(unsigned long)boot_cpuid); } void __init paging_init(void) -- cgit v1.2.3 From c83ec269e6931edf61abe1ed777ebb867b06a85c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 16 Apr 2010 00:11:36 +0200 Subject: PPC: Split context init/destroy functions We need to reserve a context from KVM to make sure we have our own segment space. While we did that split for Book3S_64 already, 32 bit is still outstanding. So let's split it now. Signed-off-by: Alexander Graf Acked-by: Benjamin Herrenschmidt CC: Benjamin Herrenschmidt Signed-off-by: Avi Kivity --- arch/powerpc/mm/mmu_context_hash32.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/mmu_context_hash32.c index 0dfba2bf7f31..d0ee554e86e4 100644 --- a/arch/powerpc/mm/mmu_context_hash32.c +++ b/arch/powerpc/mm/mmu_context_hash32.c @@ -60,11 +60,7 @@ static unsigned long next_mmu_context; static unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; - -/* - * Set up the context for a new address space. - */ -int init_new_context(struct task_struct *t, struct mm_struct *mm) +unsigned long __init_new_context(void) { unsigned long ctx = next_mmu_context; @@ -74,11 +70,30 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) ctx = 0; } next_mmu_context = (ctx + 1) & LAST_CONTEXT; - mm->context.id = ctx; + + return ctx; +} +EXPORT_SYMBOL_GPL(__init_new_context); + +/* + * Set up the context for a new address space. + */ +int init_new_context(struct task_struct *t, struct mm_struct *mm) +{ + mm->context.id = __init_new_context(); return 0; } +/* + * Free a context ID. Make sure to call this with preempt disabled! + */ +void __destroy_context(unsigned long ctx) +{ + clear_bit(ctx, context_map); +} +EXPORT_SYMBOL_GPL(__destroy_context); + /* * We're finished using the context for an address space. */ @@ -86,7 +101,7 @@ void destroy_context(struct mm_struct *mm) { preempt_disable(); if (mm->context.id != NO_CONTEXT) { - clear_bit(mm->context.id, context_map); + __destroy_context(mm->context.id); mm->context.id = NO_CONTEXT; } preempt_enable(); -- cgit v1.2.3 From 78f622377f7d31d988db350a43c5689dd5f31876 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 13 May 2010 14:38:21 -0500 Subject: powerpc/fsl-booke: Move loadcam_entry back to asm code to fix SMP ftrace When we build with ftrace enabled its possible that loadcam_entry would have used the stack pointer (even though the code doesn't need it). We call loadcam_entry in __secondary_start before the stack is setup. To ensure that loadcam_entry doesn't use the stack pointer the easiest solution is to just have it in asm code. Signed-off-by: Kumar Gala --- arch/powerpc/mm/fsl_booke_mmu.c | 25 +++---------------------- arch/powerpc/mm/mmu_decl.h | 10 +++++++++- arch/powerpc/mm/tlb_nohash_low.S | 28 ++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 1ed6b52f3031..cdc7526e9c93 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -2,7 +2,7 @@ * Modifications by Kumar Gala (galak@kernel.crashing.org) to support * E500 Book E processors. * - * Copyright 2004 Freescale Semiconductor, Inc + * Copyright 2004,2010 Freescale Semiconductor, Inc. 
* * This file contains the routines for initializing the MMU * on the 4xx series of chips. @@ -56,19 +56,13 @@ unsigned int tlbcam_index; -#define NUM_TLBCAMS (64) #if defined(CONFIG_LOWMEM_CAM_NUM_BOOL) && (CONFIG_LOWMEM_CAM_NUM >= NUM_TLBCAMS) #error "LOWMEM_CAM_NUM must be less than NUM_TLBCAMS" #endif -struct tlbcam { - u32 MAS0; - u32 MAS1; - unsigned long MAS2; - u32 MAS3; - u32 MAS7; -} TLBCAM[NUM_TLBCAMS]; +#define NUM_TLBCAMS (64) +struct tlbcam TLBCAM[NUM_TLBCAMS]; struct tlbcamrange { unsigned long start; @@ -109,19 +103,6 @@ unsigned long p_mapped_by_tlbcam(phys_addr_t pa) return 0; } -void loadcam_entry(int idx) -{ - mtspr(SPRN_MAS0, TLBCAM[idx].MAS0); - mtspr(SPRN_MAS1, TLBCAM[idx].MAS1); - mtspr(SPRN_MAS2, TLBCAM[idx].MAS2); - mtspr(SPRN_MAS3, TLBCAM[idx].MAS3); - - if (mmu_has_feature(MMU_FTR_BIG_PHYS)) - mtspr(SPRN_MAS7, TLBCAM[idx].MAS7); - - asm volatile("isync;tlbwe;isync" : : : "memory"); -} - /* * Set up one of the I/D BAT (block address translation) register pairs. * The parameters are not checked; in particular size must be a power diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index eb11d5d2aa94..63b84a0d3b10 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -144,7 +144,15 @@ extern unsigned long mmu_mapin_ram(unsigned long top); extern void MMU_init_hw(void); extern unsigned long mmu_mapin_ram(unsigned long top); extern void adjust_total_lowmem(void); - +extern void loadcam_entry(unsigned int index); + +struct tlbcam { + u32 MAS0; + u32 MAS1; + unsigned long MAS2; + u32 MAS3; + u32 MAS7; +}; #elif defined(CONFIG_PPC32) /* anything 32-bit except 4xx or 8xx */ extern void MMU_init_hw(void); diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index e925cb58afd9..cfa768203d08 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -365,3 +365,31 @@ _GLOBAL(set_context) #else #error Unsupported processor type ! #endif + +#if defined(CONFIG_FSL_BOOKE) +/* + * extern void loadcam_entry(unsigned int index) + * + * Load TLBCAM[index] entry in to the L2 CAM MMU + */ +_GLOBAL(loadcam_entry) + LOAD_REG_ADDR(r4, TLBCAM) + mulli r5,r3,TLBCAM_SIZE + add r3,r5,r4 + lwz r4,TLBCAM_MAS0(r3) + mtspr SPRN_MAS0,r4 + lwz r4,TLBCAM_MAS1(r3) + mtspr SPRN_MAS1,r4 + PPC_LL r4,TLBCAM_MAS2(r3) + mtspr SPRN_MAS2,r4 + lwz r4,TLBCAM_MAS3(r3) + mtspr SPRN_MAS3,r4 +BEGIN_MMU_FTR_SECTION + lwz r4,TLBCAM_MAS7(r3) + mtspr SPRN_MAS7,r4 +END_MMU_FTR_SECTION_IFSET(MMU_FTR_BIG_PHYS) + isync + tlbwe + isync + blr +#endif -- cgit v1.2.3 From bc8449cc57898bc9cf1ffc4619d026f77bf327c1 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 16 May 2010 20:28:35 +0000 Subject: powerpc/numa: Use ibm,architecture-vec-5 to detect form 1 affinity I've been told that the architected way to determine we are in form 1 affinity mode is by reading the ibm,architecture-vec-5 property which mirrors the layout of the fifth vector of the ibm,client-architecture structure. Eventually we may want to parse the ibm,architecture-vec-5 and create FW_FEATURE_* bits. 
Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/powerpc/mm') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index aace5e5dfa0a..80d110635d24 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -271,7 +271,8 @@ static int __init find_min_common_depth(void) const unsigned int *ref_points; struct device_node *rtas_root; unsigned int len; - struct device_node *options; + struct device_node *chosen; + const char *vec5; rtas_root = of_find_node_by_path("/rtas"); @@ -289,14 +290,17 @@ static int __init find_min_common_depth(void) "ibm,associativity-reference-points", &len); /* - * For type 1 affinity information we want the first field + * For form 1 affinity information we want the first field */ - options = of_find_node_by_path("/options"); - if (options) { - const char *str; - str = of_get_property(options, "ibm,associativity-form", NULL); - if (str && !strcmp(str, "1")) - index = 0; +#define VEC5_AFFINITY_BYTE 5 +#define VEC5_AFFINITY 0x80 + chosen = of_find_node_by_path("/chosen"); + if (chosen) { + vec5 = of_get_property(chosen, "ibm,architecture-vec-5", NULL); + if (vec5 && (vec5[VEC5_AFFINITY_BYTE] & VEC5_AFFINITY)) { + dbg("Using form 1 affinity\n"); + index = 0; + } } if ((len >= 2 * sizeof(unsigned int)) && ref_points) { -- cgit v1.2.3