Diffstat (limited to 'arch')
146 files changed, 4062 insertions, 1687 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 36f858c37ca7..bf6abab46dcc 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -85,6 +85,17 @@ config NMI_IPI depends on SMP && (DEBUGGER || KEXEC_CORE || HARDLOCKUP_DETECTOR) default y +config PPC_WATCHDOG + bool + depends on HARDLOCKUP_DETECTOR + depends on HAVE_HARDLOCKUP_DETECTOR_ARCH + default y + help + This is a placeholder when the powerpc hardlockup detector + watchdog is selected (arch/powerpc/kernel/watchdog.c). It is + selected via the generic lockup detector menu, which is why we + have no standalone config option for it here. + config STACKTRACE_SUPPORT bool default y @@ -165,7 +176,7 @@ config PPC select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK - select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S_64 && !RELOCATABLE && !HIBERNATION) + select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select HAVE_CBPF_JIT if !PPC64 select HAVE_CONTEXT_TRACKING if PPC64 @@ -394,7 +405,7 @@ config HUGETLB_PAGE_SIZE_VARIABLE config MATH_EMULATION bool "Math emulation" - depends on 4xx || 8xx || PPC_MPC832x || BOOKE + depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE ---help--- Some PowerPC chips designed for embedded applications do not have a floating-point unit and therefore do not implement the @@ -956,9 +967,9 @@ config PPC_PCI_CHOICE config PCI bool "PCI support" if PPC_PCI_CHOICE - default y if !40x && !CPM2 && !8xx && !PPC_83xx \ + default y if !40x && !CPM2 && !PPC_8xx && !PPC_83xx \ && !PPC_85xx && !PPC_86xx && !GAMECUBE_COMMON - default PCI_QSPAN if !4xx && !CPM2 && 8xx + default PCI_QSPAN if PPC_8xx select GENERIC_PCI_IOMAP help Find out whether your system includes a PCI bus.
PCI is the name of @@ -974,7 +985,7 @@ config PCI_SYSCALL config PCI_QSPAN bool "QSpan PCI" - depends on !4xx && !CPM2 && 8xx + depends on PPC_8xx select PPC_I8259 help Say Y here if you have a system based on a Motorola 8xx-series @@ -1165,12 +1176,23 @@ config CONSISTENT_SIZE config PIN_TLB bool "Pinned Kernel TLBs (860 ONLY)" - depends on ADVANCED_OPTIONS && 8xx + depends on ADVANCED_OPTIONS && PPC_8xx && \ + !DEBUG_PAGEALLOC && !STRICT_KERNEL_RWX + +config PIN_TLB_DATA + bool "Pinned TLB for DATA" + depends on PIN_TLB + default y config PIN_TLB_IMMR bool "Pinned TLB for IMMR" depends on PIN_TLB default y + +config PIN_TLB_TEXT + bool "Pinned TLB for TEXT" + depends on PIN_TLB + default y endmenu if PPC64 diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 8d4ed73d5490..5480a7d45ef2 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -249,7 +249,7 @@ KBUILD_AFLAGS += $(aflags-y) KBUILD_CFLAGS += $(cflags-y) head-y := arch/powerpc/kernel/head_$(BITS).o -head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o +head-$(CONFIG_PPC_8xx) := arch/powerpc/kernel/head_8xx.o head-$(CONFIG_40x) := arch/powerpc/kernel/head_40x.o head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o @@ -316,6 +316,10 @@ PHONY += ppc64le_defconfig ppc64le_defconfig: $(call merge_into_defconfig,ppc64_defconfig,le) +PHONY += powernv_be_defconfig +powernv_be_defconfig: + $(call merge_into_defconfig,powernv_defconfig,be) + PHONY += mpc85xx_defconfig mpc85xx_defconfig: $(call merge_into_defconfig,mpc85xx_basic_defconfig,\ diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index a7814a7b1523..bd2a1b8cd83f 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -107,7 +107,7 @@ src-wlib-y += crtsavres.S endif src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c -src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c +src-wlib-$(CONFIG_PPC_8xx) += mpc8xx.c planetcore.c fsl-soc.c src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c fsl-soc.c @@ -124,7 +124,7 @@ src-plat-$(CONFIG_44x) += treeboot-ebony.c cuboot-ebony.c treeboot-bamboo.c \ treeboot-iss4xx.c treeboot-currituck.c \ treeboot-akebono.c \ simpleboot.c fixed-head.S virtex.c -src-plat-$(CONFIG_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c +src-plat-$(CONFIG_PPC_8xx) += cuboot-8xx.c fixed-head.S ep88xc.c redboot-8xx.c src-plat-$(CONFIG_PPC_MPC52xx) += cuboot-52xx.c src-plat-$(CONFIG_PPC_82xx) += cuboot-pq2.c fixed-head.S ep8248e.c cuboot-824x.c src-plat-$(CONFIG_PPC_83xx) += cuboot-83xx.c fixed-head.S redboot-83xx.c diff --git a/arch/powerpc/boot/dts/fsp2.dts b/arch/powerpc/boot/dts/fsp2.dts index 475953ada707..f10a64aeb83b 100644 --- a/arch/powerpc/boot/dts/fsp2.dts +++ b/arch/powerpc/boot/dts/fsp2.dts @@ -52,6 +52,7 @@ clocks { mmc_clk: mmc_clk { compatible = "fixed-clock"; + #clock-cells = <0>; clock-frequency = <50000000>; clock-output-names = "mmc_clk"; }; @@ -359,20 +360,6 @@ interrupts = <31 0x4 15 0x84>; }; - mmc0: sdhci@020c0000 { - compatible = "st,sdhci-stih407", "st,sdhci"; - status = "disabled"; - reg = <0x020c0000 0x20000>; - reg-names = "mmc"; - interrupt-parent = <&UIC1_3>; - interrupts = <21 0x4 22 0x4>; - interrupt-names = "mmcirq"; - pinctrl-names = "default"; - pinctrl-0 = <>; - clock-names = "mmc"; - clocks = <&mmc_clk>; - }; - plb6 { compatible = "ibm,plb6"; #address-cells = <2>; @@ -501,6 +488,24 
@@ /*RXDE*/ 4 &UIC1_2 13 0x4>; }; + mmc0: mmc@20c0000 { + compatible = "st,sdhci-stih407", "st,sdhci"; + reg = <0x020c0000 0x20000>; + reg-names = "mmc"; + interrupts = <21 0x4>; + interrupt-parent = <&UIC1_3>; + interrupt-names = "mmcirq"; + pinctrl-names = "default"; + pinctrl-0 = <>; + clock-names = "mmc"; + clocks = <&mmc_clk>; + bus-width = <4>; + non-removable; + sd-uhs-sdr50; + sd-uhs-sdr104; + sd-uhs-ddr50; + }; + opb { compatible = "ibm,opb"; #address-cells = <1>; diff --git a/arch/powerpc/boot/ppc_asm.h b/arch/powerpc/boot/ppc_asm.h index 68e388ee94fe..c63299f9fdd9 100644 --- a/arch/powerpc/boot/ppc_asm.h +++ b/arch/powerpc/boot/ppc_asm.h @@ -80,4 +80,12 @@ .long 0xa6037b7d; /* mtsrr1 r11 */ \ .long 0x2400004c /* rfid */ +#ifdef CONFIG_PPC_8xx +#define MFTBL(dest) mftb dest +#define MFTBU(dest) mftbu dest +#else +#define MFTBL(dest) mfspr dest, SPRN_TBRL +#define MFTBU(dest) mfspr dest, SPRN_TBRU +#endif + #endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/powerpc/boot/util.S b/arch/powerpc/boot/util.S index 243b8497d58b..ec069177d942 100644 --- a/arch/powerpc/boot/util.S +++ b/arch/powerpc/boot/util.S @@ -71,32 +71,18 @@ udelay: add r4,r4,r5 addi r4,r4,-1 divw r4,r4,r5 /* BUS ticks */ -#ifdef CONFIG_8xx -1: mftbu r5 - mftb r6 - mftbu r7 -#else -1: mfspr r5, SPRN_TBRU - mfspr r6, SPRN_TBRL - mfspr r7, SPRN_TBRU -#endif +1: MFTBU(r5) + MFTBL(r6) + MFTBU(r7) cmpw 0,r5,r7 bne 1b /* Get [synced] base time */ addc r9,r6,r4 /* Compute end time */ addze r8,r5 -#ifdef CONFIG_8xx -2: mftbu r5 -#else -2: mfspr r5, SPRN_TBRU -#endif +2: MFTBU(r5) cmpw 0,r5,r8 blt 2b bgt 3f -#ifdef CONFIG_8xx - mftb r6 -#else - mfspr r6, SPRN_TBRL -#endif + MFTBL(r6) cmpw 0,r6,r9 blt 2b 3: blr diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index e8e6a6999852..935aabe488c1 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -92,8 +92,10 @@ CONFIG_MMC_DEBUG=y CONFIG_MMC_SDHCI=y CONFIG_MMC_SDHCI_PLTFM=y CONFIG_MMC_SDHCI_OF_ARASAN=y +CONFIG_MMC_SDHCI_ST=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_M41T80=y +CONFIG_RESET_CONTROLLER=y CONFIG_EXT2_FS=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/powerpc/configs/be.config b/arch/powerpc/configs/be.config new file mode 100644 index 000000000000..c5cdc99a6530 --- /dev/null +++ b/arch/powerpc/configs/be.config @@ -0,0 +1 @@ +CONFIG_CPU_BIG_ENDIAN=y diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 5c4fbc80dc6c..2542ea15d338 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -8,3 +8,4 @@ generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h generic-y += vtime.h +generic-y += msi.h
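A note on the udelay change in boot/util.S above: the loop samples the 64-bit timebase as two 32-bit halves and re-reads the upper half until it is stable, so a carry from TBL into TBU between the two reads cannot be missed. A minimal C sketch of the same pattern, assuming the mftbu()/mftbl() accessors that asm/reg.h provides on 32-bit (illustrative only, not part of the patch):

/* Sample a consistent 64-bit timebase from two 32-bit halves. */
static inline unsigned long long read_timebase(void)
{
	unsigned int hi, lo, hi2;

	do {
		hi  = mftbu();		/* upper 32 bits */
		lo  = mftbl();		/* lower 32 bits */
		hi2 = mftbu();		/* upper half again */
	} while (hi != hi2);		/* retry if TBL wrapped into TBU */

	return ((unsigned long long)hi << 32) | lo;
}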
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7fb755880409..4d453f979553 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -294,13 +294,11 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); /* Generic accessors to PTE bits */ static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);} +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return !!(pte_val(pte) & _PAGE_DIRTY); } static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); } static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); } diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 36fc7bfe9e11..f88452019114 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -40,7 +40,7 @@ * Define the address range of the kernel non-linear virtual area */ #define H_KERN_VIRT_START ASM_CONST(0xD000000000000000) -#define H_KERN_VIRT_SIZE ASM_CONST(0x0000100000000000) +#define H_KERN_VIRT_SIZE ASM_CONST(0x0000400000000000) /* 64T */ /* * The vmalloc space starts at the beginning of that region, and @@ -48,9 +48,11 @@ * (we keep a quarter for the virtual memmap) */ #define H_VMALLOC_START H_KERN_VIRT_START -#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE >> 1) +#define H_VMALLOC_SIZE ASM_CONST(0x380000000000) /* 56T */ #define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE) +#define H_KERN_IO_START H_VMALLOC_END + /* * Region IDs */ diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h index 5c28bd6f2ae1..2d1ca488ca44 100644 --- a/arch/powerpc/include/asm/book3s/64/hugetlb.h +++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h @@ -54,9 +54,7 @@ static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma, #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE static inline bool gigantic_page_supported(void) { - if (radix_enabled()) - return true; - return false; + return true; } #endif diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 6981a52b3887..f28d21c69f79 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -468,7 +468,7 @@ extern int htab_bolt_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); int htab_remove_mapping(unsigned long vstart, unsigned long vend, int psize, int ssize); -extern void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); +extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages); extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr); #ifdef CONFIG_PPC_PSERIES diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..cbf1945f4338 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -96,11 +96,6 @@ typedef struct { #ifdef CONFIG_PPC_SUBPAGE_PROT struct subpage_prot_table spt; #endif /* CONFIG_PPC_SUBPAGE_PROT */ -#ifdef CONFIG_PPC_ICSWX - struct spinlock *cop_lockp; /* guard acop and cop_pid */ - unsigned long acop; /* mask of enabled coprocessor types */ - unsigned int cop_pid; /* pid value used with coprocessors */ -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES /* for 4K PTE fragment support */ void *pte_frag; diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h index e2329db9d6f4..1fcfa425cefa 100644 --- a/arch/powerpc/include/asm/book3s/64/pgalloc.h +++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h @@ -41,8 +41,6 @@ extern struct kmem_cache *pgtable_cache[]; pgtable_cache[(shift) - 1]; \ }) -#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO - extern pte_t *pte_fragment_alloc(struct mm_struct *, unsigned long, int); extern void
pte_fragment_free(unsigned long *, int); extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift); diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index d1da415e283c..1071d52f382d 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -272,8 +272,10 @@ extern unsigned long __vmalloc_end; extern unsigned long __kernel_virt_start; extern unsigned long __kernel_virt_size; +extern unsigned long __kernel_io_start; #define KERN_VIRT_START __kernel_virt_start #define KERN_VIRT_SIZE __kernel_virt_size +#define KERN_IO_START __kernel_io_start extern struct page *vmemmap; extern unsigned long ioremap_bot; extern unsigned long pci_io_base; @@ -298,7 +300,6 @@ extern unsigned long pci_io_base; * PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE */ -#define KERN_IO_START (KERN_VIRT_START + (KERN_VIRT_SIZE >> 1)) #define FULL_IO_SIZE 0x80000000ul #define ISA_IO_BASE (KERN_IO_START) #define ISA_IO_END (KERN_IO_START + 0x10000ul) @@ -409,6 +410,11 @@ static inline int pte_write(pte_t pte) return __pte_write(pte) || pte_savedwrite(pte); } +static inline int pte_read(pte_t pte) +{ + return !!(pte_raw(pte) & cpu_to_be64(_PAGE_READ)); +} + #define __HAVE_ARCH_PTEP_SET_WRPROTECT static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 544440b5aff3..1e5ba94e62ef 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -110,6 +110,8 @@ */ #define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END) +#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1)) + #ifndef __ASSEMBLY__ #define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE) #define RADIX_PMD_TABLE_SIZE (sizeof(pmd_t) << RADIX_PMD_INDEX_SIZE) diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h index cc7fbde4f53c..9b433a624bf3 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h @@ -22,22 +22,21 @@ extern void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end extern void radix__local_flush_tlb_mm(struct mm_struct *mm); extern void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); extern void radix__tlb_flush(struct mmu_gather *tlb); #ifdef CONFIG_SMP extern void radix__flush_tlb_mm(struct mm_struct *mm); extern void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); -extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); extern void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize); #else #define radix__flush_tlb_mm(mm) radix__local_flush_tlb_mm(mm) #define radix__flush_tlb_page(vma,addr) radix__local_flush_tlb_page(vma,addr) #define radix__flush_tlb_page_psize(mm,addr,p) radix__local_flush_tlb_page_psize(mm,addr,p) -#define radix__flush_tlb_pwc(tlb, addr) radix__local_flush_tlb_pwc(tlb, addr) #endif +extern void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr); +extern 
void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr); extern void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size); extern void radix__flush_tlb_lpid(unsigned long lpid); diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index 5a90292afbad..d122f7f957ce 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -5,7 +5,7 @@ /* bytes per L1 cache line */ -#if defined(CONFIG_8xx) || defined(CONFIG_403GCX) +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_403GCX) #define L1_CACHE_SHIFT 4 #define MAX_COPY_PREFETCH 1 #elif defined(CONFIG_PPC_E500MC) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 52586f9956bb..8a174cba5567 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -67,6 +67,17 @@ #define ERR_DEEP_STATE_ESL_MISMATCH -2 #ifndef __ASSEMBLY__ +/* Additional SPRs that need to be saved/restored during stop */ +struct stop_sprs { + u64 pid; + u64 ldbar; + u64 fscr; + u64 hfscr; + u64 mmcr1; + u64 mmcr2; + u64 mmcra; +}; + extern u32 pnv_fastsleep_workaround_at_entry[]; extern u32 pnv_fastsleep_workaround_at_exit[]; diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index d02ad93bf708..a9bf921f4efc 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -513,7 +513,7 @@ enum { #else CPU_FTRS_GENERIC_32 | #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX | #endif #ifdef CONFIG_40x @@ -565,7 +565,7 @@ enum { #else CPU_FTRS_GENERIC_32 & #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx CPU_FTRS_8XX & #endif #ifdef CONFIG_40x diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h index 4508b322f2cd..6c40dfda5912 100644 --- a/arch/powerpc/include/asm/fixmap.h +++ b/arch/powerpc/include/asm/fixmap.h @@ -17,6 +17,7 @@ #ifndef __ASSEMBLY__ #include <linux/kernel.h> #include <asm/page.h> +#include <asm/pgtable.h> #ifdef CONFIG_HIGHMEM #include <linux/threads.h> #include <asm/kmap_types.h> @@ -62,9 +63,6 @@ enum fixed_addresses { __end_of_fixed_addresses }; -extern void __set_fixmap (enum fixed_addresses idx, - phys_addr_t phys, pgprot_t flags); - #define __FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) @@ -72,5 +70,11 @@ extern void __set_fixmap (enum fixed_addresses idx, #include <asm-generic/fixmap.h> +static inline void __set_fixmap(enum fixed_addresses idx, + phys_addr_t phys, pgprot_t flags) +{ + map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags)); +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index f79d6c74eb2a..8def56ec05c6 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -26,7 +26,7 @@ #define cpm2_unmap(addr) do {} while(0) #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include <asm/8xx_immap.h> extern immap_t __iomem *mpc8xx_immr; diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 8add8b861e8d..c97603d617e3 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -12,6 +12,10 @@ typedef struct { unsigned int mce_exceptions; unsigned int spurious_irqs; unsigned int hmi_exceptions; + unsigned int sreset_irqs; +#ifdef CONFIG_PPC_WATCHDOG + unsigned int soft_nmi_irqs; +#endif #ifdef CONFIG_PPC_DOORBELL unsigned int doorbell_irqs; 
#endif diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h index 7f4025a6c69e..b8a0fb442c64 100644 --- a/arch/powerpc/include/asm/hugetlb.h +++ b/arch/powerpc/include/asm/hugetlb.h @@ -218,18 +218,4 @@ static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr, } #endif /* CONFIG_HUGETLB_PAGE */ -/* - * FSL Book3E platforms require special gpage handling - the gpages - * are reserved early in the boot process by memblock instead of via - * the .dts as on IBM platforms. - */ -#if defined(CONFIG_HUGETLB_PAGE) && (defined(CONFIG_PPC_FSL_BOOK3E) || \ - defined(CONFIG_PPC_8xx)) -extern void __init reserve_hugetlb_gpages(void); -#else -static inline void reserve_hugetlb_gpages(void) -{ -} -#endif - #endif /* _ASM_POWERPC_HUGETLB_H */ diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h new file mode 100644 index 000000000000..7f74c282710f --- /dev/null +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -0,0 +1,128 @@ +#ifndef __ASM_POWERPC_IMC_PMU_H +#define __ASM_POWERPC_IMC_PMU_H + +/* + * IMC Nest Performance Monitor counter support. + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ + +#include <linux/perf_event.h> +#include <linux/slab.h> +#include <linux/of.h> +#include <linux/io.h> +#include <asm/opal.h> + +/* + * For static allocation of some of the structures. + */ +#define IMC_MAX_PMUS 32 + +/* + * Compatibility macros for IMC devices + */ +#define IMC_DTB_COMPAT "ibm,opal-in-memory-counters" +#define IMC_DTB_UNIT_COMPAT "ibm,imc-counters" + + +/* + * LDBAR: Counter address and Enable/Disable macro. + * perf/imc-pmu.c has the LDBAR layout information. + */ +#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL +#define THREAD_IMC_ENABLE 0x8000000000000000ULL + +/* + * Structure to hold memory address information for imc units. + */ +struct imc_mem_info { + u64 *vbase; + u32 id; +}; + +/* + * Placeholder for nest pmu events and values. + */ +struct imc_events { + u32 value; + char *name; + char *unit; + char *scale; +}; + +/* Event attribute array index */ +#define IMC_FORMAT_ATTR 0 +#define IMC_EVENT_ATTR 1 +#define IMC_CPUMASK_ATTR 2 +#define IMC_NULL_ATTR 3 + +/* PMU Format attribute macros */ +#define IMC_EVENT_OFFSET_MASK 0xffffffffULL + +/* + * Device tree parser code detects IMC pmu support and + * registers new IMC pmus. This structure will hold the + * pmu functions, events, counter memory information + * and attrs for each imc pmu and will be referenced at + * the time of pmu registration. + */ +struct imc_pmu { + struct pmu pmu; + struct imc_mem_info *mem_info; + struct imc_events **events; + /* + * Attribute groups for the PMU. Slot 0 used for + * format attribute, slot 1 used for cpumask attribute, + * slot 2 used for event attribute. Slot 3 is kept as + * NULL. + */ + const struct attribute_group *attr_groups[4]; + u32 counter_mem_size; + int domain; + /* + * flag to note whether the memory is mmapped + * or allocated by the kernel. + */ + bool imc_counter_mmaped; +}; + +/* + * Structure to hold id, lock and reference count for the imc events which + * are initialized. + */ +struct imc_pmu_ref { + struct mutex lock; + unsigned int id; + int refc; +}; +
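To make the new imc-pmu.h API concrete: a platform driver allocates a struct imc_pmu, picks a domain, and hands it to init_imc_pmu() together with the device-tree node describing the counter unit. A hedged sketch under those assumptions (the function name and flow here are illustrative, not from the patch; the real device-tree parsing lands in the platform code later in this series):

/* Hypothetical registration of one nest-domain IMC unit. */
static int example_register_nest_pmu(struct device_node *node, int pmu_id)
{
	struct imc_pmu *pmu = kzalloc(sizeof(*pmu), GFP_KERNEL);

	if (!pmu)
		return -ENOMEM;

	pmu->domain = IMC_DOMAIN_NEST;	/* chip-scope counters */
	/* events, mem_info and attr_groups are filled from the DT here */
	return init_imc_pmu(node, pmu, pmu_id);
}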
+/* + * In-Memory Collection Counters type. + * Data comes from Device tree. + * Three device types are supported. + */ + +enum { + IMC_TYPE_THREAD = 0x1, + IMC_TYPE_CORE = 0x4, + IMC_TYPE_CHIP = 0x10, +}; + +/* + * Domains for IMC PMUs + */ +#define IMC_DOMAIN_NEST 1 +#define IMC_DOMAIN_CORE 2 +#define IMC_DOMAIN_THREAD 3 + +extern int init_imc_pmu(struct device_node *parent, + struct imc_pmu *pmu_ptr, int pmu_id); +extern void thread_imc_disable(void); +#endif /* __ASM_POWERPC_IMC_PMU_H */ diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..eb749c86dca5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -96,12 +96,6 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (prev == next) return; -#ifdef CONFIG_PPC_ICSWX - /* Switch coprocessor context only if prev or next uses a coprocessor */ - if (prev->context.acop || next->context.acop) - switch_cop(next); -#endif /* CONFIG_PPC_ICSWX */ - /* We must stop all altivec streams before changing the HW * context */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index 91314268f04f..185c6a47f9ba 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -121,7 +121,7 @@ extern int icache_44x_need_flush; #include <asm/nohash/pte-book3e.h> #elif defined(CONFIG_FSL_BOOKE) #include <asm/nohash/32/pte-fsl-booke.h> -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_8xx) #include <asm/nohash/32/pte-8xx.h> #endif @@ -337,9 +337,6 @@ static inline void __ptep_set_access_flags(struct mm_struct *mm, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 }) -extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, - pmd_t **pmdp); - int map_kernel_page(unsigned long va, phys_addr_t pa, int flags); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index e5805ad78e12..17989c3d9a24 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -14,6 +14,7 @@ static inline int pte_write(pte_t pte) { return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; } +static inline int pte_read(pte_t pte) { return 1; } static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index 3130a73652c7..450a60b81d2a 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -42,6 +42,7 @@ #define OPAL_I2C_STOP_ERR -24 #define OPAL_XIVE_PROVISIONING -31 #define OPAL_XIVE_FREE_ACTIVE -32 +#define OPAL_TIMEOUT -33 /* API Tokens (in r0) */ #define OPAL_INVALID_CALL -1 @@ -190,7 +191,16 @@ #define OPAL_NPU_INIT_CONTEXT 146 #define OPAL_NPU_DESTROY_CONTEXT 147 #define OPAL_NPU_MAP_LPAR 148 -#define OPAL_LAST 148 +#define OPAL_IMC_COUNTERS_INIT 149 +#define OPAL_IMC_COUNTERS_START 150 +#define OPAL_IMC_COUNTERS_STOP 151 +#define OPAL_GET_POWERCAP 152 +#define OPAL_SET_POWERCAP 153 +#define OPAL_GET_POWER_SHIFT_RATIO 154 +#define OPAL_SET_POWER_SHIFT_RATIO 155
+#define OPAL_SENSOR_GROUP_CLEAR 156 +#define OPAL_PCI_SET_P2P 157 +#define OPAL_LAST 157 /* Device tree flags */ @@ -1084,6 +1094,18 @@ enum { XIVE_DUMP_EMU_STATE = 5, }; +/* "type" argument options for OPAL_IMC_COUNTERS_* calls */ +enum { + OPAL_IMC_COUNTERS_NEST = 1, + OPAL_IMC_COUNTERS_CORE = 2, +}; + + +/* PCI p2p descriptor */ +#define OPAL_PCI_P2P_ENABLE 0x1 +#define OPAL_PCI_P2P_LOAD 0x2 +#define OPAL_PCI_P2P_STORE 0x4 + #endif /* __ASSEMBLY__ */ #endif /* __OPAL_API_H */ diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 588fb1c23af9..97ff192f5cfb 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -267,6 +267,19 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id); int64_t opal_xive_free_irq(uint32_t girq); int64_t opal_xive_sync(uint32_t type, uint32_t id); int64_t opal_xive_dump(uint32_t type, uint32_t id); +int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target, + uint64_t desc, uint16_t pe_number); + +int64_t opal_imc_counters_init(uint32_t type, uint64_t address, + uint64_t cpu_pir); +int64_t opal_imc_counters_start(uint32_t type, uint64_t cpu_pir); +int64_t opal_imc_counters_stop(uint32_t type, uint64_t cpu_pir); + +int opal_get_powercap(u32 handle, int token, u32 *pcap); +int opal_set_powercap(u32 handle, int token, u32 pcap); +int opal_get_power_shift_ratio(u32 handle, int token, u32 *psr); +int opal_set_power_shift_ratio(u32 handle, int token, u32 psr); +int opal_sensor_group_clear(u32 group_hndl, int token); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, @@ -345,6 +358,10 @@ static inline int opal_get_async_rc(struct opal_msg msg) void opal_wake_poller(void); +void opal_powercap_init(void); +void opal_psr_init(void); +void opal_sensor_groups_init(void); + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_OPAL_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index dc88a31cc79a..04b60af027ae 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -31,6 +31,7 @@ #endif #include <asm/accounting.h> #include <asm/hmi.h> +#include <asm/cpuidle.h> register struct paca_struct *local_paca asm("r13"); @@ -183,6 +184,12 @@ struct paca_struct { struct paca_struct **thread_sibling_pacas; /* The PSSCR value that the kernel requested before going to stop */ u64 requested_psscr; + + /* + * Save area for additional SPRs that need to be + * saved/restored during cpuidle stop. 
+ */ + struct stop_sprs stop_sprs; #endif #ifdef CONFIG_PPC_STD_MMU_64 diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h index d795c5d5789c..45ae1212ab8a 100644 --- a/arch/powerpc/include/asm/pgalloc.h +++ b/arch/powerpc/include/asm/pgalloc.h @@ -17,6 +17,8 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp) } #endif /* MODULE */ +#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) + #ifdef CONFIG_PPC_BOOK3S #include <asm/book3s/pgalloc.h> #else diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index eb9d57defb75..7d0d38f58243 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -73,7 +73,7 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_PPC32) void mark_initmem_nx(void); #else static inline void mark_initmem_nx(void) { } diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index de9681034353..3e5cf251ad9a 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -26,6 +26,8 @@ extern int pnv_pci_get_presence_state(uint64_t id, uint8_t *state); extern int pnv_pci_get_power_state(uint64_t id, uint8_t *state); extern int pnv_pci_set_power_state(uint64_t id, uint8_t state, struct opal_msg *msg); +extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, + u64 desc); int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode); int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq, diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index fa9ebaead91e..041ba15aa2b9 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -193,6 +193,7 @@ #define PPC_INST_CLRBHRB 0x7c00035c #define PPC_INST_COPY 0x7c20060c #define PPC_INST_CP_ABORT 0x7c00068c +#define PPC_INST_DARN 0x7c0005e6 #define PPC_INST_DCBA 0x7c0005ec #define PPC_INST_DCBA_MASK 0xfc0007fe #define PPC_INST_DCBAL 0x7c2005ec @@ -395,6 +396,9 @@ #define PPC_CP_ABORT stringify_in_c(.long PPC_INST_CP_ABORT) #define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \ ___PPC_RA(a) | ___PPC_RB(b)) +#define PPC_DARN(t, l) stringify_in_c(.long PPC_INST_DARN | \ + ___PPC_RT(t) | \ + (((l) & 0x3) << 16)) #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) #define PPC_DCBZL(a, b) stringify_in_c(.long PPC_INST_DCBZL | \ diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 6baeeb9acd0d..36f3e41c9fbe 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -378,10 +378,16 @@ BEGIN_FTR_SECTION_NESTED(96); \ cmpwi dest,0; \ beq- 90b; \ END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) -#elif defined(CONFIG_8xx) -#define MFTB(dest) mftb dest #else -#define MFTB(dest) mfspr dest, SPRN_TBRL +#define MFTB(dest) MFTBL(dest) +#endif + +#ifdef CONFIG_PPC_8xx +#define MFTBL(dest) mftb dest +#define MFTBU(dest) mftbu dest +#else +#define MFTBL(dest) mfspr dest, SPRN_TBRL +#define MFTBU(dest) mfspr dest, SPRN_TBRU #endif #ifndef CONFIG_SMP @@ -411,7 +417,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) * and they must be used. 
*/ -#if !defined(CONFIG_4xx) && !defined(CONFIG_8xx) +#if !defined(CONFIG_4xx) && !defined(CONFIG_PPC_8xx) #define tlbia \ li r4,1024; \ mtctr r4; \ @@ -439,7 +445,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601) .machine push ; \ .machine "power4" ; \ lis scratch,0x60000000@h; \ - dcbt r0,scratch,0b01010; \ + dcbt 0,scratch,0b01010; \ .machine pop /* diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a3b6575c7842..c36823d64ec9 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -22,9 +22,9 @@ #include <asm/reg_fsl_emb.h> #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #include <asm/reg_8xx.h> -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #define MSR_SF_LG 63 /* Enable 64 bit mode */ #define MSR_ISF_LG 61 /* Interrupt 64b mode valid on 630 */ @@ -135,7 +135,7 @@ #define MSR_KERNEL (MSR_ | MSR_64BIT) #define MSR_USER32 (MSR_ | MSR_PR | MSR_EE) #define MSR_USER64 (MSR_USER32 | MSR_64BIT) -#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) /* Default MSR for kernel mode. */ #define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) #define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) @@ -272,16 +272,65 @@ #define SPRN_DAR 0x013 /* Data Address Register */ #define SPRN_DBCR 0x136 /* e300 Data Breakpoint Control Reg */ #define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ -#define DSISR_NOHPTE 0x40000000 /* no translation found */ -#define DSISR_PROTFAULT 0x08000000 /* protection fault */ -#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ -#define DSISR_ISSTORE 0x02000000 /* access was a store */ -#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ -#define DSISR_NOSEGMENT 0x00200000 /* SLB miss */ -#define DSISR_KEYFAULT 0x00200000 /* Key fault */ -#define DSISR_UNSUPP_MMU 0x00080000 /* Unsupported MMU config */ -#define DSISR_SET_RC 0x00040000 /* Failed setting of R/C bits */ -#define DSISR_PGDIRFAULT 0x00020000 /* Fault on page directory */ +#define DSISR_BAD_DIRECT_ST 0x80000000 /* Obsolete: Direct store error */ +#define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_ATTR_CONFLICT 0x20000000 /* P9: Process vs. Partition attr */ +#define DSISR_NOEXEC_OR_G 0x10000000 /* Alias of SRR1 bit, see below */ +#define DSISR_PROTFAULT 0x08000000 /* protection fault */ +#define DSISR_BADACCESS 0x04000000 /* bad access to CI or G */ +#define DSISR_ISSTORE 0x02000000 /* access was a store */ +#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ +#define DSISR_NOSEGMENT 0x00200000 /* STAB miss (unsupported) */ +#define DSISR_KEYFAULT 0x00200000 /* Storage Key fault */ +#define DSISR_BAD_EXT_CTRL 0x00100000 /* Obsolete: External ctrl error */ +#define DSISR_UNSUPP_MMU 0x00080000 /* P9: Unsupported MMU config */ +#define DSISR_SET_RC 0x00040000 /* P9: Failed setting of R/C bits */ +#define DSISR_PRTABLE_FAULT 0x00020000 /* P9: Fault on process table */ +#define DSISR_ICSWX_NO_CT 0x00004000 /* P7: icswx unavailable cp type */ +#define DSISR_BAD_COPYPASTE 0x00000008 /* P9: Copy/Paste on wrong memtype */ +#define DSISR_BAD_AMO 0x00000004 /* P9: Incorrect AMO opcode */ +#define DSISR_BAD_CI_LDST 0x00000002 /* P8: Bad HV CI load/store */ + +/* + * DSISR_NOEXEC_OR_G doesn't actually exist. This bit is always + * 0 on DSIs. However, on ISIs, the corresponding bit in SRR1 + * indicates an attempt at executing from a no-execute PTE + * or segment or from a guarded page. 
+ * + * We add a definition here for completeness as we alias + * DSISR and SRR1 in do_page_fault. + */ + +/* + * DSISR bits that are treated as a fault. Any bit set + * here will skip hash_page, and cause do_page_fault to + * trigger a SIGBUS or SIGSEGV: + */ +#define DSISR_BAD_FAULT_32S (DSISR_BAD_DIRECT_ST | \ + DSISR_BADACCESS | \ + DSISR_BAD_EXT_CTRL) +#define DSISR_BAD_FAULT_64S (DSISR_BAD_FAULT_32S | \ + DSISR_ATTR_CONFLICT | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_PRTABLE_FAULT | \ + DSISR_ICSWX_NO_CT | \ + DSISR_BAD_COPYPASTE | \ + DSISR_BAD_AMO | \ + DSISR_BAD_CI_LDST) +/* + * These bits are equivalent in SRR1 and DSISR for 0x400 + * instruction access interrupts on Book3S + */ +#define DSISR_SRR1_MATCH_32S (DSISR_NOHPTE | \ + DSISR_NOEXEC_OR_G | \ + DSISR_PROTFAULT) +#define DSISR_SRR1_MATCH_64S (DSISR_SRR1_MATCH_32S | \ + DSISR_KEYFAULT | \ + DSISR_UNSUPP_MMU | \ + DSISR_SET_RC | \ + DSISR_PRTABLE_FAULT) + #define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ #define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ #define SPRN_CIR 0x11B /* Chip Information Register (hyper, R/0) */ @@ -675,6 +724,7 @@ * may not be recoverable */ #define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */ #define SRR1_WS_DEEP 0x00010000 /* All resources maintained */ +#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */ #define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */ #define SRR1_PROGILL 0x00080000 /* Illegal instruction */ #define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */ @@ -1114,7 +1164,7 @@ #endif #endif -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx #define SPRN_SPRG_SCRATCH0 SPRN_SPRG0 #define SPRN_SPRG_SCRATCH1 SPRN_SPRG1 #define SPRN_SPRG_SCRATCH2 SPRN_SPRG2 @@ -1197,10 +1247,8 @@ * differentiated by the version number in the Communication Processor * Module (CPM). */ -#define PVR_821 0x00500000 -#define PVR_823 PVR_821 -#define PVR_850 PVR_821 -#define PVR_860 PVR_821 +#define PVR_8xx 0x00500000 + #define PVR_8240 0x00810100 #define PVR_8245 0x80811014 #define PVR_8260 PVR_8240 @@ -1313,7 +1361,7 @@ static inline void msr_check_and_clear(unsigned long bits) #else /* __powerpc64__ */ -#if defined(CONFIG_8xx) +#if defined(CONFIG_PPC_8xx) #define mftbl() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) #define mftbu() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 737e012ef56e..eb2a33d5df26 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -221,10 +221,7 @@ #define SPRN_CSRR0 SPRN_SRR2 /* Critical Save and Restore Register 0 */ #define SPRN_CSRR1 SPRN_SRR3 /* Critical Save and Restore Register 1 */ #endif - -#ifdef CONFIG_PPC_ICSWX #define SPRN_HACOP 0x15F /* Hypervisor Available Coprocessor Register */ -#endif /* Bit definitions for CCR1. 
*/ #define CCR1_DPC 0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */ diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h index da3cdffca440..b0e82466d4e8 100644 --- a/arch/powerpc/include/asm/string.h +++ b/arch/powerpc/include/asm/string.h @@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t); extern int memcmp(const void *,const void *,__kernel_size_t); extern void * memchr(const void *,int,__kernel_size_t); +#ifdef CONFIG_PPC64 +#define __HAVE_ARCH_MEMSET16 +#define __HAVE_ARCH_MEMSET32 +#define __HAVE_ARCH_MEMSET64 + +extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t); +extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t); +extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t); + +static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n) +{ + return __memset16(p, v, n * 2); +} + +static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n) +{ + return __memset32(p, v, n * 4); +} + +static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n) +{ + return __memset64(p, v, n * 8); +} +#endif #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_STRING_H */ diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h index 2cf846edb3fc..cb61eae5b7ed 100644 --- a/arch/powerpc/include/asm/timex.h +++ b/arch/powerpc/include/asm/timex.h @@ -29,7 +29,7 @@ static inline cycles_t get_cycles(void) ret = 0; __asm__ __volatile__( -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx "97: mftb %0\n" #else "97: mfspr %0, %2\n" @@ -45,11 +45,7 @@ static inline cycles_t get_cycles(void) " .long 0\n" " .long 0\n" ".previous" -#ifdef CONFIG_8xx - : "=r" (ret) : "i" (CPU_FTR_601)); -#else : "=r" (ret) : "i" (CPU_FTR_601), "i" (SPRN_TBRL)); -#endif return ret; #endif } diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index dc4e15937ccf..2d84bca8d053 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -16,8 +16,6 @@ struct device_node; #include <asm/mmzone.h> -#define parent_node(node) (node) - #define cpumask_of_node(node) ((node) == -1 ? 
\ cpu_all_mask : \ node_to_cpumask_map[node]) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 4aa7c147e447..91960f83039c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,7 +38,7 @@ obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \ signal_64.o ptrace32.o \ paca.o nvram_64.o firmware.o obj-$(CONFIG_VDSO32) += vdso32/ -obj-$(CONFIG_HARDLOCKUP_DETECTOR) += watchdog.o +obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o @@ -83,7 +83,7 @@ extra-y := head_$(BITS).o extra-$(CONFIG_40x) := head_40x.o extra-$(CONFIG_44x) := head_44x.o extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o -extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_PPC_8xx) := head_8xx.o extra-y += vmlinux.lds obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6e95c2c19a7e..8cfb20e38cfe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -746,6 +746,14 @@ int main(void) OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask); OFFSET(PACA_SIBLING_PACA_PTRS, paca_struct, thread_sibling_pacas); OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr); +#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f) + STOP_SPR(STOP_PID, pid); + STOP_SPR(STOP_LDBAR, ldbar); + STOP_SPR(STOP_FSCR, fscr); + STOP_SPR(STOP_HFSCR, hfscr); + STOP_SPR(STOP_MMCR1, mmcr1); + STOP_SPR(STOP_MMCR2, mmcr2); + STOP_SPR(STOP_MMCRA, mmcra); #endif DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER); diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6f849832a669..760872916013 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1259,10 +1259,10 @@ static struct cpu_spec __initdata cpu_specs[] = { .platform = "ppc603", }, #endif /* CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx { /* 8xx */ .pvr_mask = 0xffff0000, - .pvr_value = 0x00500000, + .pvr_value = PVR_8xx, .cpu_name = "8xx", /* CPU_FTR_MAYBE_CAN_DOZE is possible, * if the 8xx code is there.... */ @@ -1274,7 +1274,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_8xx, .platform = "ppc823", }, -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #ifdef CONFIG_40x { /* 403GC */ .pvr_mask = 0xffffff00, @@ -1936,6 +1936,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_440A, .platform = "ppc440", }, +#ifdef CONFIG_PPC_47x { /* 476 DD2 core */ .pvr_mask = 0xffffffff, .pvr_value = 0x11a52080, @@ -1992,6 +1993,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_47x, .platform = "ppc470", }, +#endif /* CONFIG_PPC_47x */ { /* default match */ .pvr_mask = 0x00000000, .pvr_value = 0x00000000, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 8587059ad848..e780e1fbf6c2 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -43,6 +43,13 @@ #define LOAD_MSR_KERNEL(r, x) li r,(x) #endif +/* + * Align to 4k in order to ensure that all functions modifying srr0/srr1 + * fit into one page, so that we do not encounter a TLB miss between the + * modification of srr0/srr1 and the associated rfi. + */ + .align 12
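Stepping back to the asm/string.h hunk earlier in this series: the new memset16/32/64 wrappers take an element count and convert it to the byte count the assembly-backed __memset{16,32,64} primitives expect. A small usage sketch (function and variable names hypothetical, not from the patch):

/* Fill a 16bpp scanline with one pixel value. */
static void fill_scanline(u16 *line, u16 pixel, size_t pixels)
{
	memset16(line, pixel, pixels);	/* writes pixels * 2 bytes */
}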
#ifdef CONFIG_BOOKE .globl mcheck_transfer_to_handler mcheck_transfer_to_handler: @@ -586,6 +593,10 @@ ppc_swapcontext: handle_page_fault: stw r4,_DAR(r1) addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_6xx + andis. r0,r5,DSISR_DABRMATCH@h + bne- handle_dabr_fault +#endif bl do_page_fault cmpwi r3,0 beq+ ret_from_except @@ -599,6 +610,17 @@ handle_page_fault: bl bad_page_fault b ret_from_except_full +#ifdef CONFIG_6xx + /* We have a data breakpoint exception - handle it */ +handle_dabr_fault: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + clrrwi r0,r0,1 + stw r0,_TRAP(r1) + bl do_break + b ret_from_except_full +#endif + /* * This routine switches between two different tasks. The process * state of one is saved on its kernel stack. Then the state diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 9029afd1fa2a..a42308b2187b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -541,7 +541,7 @@ EXC_COMMON_BEGIN(instruction_access_common) RECONCILE_IRQ_STATE(r10, r11) ld r12,_MSR(r1) ld r3,_NIP(r1) - andis. r4,r12,0x5820 + andis. r4,r12,DSISR_BAD_FAULT_64S@h li r5,0x400 std r3,_DAR(r1) std r4,_DSISR(r1) @@ -1314,7 +1314,7 @@ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) #endif -#if defined(CONFIG_HARDLOCKUP_DETECTOR) && defined(CONFIG_HAVE_HARDLOCKUP_DETECTOR_ARCH) +#ifdef CONFIG_PPC_WATCHDOG #define MASKED_DEC_HANDLER_LABEL 3f @@ -1335,10 +1335,10 @@ EXC_COMMON_BEGIN(soft_nmi_common) ADD_NVGPRS;ADD_RECONCILE) b ret_from_except -#else +#else /* CONFIG_PPC_WATCHDOG */ #define MASKED_DEC_HANDLER_LABEL 2f /* normal return */ #define MASKED_DEC_HANDLER(_H) -#endif +#endif /* CONFIG_PPC_WATCHDOG */ /* * An interrupt came in while soft-disabled. We set paca->irq_happened, then: @@ -1477,8 +1477,10 @@ USE_TEXT_SECTION() */ .balign IFETCH_ALIGN_BYTES do_hash_page: -#ifdef CONFIG_PPC_STD_MMU_64 - andis. r0,r4,0xa450 /* weird error? */ + #ifdef CONFIG_PPC_STD_MMU_64 + lis r0,DSISR_BAD_FAULT_64S@h + ori r0,r0,DSISR_BAD_FAULT_64S@l + and. r0,r4,r0 /* weird error? */ bne- handle_page_fault /* if not, try to insert a HPTE */ CURRENT_THREAD_INFO(r11, r1) lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index e22734278458..8c54166491e7 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -388,7 +388,7 @@ DataAccess: EXCEPTION_PROLOG mfspr r10,SPRN_DSISR stw r10,_DSISR(r11) - andis. r0,r10,0xa470 /* weird error? */ + andis. r0,r10,DSISR_BAD_FAULT_32S@h bne 1f /* if not, try to put a PTE */ mfspr r4,SPRN_DAR /* into the hash table */ rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ @@ -403,13 +403,13 @@ DataAccess: DO_KVM 0x400 InstructionAccess: EXCEPTION_PROLOG - andis. r0,r9,0x4000 /* no pte found? */ + andis. r0,r9,SRR1_ISI_NOPT@h /* no pte found? */ beq 1f /* if so, try to put a PTE */ li r3,0 /* into the hash table */ mr r4,r12 /* SRR0 is fault address */ bl hash_page 1: mr r4,r12 - mr r5,r9 + andis.
r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index c032fe8c2d26..4fee00d414e8 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -50,18 +50,20 @@ mtspr spr, reg #endif -/* Macro to test if an address is a kernel address */ #if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 -#define IS_KERNEL(tmp, addr) \ - andis. tmp, addr, 0x8000 /* Address >= 0x80000000 */ -#define BRANCH_UNLESS_KERNEL(label) beq label -#else -#define IS_KERNEL(tmp, addr) \ - rlwinm tmp, addr, 16, 16, 31; \ - cmpli cr0, tmp, PAGE_OFFSET >> 16 -#define BRANCH_UNLESS_KERNEL(label) blt label +/* By simply checking Address >= 0x80000000, we know if it's a kernel address */ +#define SIMPLE_KERNEL_ADDRESS 1 #endif +/* + * We need an ITLB miss handler for kernel addresses if: + * - Either we have modules + * - Or we have not pinned the first 8M + */ +#if defined(CONFIG_MODULES) || !defined(CONFIG_PIN_TLB_TEXT) || \ + defined(CONFIG_DEBUG_PAGEALLOC) +#define ITLB_MISS_KERNEL 1 +#endif /* * Value for the bits that have fixed value in RPN entries. @@ -123,7 +125,6 @@ turn_on_mmu: lis r0,start_here@h ori r0,r0,start_here@l mtspr SPRN_SRR0,r0 - SYNC rfi /* enables MMU */ /* @@ -170,7 +171,7 @@ turn_on_mmu: stw r1,0(r11); \ tovirt(r1,r11); /* set new kernel sp */ \ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ - MTMSRD(r10); /* (except for mach check in rtas) */ \ + mtmsr r10; \ stw r0,GPR0(r11); \ SAVE_4GPRS(3, r11); \ SAVE_2GPRS(7, r11) @@ -300,7 +301,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) . = 0x1100 /* @@ -325,7 +326,7 @@ SystemCall: #endif InstructionTLBMiss: -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtspr SPRN_SPRG_SCRATCH2, r3 #endif EXCEPTION_PROLOG_0 @@ -343,15 +344,32 @@ InstructionTLBMiss: INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10) /* Only modules will cause ITLB Misses as we always * pin the first 8MB of kernel memory */ -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfcr r3 #endif -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) - IS_KERNEL(r11, r10) +#ifdef ITLB_MISS_KERNEL +#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) + andis.
r11, r10, 0x8000 /* Address >= 0x80000000 */ +#else + rlwinm r11, r10, 16, 0xfff8 + cmpli cr0, r11, PAGE_OFFSET@h +#ifndef CONFIG_PIN_TLB_TEXT + /* It is assumed that kernel code fits into the first 8M page */ +_ENTRY(ITLBMiss_cmp) + cmpli cr7, r11, (PAGE_OFFSET + 0x0800000)@h +#endif +#endif #endif mfspr r11, SPRN_M_TW /* Get level 1 table */ -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) - BRANCH_UNLESS_KERNEL(3f) +#ifdef ITLB_MISS_KERNEL +#if defined(SIMPLE_KERNEL_ADDRESS) && defined(CONFIG_PIN_TLB_TEXT) + beq+ 3f +#else + blt+ 3f +#endif +#ifndef CONFIG_PIN_TLB_TEXT + blt cr7, ITLBMissLinear +#endif lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: #endif @@ -369,7 +387,7 @@ InstructionTLBMiss: rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */ lwz r10, 0(r10) /* Get the pte */ 4: -#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mtcr r3 #endif /* Insert the APG into the TWC from the Linux PTE. */ @@ -400,7 +418,7 @@ InstructionTLBMiss: MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */ /* Restore registers */ -#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE) +#if defined(CONFIG_8xx_CPU6) || defined(ITLB_MISS_KERNEL) || defined(CONFIG_HUGETLB_PAGE) mfspr r3, SPRN_SPRG_SCRATCH2 #endif EXCEPTION_EPILOG_0 @@ -447,23 +465,23 @@ DataStoreTLBMiss: * kernel page tables. */ mfspr r10, SPRN_MD_EPN - rlwinm r10, r10, 16, 0xfff8 - cmpli cr0, r10, PAGE_OFFSET@h + rlwinm r11, r10, 16, 0xfff8 + cmpli cr0, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TW /* Get level 1 table */ blt+ 3f + rlwinm r11, r10, 16, 0xfff8 #ifndef CONFIG_PIN_TLB_IMMR - cmpli cr0, r10, VIRT_IMMR_BASE@h + cmpli cr0, r11, VIRT_IMMR_BASE@h #endif _ENTRY(DTLBMiss_cmp) - cmpli cr7, r10, (PAGE_OFFSET + 0x1800000)@h - lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha + cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h #ifndef CONFIG_PIN_TLB_IMMR _ENTRY(DTLBMiss_jmp) beq- DTLBMissIMMR #endif blt cr7, DTLBMissLinear + lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha 3: - mfspr r10, SPRN_MD_EPN /* Insert level 1 index */ rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29 @@ -569,8 +587,8 @@ _ENTRY(DTLBMiss_jmp) InstructionTLBError: EXCEPTION_PROLOG mr r4,r12 - mr r5,r9 - andis. r10,r5,0x4000 + andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ + andis. r10,r9,SRR1_ISI_NOPT@h beq+ 1f tlbie r4 itlbie: @@ -595,7 +613,7 @@ DARFixed:/* Return from dcbx instruction bug workaround */ mfspr r5,SPRN_DSISR stw r5,_DSISR(r11) mfspr r4,SPRN_DAR - andis. r10,r5,0x4000 + andis. 
r10,r5,DSISR_NOHPTE@h beq+ 1f tlbie r4 dtlbie: @@ -684,7 +702,7 @@ DTLBMissLinear: /* Set 8M byte page and mark it valid */ li r11, MD_PS8MEG | MD_SVALID MTSPR_CPU6(SPRN_MD_TWC, r11, r3) - rlwinm r10, r10, 16, 0x0f800000 /* 8xx supports max 256Mb RAM */ + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ _PAGE_PRESENT MTSPR_CPU6(SPRN_MD_RPN, r10, r11) /* Update TLB entry */ @@ -695,6 +713,22 @@ DTLBMissLinear: EXCEPTION_EPILOG_0 rfi +#ifndef CONFIG_PIN_TLB_TEXT +ITLBMissLinear: + mtcr r3 + /* Set 8M byte page and mark it valid */ + li r11, MI_PS8MEG | MI_SVALID | _PAGE_EXEC + MTSPR_CPU6(SPRN_MI_TWC, r11, r3) + rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */ + ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SHARED | _PAGE_DIRTY | \ + _PAGE_PRESENT + MTSPR_CPU6(SPRN_MI_RPN, r10, r11) /* Update TLB entry */ + + mfspr r3, SPRN_SPRG_SCRATCH2 + EXCEPTION_EPILOG_0 + rfi +#endif + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. * DAR is set to the calculated address. */ FixupDAR:/* Entry point for dcbx workaround. */ mtspr SPRN_SPRG_SCRATCH2, r10 /* fetch instruction from memory. */ mfspr r10, SPRN_SRR0 - IS_KERNEL(r11, r10) + rlwinm r11, r10, 16, 0xfff8 + cmpli cr0, r11, PAGE_OFFSET@h mfspr r11, SPRN_M_TW /* Get level 1 table */ - BRANCH_UNLESS_KERNEL(3f) + blt+ 3f rlwinm r11, r10, 16, 0xfff8 _ENTRY(FixupDAR_cmp) cmpli cr7, r11, (PAGE_OFFSET + 0x1800000)@h @@ -915,10 +950,8 @@ start_here: rfi /* Load up the kernel context */ 2: - SYNC /* Force all PTE updates to finish */ tlbia /* Clear all TLB entries */ sync /* wait for tlbia/tlbie to finish */ - TLBSYNC /* ... on all CPUs */ /* set up the PTE pointers for the Abatron bdiGDB. */ @@ -955,15 +988,14 @@ initial_mmu: mtspr SPRN_MD_CTR, r10 /* remove PINNED DTLB entries */ tlbia /* Invalidate all TLB entries */ -/* Always pin the first 8 MB ITLB to prevent ITLB - misses while mucking around with SRR0/SRR1 in asm -*/ +#ifdef CONFIG_PIN_TLB_TEXT lis r8, MI_RSV4I@h ori r8, r8, 0x1c00 mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ +#endif -#ifdef CONFIG_PIN_TLB +#ifdef CONFIG_PIN_TLB_DATA oris r10, r10, MD_RSV4I@h mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ #endif @@ -989,6 +1021,7 @@ initial_mmu: * internal registers (among other things). */ #ifdef CONFIG_PIN_TLB_IMMR + oris r10, r10, MD_RSV4I@h ori r10, r10, 0x1c00 mtspr SPRN_MD_CTR, r10 diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 516ebef905c0..4621568277f2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -85,7 +85,61 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) std r3,_WORT(r1) mfspr r3,SPRN_WORC std r3,_WORC(r1) +/* + * On POWER9, there are idle states such as stop4, invoked via cpuidle, + * that lose hypervisor resources. In such cases, we need to save + * additional SPRs before entering those idle states so that they can + * be restored to their previous values on wakeup from the idle state. + * + * On POWER8, the only such deep idle state is winkle, which is used + * only in the context of CPU-Hotplug, where these additional SPRs are + * reinitialized to a sane value. Hence there is no need to save/restore + * these SPRs. + */
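In C terms, the pair of helpers that follows does roughly this (a conceptual sketch built on the struct stop_sprs added to asm/cpuidle.h and the mfspr() accessor from asm/reg.h; the authoritative code is the assembly below):

/* Save the SPRs that a deep stop state can lose into the paca. */
static void save_additional_sprs(struct stop_sprs *s)
{
	s->pid   = mfspr(SPRN_PID);
	s->ldbar = mfspr(SPRN_LDBAR);
	s->fscr  = mfspr(SPRN_FSCR);
	s->hfscr = mfspr(SPRN_HFSCR);
	s->mmcr1 = mfspr(SPRN_MMCR1);
	s->mmcr2 = mfspr(SPRN_MMCR2);
	s->mmcra = mfspr(SPRN_MMCRA);
}

The restore path on wakeup is the mirror image, written with mtspr().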
+BEGIN_FTR_SECTION + blr +END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) + +power9_save_additional_sprs: + mfspr r3, SPRN_PID + mfspr r4, SPRN_LDBAR + std r3, STOP_PID(r13) + std r4, STOP_LDBAR(r13) + mfspr r3, SPRN_FSCR + mfspr r4, SPRN_HFSCR + std r3, STOP_FSCR(r13) + std r4, STOP_HFSCR(r13) + + mfspr r3, SPRN_MMCRA + mfspr r4, SPRN_MMCR1 + std r3, STOP_MMCRA(r13) + std r4, STOP_MMCR1(r13) + + mfspr r3, SPRN_MMCR2 + std r3, STOP_MMCR2(r13) + blr + +power9_restore_additional_sprs: + ld r3,_LPCR(r1) + ld r4, STOP_PID(r13) + mtspr SPRN_LPCR,r3 + mtspr SPRN_PID, r4 + + ld r3, STOP_LDBAR(r13) + ld r4, STOP_FSCR(r13) + mtspr SPRN_LDBAR, r3 + mtspr SPRN_FSCR, r4 + + ld r3, STOP_HFSCR(r13) + ld r4, STOP_MMCRA(r13) + mtspr SPRN_HFSCR, r3 + mtspr SPRN_MMCRA, r4 + /* We have already restored PACA_MMCR0 */ + ld r3, STOP_MMCR1(r13) + ld r4, STOP_MMCR2(r13) + mtspr SPRN_MMCR1, r3 + mtspr SPRN_MMCR2, r4 blr /* @@ -803,9 +857,16 @@ no_segments: mtctr r12 bctrl +/* + * On POWER9, we can come here on wakeup from a cpuidle stop state. + * Hence restore the additional SPRs to the saved value. + * + * On POWER8, we come here only on winkle. Since winkle is used + * only in the case of CPU-Hotplug, we don't need to restore + * the additional SPRs. + */ BEGIN_FTR_SECTION - ld r4,_LPCR(r1) - mtspr SPRN_LPCR,r4 + bl power9_restore_additional_sprs END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) hypervisor_state_restored: diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 233ca3fe4754..e0af6cd7ba4f 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -190,7 +190,7 @@ static unsigned long iommu_range_alloc(struct device *dev, unsigned int pool_nr; struct iommu_pool *pool; - align_mask = 0xffffffffffffffffl >> (64 - align_order); + align_mask = (1ull << align_order) - 1; /* This allocator was derived from x86_64's bit string search */ @@ -208,7 +208,7 @@ static unsigned long iommu_range_alloc(struct device *dev, * We don't need to disable preemption here because any CPU can * safely use any IOMMU pool. */ - pool_nr = __this_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); + pool_nr = raw_cpu_read(iommu_pool_hash) & (tbl->nr_pools - 1); if (largealloc) pool = &(tbl->large_pool); diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 0bcec745a672..8b514e910bf1 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -24,7 +24,7 @@ * mask register (of which only 16 are defined), hence the weird shifting * and complement of the cached_irq_mask. I want to be able to stuff * this right into the SIU SMASK register. - * Many of the prep/chrp functions are conditional compiled on CONFIG_8xx + * Many of the prep/chrp functions are conditionally compiled on CONFIG_PPC_8xx * to reduce code space and undefined function references. */
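The iommu_range_alloc() change above deserves a remark: for align_order == 0 the old expression right-shifted a 64-bit value by 64, which is undefined behaviour in C, while the new expression produces the intended mask for every order. A small demonstration (sketch, not from the patch):

/* old: 0xffffffffffffffffl >> (64 - order)   -- undefined for order == 0 */
/* new: (1ull << order) - 1                   -- order 0 -> 0x0, 1 -> 0x1, 2 -> 0x3, ... */
static unsigned long align_mask_for(unsigned int align_order)
{
	return (1ull << align_order) - 1;
}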
 */
@@ -470,6 +470,18 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 		seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
 	}
 
+	seq_printf(p, "%*s: ", prec, "NMI");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs);
+	seq_printf(p, "  System Reset interrupts\n");
+
+#ifdef CONFIG_PPC_WATCHDOG
+	seq_printf(p, "%*s: ", prec, "WDG");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs);
+	seq_printf(p, "  Watchdog soft-NMI interrupts\n");
+#endif
+
 #ifdef CONFIG_PPC_DOORBELL
 	if (cpu_has_feature(CPU_FTR_DBELL)) {
 		seq_printf(p, "%*s: ", prec, "DBL");
@@ -494,6 +506,10 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 	sum += per_cpu(irq_stat, cpu).spurious_irqs;
 	sum += per_cpu(irq_stat, cpu).timer_irqs_others;
 	sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+	sum += per_cpu(irq_stat, cpu).sreset_irqs;
+#ifdef CONFIG_PPC_WATCHDOG
+	sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
+#endif
 #ifdef CONFIG_PPC_DOORBELL
 	sum += per_cpu(irq_stat, cpu).doorbell_irqs;
 #endif
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index dbf098121ce6..35e240a0a408 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -67,9 +67,9 @@ static struct hard_trap_info
 #endif
 #else /* ! (defined(CONFIG_40x) || defined(CONFIG_BOOKE)) */
 	{ 0x0d00, 0x05 /* SIGTRAP */ },		/* single-step */
-#if defined(CONFIG_8xx)
+#if defined(CONFIG_PPC_8xx)
 	{ 0x1000, 0x04 /* SIGILL */ },		/* software emulation */
-#else /* ! CONFIG_8xx */
+#else /* ! CONFIG_PPC_8xx */
 	{ 0x0f00, 0x04 /* SIGILL */ },		/* performance monitor */
 	{ 0x0f20, 0x08 /* SIGFPE */ },		/* altivec unavailable */
 	{ 0x1300, 0x05 /* SIGTRAP */ },		/* instruction address break */
diff --git a/arch/powerpc/kernel/l2cr_6xx.S b/arch/powerpc/kernel/l2cr_6xx.S
index 97ec8557f974..6408f09dbbd9 100644
--- a/arch/powerpc/kernel/l2cr_6xx.S
+++ b/arch/powerpc/kernel/l2cr_6xx.S
@@ -181,7 +181,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450)
 	mtctr	r4
 	li	r4,0
 1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
 	addi	r4,r4,32	/* Go to start of next cache line */
 	bdnz	1b
 	isync
@@ -328,7 +328,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_L3CR)
 	mtctr	r4
 	li	r4,0
 1:
-	lwzx	r0,r0,r4
+	lwzx	r0,0,r4
 	dcbf	0,r4
 	addi	r4,r4,32	/* Go to start of next cache line */
 	bdnz	1b
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index e0e131e662ed..9b2ea7e71c06 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -22,11 +22,14 @@
 #undef DEBUG
 #define pr_fmt(fmt) "mce: " fmt
 
+#include <linux/hardirq.h>
 #include <linux/types.h>
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 #include <linux/export.h>
 #include <linux/irq_work.h>
+
+#include <asm/machdep.h>
 #include <asm/mce.h>
 
 static DEFINE_PER_CPU(int, mce_nest_count);
@@ -446,3 +449,33 @@ uint64_t get_mce_fault_addr(struct machine_check_event *evt)
 	return 0;
 }
 EXPORT_SYMBOL(get_mce_fault_addr);
+
+/*
+ * This function is called in real mode. Strictly no printk's please.
+ *
+ * regs->nip and regs->msr contain srr0 and srr1.
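+ *
+ * (Real mode means the MMU is off: printk() would touch virtually
+ * mapped kernel data and take locks, neither of which is safe here.)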
+ */
+long machine_check_early(struct pt_regs *regs)
+{
+	long handled = 0;
+
+	__this_cpu_inc(irq_stat.mce_exceptions);
+
+	if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+		handled = cur_cpu_spec->machine_check_early(regs);
+	return handled;
+}
+
+long hmi_exception_realmode(struct pt_regs *regs)
+{
+	__this_cpu_inc(irq_stat.hmi_exceptions);
+
+	wait_for_subcore_guest_exit();
+
+	if (ppc_md.hmi_exception_early)
+		ppc_md.hmi_exception_early(regs);
+
+	wait_for_tb_resync();
+
+	return 0;
+}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f3e2c932dcc..cc5bae4bba7b 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -230,7 +230,8 @@ void enable_kernel_fp(void)
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
-static int restore_fp(struct task_struct *tsk) {
+static int restore_fp(struct task_struct *tsk)
+{
 	if (tsk->thread.load_fp || msr_tm_active(tsk->thread.regs->msr)) {
 		load_fp_state(&current->thread.fp_state);
 		current->thread.load_fp++;
@@ -330,11 +331,19 @@ static inline int restore_altivec(struct task_struct *tsk) { return 0; }
 #ifdef CONFIG_VSX
 static void __giveup_vsx(struct task_struct *tsk)
 {
-	if (tsk->thread.regs->msr & MSR_FP)
+	unsigned long msr = tsk->thread.regs->msr;
+
+	/*
+	 * We should never be setting MSR_VSX without also setting
+	 * MSR_FP and MSR_VEC
+	 */
+	WARN_ON((msr & MSR_VSX) && !((msr & MSR_FP) && (msr & MSR_VEC)));
+
+	/* __giveup_fpu will clear MSR_VSX */
+	if (msr & MSR_FP)
 		__giveup_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
+	if (msr & MSR_VEC)
 		__giveup_altivec(tsk);
-	tsk->thread.regs->msr &= ~MSR_VSX;
 }
 
 static void giveup_vsx(struct task_struct *tsk)
@@ -346,14 +355,6 @@ static void giveup_vsx(struct task_struct *tsk)
 	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
 
-static void save_vsx(struct task_struct *tsk)
-{
-	if (tsk->thread.regs->msr & MSR_FP)
-		save_fpu(tsk);
-	if (tsk->thread.regs->msr & MSR_VEC)
-		save_altivec(tsk);
-}
-
 void enable_kernel_vsx(void)
 {
 	unsigned long cpumsr;
@@ -373,10 +374,6 @@ void enable_kernel_vsx(void)
 	 */
 	if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
 		return;
-	if (current->thread.regs->msr & MSR_FP)
-		__giveup_fpu(current);
-	if (current->thread.regs->msr & MSR_VEC)
-		__giveup_altivec(current);
 		__giveup_vsx(current);
 	}
 }
@@ -406,7 +403,6 @@ static int restore_vsx(struct task_struct *tsk)
 }
 #else
 static inline int restore_vsx(struct task_struct *tsk) { return 0; }
-static inline void save_vsx(struct task_struct *tsk) { }
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_SPE
@@ -486,6 +482,8 @@ void giveup_all(struct task_struct *tsk)
 	msr_check_and_set(msr_all_available);
 	check_if_tm_restore_required(tsk);
 
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
+
 #ifdef CONFIG_PPC_FPU
 	if (usermsr & MSR_FP)
 		__giveup_fpu(tsk);
@@ -494,10 +492,6 @@
 #endif
 	if (usermsr & MSR_VEC)
 		__giveup_altivec(tsk);
 #endif
-#ifdef CONFIG_VSX
-	if (usermsr & MSR_VSX)
-		__giveup_vsx(tsk);
-#endif
 #ifdef CONFIG_SPE
 	if (usermsr & MSR_SPE)
 		__giveup_spe(tsk);
@@ -556,19 +550,13 @@ void save_all(struct task_struct *tsk)
 
 	msr_check_and_set(msr_all_available);
 
-	/*
-	 * Saving the way the register space is in hardware, save_vsx boils
-	 * down to a save_fpu() and save_altivec()
-	 */
-	if (usermsr & MSR_VSX) {
-		save_vsx(tsk);
-	} else {
-		if (usermsr & MSR_FP)
-			save_fpu(tsk);
+	WARN_ON((usermsr & MSR_VSX) && !((usermsr & MSR_FP) && (usermsr & MSR_VEC)));
 
-		if (usermsr & MSR_VEC)
-			save_altivec(tsk);
-	}
+	if (usermsr & MSR_FP)
save_fpu(tsk); + + if (usermsr & MSR_VEC) + save_altivec(tsk); if (usermsr & MSR_SPE) __giveup_spe(tsk); diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 94a948207cd2..0f896f17d5ab 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -916,13 +916,6 @@ void __init setup_arch(char **cmdline_p) /* Reserve large chunks of memory for use by CMA for KVM. */ kvm_cma_reserve(); - /* - * Reserve any gigantic pages requested on the command line. - * memblock needs to have been initialized by the time this is - * called since this will reserve memory. - */ - reserve_hugetlb_gpages(); - klp_init_thread_info(&init_thread_info); init_mm.start_code = (unsigned long)_stext; diff --git a/arch/powerpc/kernel/swsusp_asm64.S b/arch/powerpc/kernel/swsusp_asm64.S index 988f38dced0f..82d8aae81c6a 100644 --- a/arch/powerpc/kernel/swsusp_asm64.S +++ b/arch/powerpc/kernel/swsusp_asm64.S @@ -179,7 +179,7 @@ nothing_to_copy: sld r3, r3, r0 li r0, 0 1: - dcbf r0,r3 + dcbf 0,r3 addi r3,r3,0x20 bdnz 1b diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index bfcfd9ef09f2..675d5d2bfcde 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -288,6 +288,8 @@ void system_reset_exception(struct pt_regs *regs) if (!nested) nmi_enter(); + __this_cpu_inc(irq_stat.sreset_irqs); + /* See if any machine dependent calls */ if (ppc_md.system_reset_exception) { if (ppc_md.system_reset_exception(regs)) @@ -312,39 +314,6 @@ out: /* What should we do here? We could issue a shutdown or hard reset. */ } -#ifdef CONFIG_PPC64 -/* - * This function is called in real mode. Strictly no printk's please. - * - * regs->nip and regs->msr contains srr0 and ssr1. - */ -long machine_check_early(struct pt_regs *regs) -{ - long handled = 0; - - __this_cpu_inc(irq_stat.mce_exceptions); - - if (cur_cpu_spec && cur_cpu_spec->machine_check_early) - handled = cur_cpu_spec->machine_check_early(regs); - return handled; -} - -long hmi_exception_realmode(struct pt_regs *regs) -{ - __this_cpu_inc(irq_stat.hmi_exceptions); - - wait_for_subcore_guest_exit(); - - if (ppc_md.hmi_exception_early) - ppc_md.hmi_exception_early(regs); - - wait_for_tb_resync(); - - return 0; -} - -#endif - /* * I/O accesses can cause machine checks on powermacs. * Check if the NIP corresponds to the address of a sync @@ -397,11 +366,6 @@ static inline int check_io_access(struct pt_regs *regs) /* On 4xx, the reason for the machine check or program exception is in the ESR. */ #define get_reason(regs) ((regs)->dsisr) -#ifndef CONFIG_FSL_BOOKE -#define get_mc_reason(regs) ((regs)->dsisr) -#else -#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) -#endif #define REASON_FP ESR_FP #define REASON_ILLEGAL (ESR_PIL | ESR_PUO) #define REASON_PRIVILEGED ESR_PPR @@ -415,108 +379,17 @@ static inline int check_io_access(struct pt_regs *regs) /* On non-4xx, the reason for the machine check or program exception is in the MSR. 
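 * (Accordingly, the REASON_* values below are now defined with the named
 * SRR1_PROG* masks rather than the old bare hex constants.)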
*/ #define get_reason(regs) ((regs)->msr) -#define get_mc_reason(regs) ((regs)->msr) -#define REASON_TM 0x200000 -#define REASON_FP 0x100000 -#define REASON_ILLEGAL 0x80000 -#define REASON_PRIVILEGED 0x40000 -#define REASON_TRAP 0x20000 +#define REASON_TM SRR1_PROGTM +#define REASON_FP SRR1_PROGFPE +#define REASON_ILLEGAL SRR1_PROGILL +#define REASON_PRIVILEGED SRR1_PROGPRIV +#define REASON_TRAP SRR1_PROGTRAP #define single_stepping(regs) ((regs)->msr & MSR_SE) #define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) #endif -#if defined(CONFIG_4xx) -int machine_check_4xx(struct pt_regs *regs) -{ - unsigned long reason = get_mc_reason(regs); - - if (reason & ESR_IMCP) { - printk("Instruction"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } else - printk("Data"); - printk(" machine check in kernel mode.\n"); - - return 0; -} - -int machine_check_440A(struct pt_regs *regs) -{ - unsigned long reason = get_mc_reason(regs); - - printk("Machine check in kernel mode.\n"); - if (reason & ESR_IMCP){ - printk("Instruction Synchronous Machine Check exception\n"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - } - else { - u32 mcsr = mfspr(SPRN_MCSR); - if (mcsr & MCSR_IB) - printk("Instruction Read PLB Error\n"); - if (mcsr & MCSR_DRB) - printk("Data Read PLB Error\n"); - if (mcsr & MCSR_DWB) - printk("Data Write PLB Error\n"); - if (mcsr & MCSR_TLBP) - printk("TLB Parity Error\n"); - if (mcsr & MCSR_ICP){ - flush_instruction_cache(); - printk("I-Cache Parity Error\n"); - } - if (mcsr & MCSR_DCSP) - printk("D-Cache Search Parity Error\n"); - if (mcsr & MCSR_DCFP) - printk("D-Cache Flush Parity Error\n"); - if (mcsr & MCSR_IMPE) - printk("Machine Check exception is imprecise\n"); - - /* Clear MCSR */ - mtspr(SPRN_MCSR, mcsr); - } - return 0; -} - -int machine_check_47x(struct pt_regs *regs) -{ - unsigned long reason = get_mc_reason(regs); - u32 mcsr; - - printk(KERN_ERR "Machine check in kernel mode.\n"); - if (reason & ESR_IMCP) { - printk(KERN_ERR - "Instruction Synchronous Machine Check exception\n"); - mtspr(SPRN_ESR, reason & ~ESR_IMCP); - return 0; - } - mcsr = mfspr(SPRN_MCSR); - if (mcsr & MCSR_IB) - printk(KERN_ERR "Instruction Read PLB Error\n"); - if (mcsr & MCSR_DRB) - printk(KERN_ERR "Data Read PLB Error\n"); - if (mcsr & MCSR_DWB) - printk(KERN_ERR "Data Write PLB Error\n"); - if (mcsr & MCSR_TLBP) - printk(KERN_ERR "TLB Parity Error\n"); - if (mcsr & MCSR_ICP) { - flush_instruction_cache(); - printk(KERN_ERR "I-Cache Parity Error\n"); - } - if (mcsr & MCSR_DCSP) - printk(KERN_ERR "D-Cache Search Parity Error\n"); - if (mcsr & PPC47x_MCSR_GPR) - printk(KERN_ERR "GPR Parity Error\n"); - if (mcsr & PPC47x_MCSR_FPR) - printk(KERN_ERR "FPR Parity Error\n"); - if (mcsr & PPC47x_MCSR_IPR) - printk(KERN_ERR "Machine Check exception is imprecise\n"); - - /* Clear MCSR */ - mtspr(SPRN_MCSR, mcsr); - - return 0; -} -#elif defined(CONFIG_E500) +#if defined(CONFIG_E500) int machine_check_e500mc(struct pt_regs *regs) { unsigned long mcsr = mfspr(SPRN_MCSR); @@ -618,7 +491,7 @@ silent_out: int machine_check_e500(struct pt_regs *regs) { - unsigned long reason = get_mc_reason(regs); + unsigned long reason = mfspr(SPRN_MCSR); if (reason & MCSR_BUS_RBERR) { if (fsl_rio_mcheck_exception(regs)) @@ -665,7 +538,7 @@ int machine_check_generic(struct pt_regs *regs) #elif defined(CONFIG_E200) int machine_check_e200(struct pt_regs *regs) { - unsigned long reason = get_mc_reason(regs); + unsigned long reason = mfspr(SPRN_MCSR); printk("Machine check in kernel mode.\n"); printk("Caused by (from MCSR=%lx): ", reason); @@ 
-687,35 +560,10 @@ int machine_check_e200(struct pt_regs *regs) return 0; } -#elif defined(CONFIG_PPC_8xx) -int machine_check_8xx(struct pt_regs *regs) -{ - unsigned long reason = get_mc_reason(regs); - - pr_err("Machine check in kernel mode.\n"); - pr_err("Caused by (from SRR1=%lx): ", reason); - if (reason & 0x40000000) - pr_err("Fetch error at address %lx\n", regs->nip); - else - pr_err("Data access error at address %lx\n", regs->dar); - -#ifdef CONFIG_PCI - /* the qspan pci read routines can cause machine checks -- Cort - * - * yuck !!! that totally needs to go away ! There are better ways - * to deal with that than having a wart in the mcheck handler. - * -- BenH - */ - bad_page_fault(regs, regs->dar, SIGBUS); - return 1; -#else - return 0; -#endif -} -#else +#elif defined(CONFIG_PPC32) int machine_check_generic(struct pt_regs *regs) { - unsigned long reason = get_mc_reason(regs); + unsigned long reason = regs->msr; printk("Machine check in kernel mode.\n"); printk("Caused by (from SRR1=%lx): ", reason); @@ -755,7 +603,9 @@ void machine_check_exception(struct pt_regs *regs) enum ctx_state prev_state = exception_enter(); int recover = 0; - __this_cpu_inc(irq_stat.mce_exceptions); + /* 64s accounts the mce in machine_check_early when in HVMODE */ + if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE)) + __this_cpu_inc(irq_stat.mce_exceptions); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); @@ -1672,24 +1522,6 @@ void performance_monitor_exception(struct pt_regs *regs) perf_irq(regs); } -#ifdef CONFIG_8xx -void SoftwareEmulation(struct pt_regs *regs) -{ - CHECK_FULL_REGS(regs); - - if (!user_mode(regs)) { - debugger(regs); - die("Kernel Mode Unimplemented Instruction or SW FPU Emulation", - regs, SIGFPE); - } - - if (!emulate_math(regs)) - return; - - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); -} -#endif /* CONFIG_8xx */ - #ifdef CONFIG_PPC_ADV_DEBUG_REGS static void handle_debug(struct pt_regs *regs, unsigned long debug_status) { diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index 6b2b69616e77..769c2624e0a6 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -232,15 +232,9 @@ __do_get_tspec: lwz r6,(CFG_TB_ORIG_STAMP+4)(r9) /* Get a stable TB value */ -#ifdef CONFIG_8xx -2: mftbu r3 - mftbl r4 - mftbu r0 -#else -2: mfspr r3, SPRN_TBRU - mfspr r4, SPRN_TBRL - mfspr r0, SPRN_TBRU -#endif +2: MFTBU(r3) + MFTBL(r4) + MFTBU(r0) cmplw cr0,r3,r0 bne- 2b diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index b1a250560198..882628fa6987 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -8,7 +8,7 @@ #include <asm/cache.h> #include <asm/thread_info.h> -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) && !defined(CONFIG_PPC32) #define STRICT_ALIGN_SIZE (1 << 24) #else #define STRICT_ALIGN_SIZE PAGE_SIZE diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index b67f8b03a32d..4b9a567c9975 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -204,6 +204,9 @@ void soft_nmi_interrupt(struct pt_regs *regs) return; nmi_enter(); + + __this_cpu_inc(irq_stat.soft_nmi_irqs); + tb = get_tb(); if (tb - per_cpu(wd_timer_tb, cpu) >= wd_panic_timeout_tb) { per_cpu(wd_timer_tb, cpu) = tb; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 7d719c8aa0bb..c5d7435455f1 100644 --- 
a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -323,13 +323,13 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, gpa = vcpu->arch.fault_gpa & ~0xfffUL; gpa &= ~0xF000000000000000ul; gfn = gpa >> PAGE_SHIFT; - if (!(dsisr & DSISR_PGDIRFAULT)) + if (!(dsisr & DSISR_PRTABLE_FAULT)) gpa |= ea & 0xfff; memslot = gfn_to_memslot(kvm, gfn); /* No memslot means it's an emulated MMIO region */ if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { - if (dsisr & (DSISR_PGDIRFAULT | DSISR_BADACCESS | + if (dsisr & (DSISR_PRTABLE_FAULT | DSISR_BADACCESS | DSISR_SET_RC)) { /* * Bad address in guest page table tree, or other diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index a84d333ecb09..ca5fc8fa7efc 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -45,13 +45,13 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" /* setup read stream 0 */ - dcbt r0,r4,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ + dcbt 0,r4,0b01000 /* addr from */ + dcbt 0,r7,0b01010 /* length and depth from */ /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ + dcbtst 0,r9,0b01000 /* addr to */ + dcbtst 0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* all streams GO */ + dcbt 0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC @@ -83,7 +83,7 @@ _GLOBAL(copypage_power7) li r12,112 .align 5 -1: lvx v7,r0,r4 +1: lvx v7,0,r4 lvx v6,r4,r6 lvx v5,r4,r7 lvx v4,r4,r8 @@ -92,7 +92,7 @@ _GLOBAL(copypage_power7) lvx v1,r4,r11 lvx v0,r4,r12 addi r4,r4,128 - stvx v7,r0,r3 + stvx v7,0,r3 stvx v6,r3,r6 stvx v5,r3,r7 stvx v4,r3,r8 diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 706b7cc19846..d416a4a66578 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -315,13 +315,13 @@ err1; stb r0,0(r3) .machine push .machine "power4" /* setup read stream 0 */ - dcbt r0,r6,0b01000 /* addr from */ - dcbt r0,r7,0b01010 /* length and depth from */ + dcbt 0,r6,0b01000 /* addr from */ + dcbt 0,r7,0b01010 /* length and depth from */ /* setup write stream 1 */ - dcbtst r0,r9,0b01000 /* addr to */ - dcbtst r0,r10,0b01010 /* length and depth to */ + dcbtst 0,r9,0b01000 /* addr to */ + dcbtst 0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* all streams GO */ + dcbt 0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy @@ -376,26 +376,26 @@ err3; std r0,0(r3) li r11,48 bf cr7*4+3,5f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 err3; stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 err3; lvx v2,r4,r9 err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx v3,r0,r3 +err3; stvx v3,0,r3 err3; stvx v2,r3,r9 err3; stvx v1,r3,r10 err3; stvx v0,r3,r11 @@ -421,7 +421,7 @@ err3; stvx v0,r3,r11 */ .align 5 8: -err4; lvx v7,r0,r4 +err4; lvx v7,0,r4 err4; lvx v6,r4,r9 err4; lvx v5,r4,r10 err4; lvx v4,r4,r11 @@ -430,7 +430,7 @@ err4; lvx v2,r4,r14 err4; lvx v1,r4,r15 err4; lvx v0,r4,r16 addi r4,r4,128 -err4; stvx v7,r0,r3 +err4; stvx v7,0,r3 err4; stvx v6,r3,r9 err4; stvx v5,r3,r10 err4; stvx v4,r3,r11 @@ -451,29 +451,29 @@ err4; 
stvx v0,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 err3; lvx v2,r4,r9 err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 addi r4,r4,64 -err3; stvx v3,r0,r3 +err3; stvx v3,0,r3 err3; stvx v2,r3,r9 err3; stvx v1,r3,r10 err3; stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 err3; lvx v0,r4,r9 addi r4,r4,32 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 err3; stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 addi r4,r4,16 -err3; stvx v1,r0,r3 +err3; stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -553,25 +553,25 @@ err3; lvx v0,0,r4 addi r4,r4,16 bf cr7*4+3,5f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 vor v0,v1,v1 5: bf cr7*4+2,6f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) err3; lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 VPERM(v8,v0,v3,v16) err3; lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -580,7 +580,7 @@ err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 err3; stvx v10,r3,r10 err3; stvx v11,r3,r11 @@ -606,7 +606,7 @@ err3; stvx v11,r3,r11 */ .align 5 8: -err4; lvx v7,r0,r4 +err4; lvx v7,0,r4 VPERM(v8,v0,v7,v16) err4; lvx v6,r4,r9 VPERM(v9,v7,v6,v16) @@ -623,7 +623,7 @@ err4; lvx v1,r4,r15 err4; lvx v0,r4,r16 VPERM(v15,v1,v0,v16) addi r4,r4,128 -err4; stvx v8,r0,r3 +err4; stvx v8,0,r3 err4; stvx v9,r3,r9 err4; stvx v10,r3,r10 err4; stvx v11,r3,r11 @@ -644,7 +644,7 @@ err4; stvx v15,r3,r16 mtocrf 0x01,r6 bf cr7*4+1,9f -err3; lvx v3,r0,r4 +err3; lvx v3,0,r4 VPERM(v8,v0,v3,v16) err3; lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -653,27 +653,27 @@ err3; lvx v1,r4,r10 err3; lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 err3; stvx v10,r3,r10 err3; stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) err3; lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 err3; stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f -err3; lvx v1,r0,r4 +err3; lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 -err3; stvx v8,r0,r3 +err3; stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 85fa9869aec5..ec531de99996 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -13,6 +13,23 @@ #include <asm/ppc_asm.h> #include <asm/export.h> +_GLOBAL(__memset16) + rlwimi r4,r4,16,0,15 + /* fall through */ + +_GLOBAL(__memset32) + rldimi r4,r4,32,0 + /* fall through */ + +_GLOBAL(__memset64) + neg r0,r3 + andi. r0,r0,7 + cmplw cr1,r5,r0 + b .Lms +EXPORT_SYMBOL(__memset16) +EXPORT_SYMBOL(__memset32) +EXPORT_SYMBOL(__memset64) + _GLOBAL(memset) neg r0,r3 rlwimi r4,r4,8,16,23 @@ -20,7 +37,7 @@ _GLOBAL(memset) rlwimi r4,r4,16,0,15 cmplw cr1,r5,r0 /* do we get that far? 
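 * (i.e. does the length in r5 cover the r0 bytes needed to reach
 * 8-byte alignment?)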
*/ rldimi r4,r4,32,0 - PPC_MTOCRF(1,r0) +.Lms: PPC_MTOCRF(1,r0) mr r6,r3 blt cr1,8f beq+ 3f /* if already 8-byte aligned */ diff --git a/arch/powerpc/lib/memcpy_power7.S b/arch/powerpc/lib/memcpy_power7.S index 786234fd4e91..193909abd18b 100644 --- a/arch/powerpc/lib/memcpy_power7.S +++ b/arch/powerpc/lib/memcpy_power7.S @@ -261,12 +261,12 @@ _GLOBAL(memcpy_power7) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + dcbt 0,r6,0b01000 + dcbt 0,r7,0b01010 + dcbtst 0,r9,0b01000 + dcbtst 0,r10,0b01010 eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt 0,r8,0b01010 /* GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy @@ -321,26 +321,26 @@ _GLOBAL(memcpy_power7) li r11,48 bf cr7*4+3,5f - lvx v1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx v1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 5: bf cr7*4+2,6f - lvx v1,r0,r4 + lvx v1,0,r4 lvx v0,r4,r9 addi r4,r4,32 - stvx v1,r0,r3 + stvx v1,0,r3 stvx v0,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx v3,r0,r4 + lvx v3,0,r4 lvx v2,r4,r9 lvx v1,r4,r10 lvx v0,r4,r11 addi r4,r4,64 - stvx v3,r0,r3 + stvx v3,0,r3 stvx v2,r3,r9 stvx v1,r3,r10 stvx v0,r3,r11 @@ -366,7 +366,7 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx v7,r0,r4 + lvx v7,0,r4 lvx v6,r4,r9 lvx v5,r4,r10 lvx v4,r4,r11 @@ -375,7 +375,7 @@ _GLOBAL(memcpy_power7) lvx v1,r4,r15 lvx v0,r4,r16 addi r4,r4,128 - stvx v7,r0,r3 + stvx v7,0,r3 stvx v6,r3,r9 stvx v5,r3,r10 stvx v4,r3,r11 @@ -396,29 +396,29 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx v3,r0,r4 + lvx v3,0,r4 lvx v2,r4,r9 lvx v1,r4,r10 lvx v0,r4,r11 addi r4,r4,64 - stvx v3,r0,r3 + stvx v3,0,r3 stvx v2,r3,r9 stvx v1,r3,r10 stvx v0,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx v1,r0,r4 + lvx v1,0,r4 lvx v0,r4,r9 addi r4,r4,32 - stvx v1,r0,r3 + stvx v1,0,r3 stvx v0,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx v1,r0,r4 + lvx v1,0,r4 addi r4,r4,16 - stvx v1,r0,r3 + stvx v1,0,r3 addi r3,r3,16 /* Up to 15B to go */ @@ -499,25 +499,25 @@ _GLOBAL(memcpy_power7) addi r4,r4,16 bf cr7*4+3,5f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx v8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 vor v0,v1,v1 5: bf cr7*4+2,6f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 addi r3,r3,32 6: bf cr7*4+1,7f - lvx v3,r0,r4 + lvx v3,0,r4 VPERM(v8,v0,v3,v16) lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -526,7 +526,7 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 @@ -552,7 +552,7 @@ _GLOBAL(memcpy_power7) */ .align 5 8: - lvx v7,r0,r4 + lvx v7,0,r4 VPERM(v8,v0,v7,v16) lvx v6,r4,r9 VPERM(v9,v7,v6,v16) @@ -569,7 +569,7 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r16 VPERM(v15,v1,v0,v16) addi r4,r4,128 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 @@ -590,7 +590,7 @@ _GLOBAL(memcpy_power7) mtocrf 0x01,r6 bf cr7*4+1,9f - lvx v3,r0,r4 + lvx v3,0,r4 VPERM(v8,v0,v3,v16) lvx v2,r4,r9 VPERM(v9,v3,v2,v16) @@ -599,27 +599,27 @@ _GLOBAL(memcpy_power7) lvx v0,r4,r11 VPERM(v11,v1,v0,v16) addi r4,r4,64 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 stvx v10,r3,r10 stvx v11,r3,r11 addi r3,r3,64 9: bf cr7*4+2,10f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) lvx v0,r4,r9 VPERM(v9,v1,v0,v16) addi r4,r4,32 - stvx v8,r0,r3 + stvx v8,0,r3 stvx v9,r3,r9 addi r3,r3,32 10: bf cr7*4+3,11f - lvx v1,r0,r4 + lvx v1,0,r4 VPERM(v8,v0,v1,v16) addi r4,r4,16 - stvx v8,r0,r3 + stvx v8,0,r3 addi r3,r3,16 /* Up to 15B to go */ 
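A note on the recurring r0 -> 0 operand changes in the assembly hunks above (l2cr_6xx.S, swsusp_asm64.S, the power7 copy loops and string_64.S): in the Power ISA, a zero in the RA slot of these load/store and cache instructions encodes the literal value zero, not GPR0, and the kernel's asm headers simply #define r0 to 0, so the generated instruction is unchanged. Writing 0 makes the "no base register" semantics explicit, while the old r0 spelling wrongly suggests the register is read. A minimal illustration, not part of the patch itself:

	lvx	v1,0,r4		/* EA = (r4): an RA field of 0 is the literal zero */
	lvx	v1,r0,r4	/* old spelling: same encoding, but reads as if GPR0 were added */
	add	r5,r0,r4	/* contrast: arithmetic instructions really do read GPR0 */

Presumably this also keeps the code friendly to assemblers that treat r0 strictly as a register name in these operand slots.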
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ee33327686ae..a85b82c0e3f3 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -596,6 +596,86 @@ static nokprobe_inline void do_cmp_unsigned(struct pt_regs *regs, unsigned long regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift); } +static nokprobe_inline void do_cmpb(struct pt_regs *regs, unsigned long v1, + unsigned long v2, int rd) +{ + unsigned long long out_val, mask; + int i; + + out_val = 0; + for (i = 0; i < 8; i++) { + mask = 0xffUL << (i * 8); + if ((v1 & mask) == (v2 & mask)) + out_val |= mask; + } + + regs->gpr[rd] = out_val; +} + +/* + * The size parameter is used to adjust the equivalent popcnt instruction. + * popcntb = 8, popcntw = 32, popcntd = 64 + */ +static nokprobe_inline void do_popcnt(struct pt_regs *regs, unsigned long v1, + int size, int ra) +{ + unsigned long long out = v1; + + out -= (out >> 1) & 0x5555555555555555; + out = (0x3333333333333333 & out) + (0x3333333333333333 & (out >> 2)); + out = (out + (out >> 4)) & 0x0f0f0f0f0f0f0f0f; + + if (size == 8) { /* popcntb */ + regs->gpr[ra] = out; + return; + } + out += out >> 8; + out += out >> 16; + if (size == 32) { /* popcntw */ + regs->gpr[ra] = out & 0x0000003f0000003f; + return; + } + + out = (out + (out >> 32)) & 0x7f; + regs->gpr[ra] = out; /* popcntd */ +} + +#ifdef CONFIG_PPC64 +static nokprobe_inline void do_bpermd(struct pt_regs *regs, unsigned long v1, + unsigned long v2, int ra) +{ + unsigned char perm, idx; + unsigned int i; + + perm = 0; + for (i = 0; i < 8; i++) { + idx = (v1 >> (i * 8)) & 0xff; + if (idx < 64) + if (v2 & PPC_BIT(idx)) + perm |= 1 << i; + } + regs->gpr[ra] = perm; +} +#endif /* CONFIG_PPC64 */ +/* + * The size parameter adjusts the equivalent prty instruction. + * prtyw = 32, prtyd = 64 + */ +static nokprobe_inline void do_prty(struct pt_regs *regs, unsigned long v, + int size, int ra) +{ + unsigned long long res = v ^ (v >> 8); + + res ^= res >> 16; + if (size == 32) { /* prtyw */ + regs->gpr[ra] = res & 0x0000000100000001; + return; + } + + res ^= res >> 32; + regs->gpr[ra] = res & 1; /*prtyd */ +} + static nokprobe_inline int trap_compare(long v1, long v2) { int ret = 0; @@ -1064,6 +1144,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, do_cmp_unsigned(regs, val, val2, rd >> 2); goto instr_done; + case 508: /* cmpb */ + do_cmpb(regs, regs->gpr[rd], regs->gpr[rb], ra); + goto instr_done; + /* * Arithmetic instructions */ @@ -1171,6 +1255,14 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, /* * Logical instructions */ + case 15: /* isel */ + mb = (instr >> 6) & 0x1f; /* bc */ + val = (regs->ccr >> (31 - mb)) & 1; + val2 = (ra) ? regs->gpr[ra] : 0; + + regs->gpr[rd] = (val) ? 
val2 : regs->gpr[rb]; + goto logical_done; + case 26: /* cntlzw */ asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) : "r" (regs->gpr[rd])); @@ -1189,10 +1281,26 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb]; goto logical_done; + case 122: /* popcntb */ + do_popcnt(regs, regs->gpr[rd], 8, ra); + goto logical_done; + case 124: /* nor */ regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]); goto logical_done; + case 154: /* prtyw */ + do_prty(regs, regs->gpr[rd], 32, ra); + goto logical_done; + + case 186: /* prtyd */ + do_prty(regs, regs->gpr[rd], 64, ra); + goto logical_done; +#ifdef CONFIG_PPC64 + case 252: /* bpermd */ + do_bpermd(regs, regs->gpr[rd], regs->gpr[rb], ra); + goto logical_done; +#endif case 284: /* xor */ regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]); goto logical_done; @@ -1201,6 +1309,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb]; goto logical_done; + case 378: /* popcntw */ + do_popcnt(regs, regs->gpr[rd], 32, ra); + goto logical_done; + case 412: /* orc */ regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb]; goto logical_done; @@ -1212,7 +1324,11 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 476: /* nand */ regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]); goto logical_done; - +#ifdef CONFIG_PPC64 + case 506: /* popcntd */ + do_popcnt(regs, regs->gpr[rd], 64, ra); + goto logical_done; +#endif case 922: /* extsh */ regs->gpr[ra] = (signed short) regs->gpr[rd]; goto logical_done; diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index d5b4d9498c54..56aac4c22025 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -184,7 +184,7 @@ err1; std r0,8(r3) mtctr r6 mr r8,r3 14: -err1; dcbz r0,r3 +err1; dcbz 0,r3 add r3,r3,r9 bdnz 14b diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/8xx_mmu.c index f4c6472f2fc4..f29212e40f40 100644 --- a/arch/powerpc/mm/8xx_mmu.c +++ b/arch/powerpc/mm/8xx_mmu.c @@ -22,8 +22,11 @@ extern int __map_without_ltlbs; +static unsigned long block_mapped_ram; + /* - * Return PA for this VA if it is in IMMR area, or 0 + * Return PA for this VA if it is in an area mapped with LTLBs. 
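+ * (That is, the IMMR window or the first block_mapped_ram bytes of RAM.)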
+ * Otherwise, returns 0 */ phys_addr_t v_block_mapped(unsigned long va) { @@ -33,11 +36,13 @@ phys_addr_t v_block_mapped(unsigned long va) return 0; if (va >= VIRT_IMMR_BASE && va < VIRT_IMMR_BASE + IMMR_SIZE) return p + va - VIRT_IMMR_BASE; + if (va >= PAGE_OFFSET && va < PAGE_OFFSET + block_mapped_ram) + return __pa(va); return 0; } /* - * Return VA for a given PA or 0 if not mapped + * Return VA for a given PA mapped with LTLBs or 0 if not mapped */ unsigned long p_block_mapped(phys_addr_t pa) { @@ -47,6 +52,8 @@ unsigned long p_block_mapped(phys_addr_t pa) return 0; if (pa >= p && pa < p + IMMR_SIZE) return VIRT_IMMR_BASE + pa - p; + if (pa < block_mapped_ram) + return (unsigned long)__va(pa); return 0; } @@ -58,7 +65,7 @@ unsigned long p_block_mapped(phys_addr_t pa) void __init MMU_init_hw(void) { /* PIN up to the 3 first 8Mb after IMMR in DTLB table */ -#ifdef CONFIG_PIN_TLB +#ifdef CONFIG_PIN_TLB_DATA unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000; unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SHARED | _PAGE_DIRTY; #ifdef CONFIG_PIN_TLB_IMMR @@ -80,7 +87,7 @@ void __init MMU_init_hw(void) #endif } -static void mmu_mapin_immr(void) +static void __init mmu_mapin_immr(void) { unsigned long p = PHYS_IMMR_BASE; unsigned long v = VIRT_IMMR_BASE; @@ -96,8 +103,11 @@ static void mmu_mapin_immr(void) extern unsigned int DTLBMiss_jmp; #endif extern unsigned int DTLBMiss_cmp, FixupDAR_cmp; +#ifndef CONFIG_PIN_TLB_TEXT +extern unsigned int ITLBMiss_cmp; +#endif -void mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) +static void __init mmu_patch_cmp_limit(unsigned int *addr, unsigned long mapped) { unsigned int instr = *addr; @@ -116,6 +126,9 @@ unsigned long __init mmu_mapin_ram(unsigned long top) #ifndef CONFIG_PIN_TLB_IMMR patch_instruction(&DTLBMiss_jmp, PPC_INST_NOP); #endif +#ifndef CONFIG_PIN_TLB_TEXT + mmu_patch_cmp_limit(&ITLBMiss_cmp, 0); +#endif } else { mapped = top & ~(LARGE_PAGE_SIZE_8M - 1); } @@ -133,11 +146,13 @@ unsigned long __init mmu_mapin_ram(unsigned long top) if (mapped) memblock_set_current_limit(mapped); + block_mapped_ram = mapped; + return mapped; } -void setup_initial_memory_limit(phys_addr_t first_memblock_base, - phys_addr_t first_memblock_size) +void __init setup_initial_memory_limit(phys_addr_t first_memblock_base, + phys_addr_t first_memblock_size) { /* We don't currently support the first MEMBLOCK not mapping 0 * physical on those processors diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 7414034df1c3..fa6a6ba34355 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -22,8 +22,6 @@ ifeq ($(CONFIG_PPC_STD_MMU_64),y) obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o endif -obj-$(CONFIG_PPC_ICSWX) += icswx.o -obj-$(CONFIG_PPC_ICSWX_PID) += icswx_pid.o obj-$(CONFIG_40x) += 40x_mmu.o obj-$(CONFIG_44x) += 44x_mmu.o obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 4c422632047b..4797d08581ce 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -45,43 +45,39 @@ #include <asm/siginfo.h> #include <asm/debug.h> -#include "icswx.h" - -#ifdef CONFIG_KPROBES -static inline int notify_page_fault(struct pt_regs *regs) +static inline bool notify_page_fault(struct pt_regs *regs) { - int ret = 0; + bool ret = false; +#ifdef CONFIG_KPROBES /* kprobe_running() needs smp_processor_id() */ if (!user_mode(regs)) { preempt_disable(); if (kprobe_running() && kprobe_fault_handler(regs, 11)) - ret = 1; + ret = true; 
preempt_enable(); } +#endif /* CONFIG_KPROBES */ + + if (unlikely(debugger_fault_handler(regs))) + ret = true; return ret; } -#else -static inline int notify_page_fault(struct pt_regs *regs) -{ - return 0; -} -#endif /* * Check whether the instruction at regs->nip is a store using * an update addressing form which will update r1. */ -static int store_updates_sp(struct pt_regs *regs) +static bool store_updates_sp(struct pt_regs *regs) { unsigned int inst; if (get_user(inst, (unsigned int __user *)regs->nip)) - return 0; + return false; /* check for 1 in the rA field */ if (((inst >> 16) & 0x1f) != 1) - return 0; + return false; /* check major opcode */ switch (inst >> 26) { case 37: /* stwu */ @@ -89,7 +85,7 @@ static int store_updates_sp(struct pt_regs *regs) case 45: /* sthu */ case 53: /* stfsu */ case 55: /* stfdu */ - return 1; + return true; case 62: /* std or stdu */ return (inst & 3) == 1; case 31: @@ -101,18 +97,53 @@ static int store_updates_sp(struct pt_regs *regs) case 439: /* sthux */ case 695: /* stfsux */ case 759: /* stfdux */ - return 1; + return true; } } - return 0; + return false; } /* * do_page_fault error handling helpers */ -#define MM_FAULT_RETURN 0 -#define MM_FAULT_CONTINUE -1 -#define MM_FAULT_ERR(sig) (sig) +static int +__bad_area_nosemaphore(struct pt_regs *regs, unsigned long address, int si_code) +{ + /* + * If we are in kernel mode, bail out with a SEGV, this will + * be caught by the assembly which will restore the non-volatile + * registers before calling bad_page_fault() + */ + if (!user_mode(regs)) + return SIGSEGV; + + _exception(SIGSEGV, regs, si_code, address); + + return 0; +} + +static noinline int bad_area_nosemaphore(struct pt_regs *regs, unsigned long address) +{ + return __bad_area_nosemaphore(regs, address, SEGV_MAPERR); +} + +static int __bad_area(struct pt_regs *regs, unsigned long address, int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + return __bad_area_nosemaphore(regs, address, si_code); +} + +static noinline int bad_area(struct pt_regs *regs, unsigned long address) +{ + return __bad_area(regs, address, SEGV_MAPERR); +} static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int fault) @@ -121,7 +152,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, unsigned int lsb = 0; if (!user_mode(regs)) - return MM_FAULT_ERR(SIGBUS); + return SIGBUS; current->thread.trap_nr = BUS_ADRERR; info.si_signo = SIGBUS; @@ -142,25 +173,17 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, #endif info.si_addr_lsb = lsb; force_sig_info(SIGBUS, &info, current); - return MM_FAULT_RETURN; + return 0; } static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) { /* - * Pagefault was interrupted by SIGKILL. We have no reason to - * continue the pagefault. + * Kernel page fault interrupted by SIGKILL. We have no reason to + * continue processing. 
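+ *
+ * (A user task with a fatal signal pending will handle it on the way
+ * back to userspace, so only the kernel case needs to bail out here;
+ * the SIGKILL return lets the caller run the exception-table fixup.)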
*/ - if (fatal_signal_pending(current)) { - /* Coming from kernel, we need to deal with uaccess fixups */ - if (user_mode(regs)) - return MM_FAULT_RETURN; - return MM_FAULT_ERR(SIGKILL); - } - - /* No fault: be happy */ - if (!(fault & VM_FAULT_ERROR)) - return MM_FAULT_CONTINUE; + if (fatal_signal_pending(current) && !user_mode(regs)) + return SIGKILL; /* Out of memory */ if (fault & VM_FAULT_OOM) { @@ -169,19 +192,176 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault) * made us unable to handle the page fault gracefully. */ if (!user_mode(regs)) - return MM_FAULT_ERR(SIGKILL); + return SIGSEGV; pagefault_out_of_memory(); - return MM_FAULT_RETURN; + } else { + if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| + VM_FAULT_HWPOISON_LARGE)) + return do_sigbus(regs, addr, fault); + else if (fault & VM_FAULT_SIGSEGV) + return bad_area_nosemaphore(regs, addr); + else + BUG(); + } + return 0; +} + +/* Is this a bad kernel fault ? */ +static bool bad_kernel_fault(bool is_exec, unsigned long error_code, + unsigned long address) +{ + if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT))) { + printk_ratelimited(KERN_CRIT "kernel tried to execute" + " exec-protected page (%lx) -" + "exploit attempt? (uid: %d)\n", + address, from_kuid(&init_user_ns, + current_uid())); } + return is_exec || (address >= TASK_SIZE); +} - if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) - return do_sigbus(regs, addr, fault); +static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address, + struct vm_area_struct *vma, + bool store_update_sp) +{ + /* + * N.B. The POWER/Open ABI allows programs to access up to + * 288 bytes below the stack pointer. + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + return true; - /* We don't understand the fault code, this is fatal */ - BUG(); - return MM_FAULT_CONTINUE; + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->gpr[1] && !store_update_sp) + return true; + } + return false; +} + +static bool access_error(bool is_write, bool is_exec, + struct vm_area_struct *vma) +{ + /* + * Allow execution from readable areas if the MMU does not + * provide separate controls over reading and executing. + * + * Note: That code used to not be enabled for 4xx/BookE. + * It is now as I/D cache coherency for these is done at + * set_pte_at() time and I see no reason why the test + * below wouldn't be valid on those processors. This -may- + * break programs compiled with a really old ABI though. 
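+ *
+ * (Concretely: an exec fault on a readable mapping that lacks VM_EXEC
+ * is tolerated only when CPU_FTR_NOEXECUTE is absent, i.e. when the
+ * MMU cannot enforce execute permission per page.)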
+ */
+	if (is_exec) {
+		return !(vma->vm_flags & VM_EXEC) &&
+			(cpu_has_feature(CPU_FTR_NOEXECUTE) ||
+			 !(vma->vm_flags & (VM_READ | VM_WRITE)));
+	}
+
+	if (is_write) {
+		if (unlikely(!(vma->vm_flags & VM_WRITE)))
+			return true;
+		return false;
+	}
+
+	if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
+		return true;
+
+	return false;
+}
+
+#ifdef CONFIG_PPC_SMLPAR
+static inline void cmo_account_page_fault(void)
+{
+	if (firmware_has_feature(FW_FEATURE_CMO)) {
+		u32 page_ins;
+
+		preempt_disable();
+		page_ins = be32_to_cpu(get_lppaca()->page_ins);
+		page_ins += 1 << PAGE_FACTOR;
+		get_lppaca()->page_ins = cpu_to_be32(page_ins);
+		preempt_enable();
+	}
+}
+#else
+static inline void cmo_account_page_fault(void) { }
+#endif /* CONFIG_PPC_SMLPAR */
+
+#ifdef CONFIG_PPC_STD_MMU
+static void sanity_check_fault(bool is_write, unsigned long error_code)
+{
+	/*
+	 * For hash translation mode, we should never get a
+	 * PROTFAULT. Any pte update that reduces access will result in us
+	 * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
+	 * fault instead of DSISR_PROTFAULT.
+	 *
+	 * A pte update to relax the access will not result in a hash page table
+	 * entry invalidate and hence can result in DSISR_PROTFAULT.
+	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
+	 * the special !is_write in the below conditional.
+	 *
+	 * For platforms that don't support a coherent icache but do support
+	 * a per-page noexec bit, we set things up so that the D/I cache
+	 * sync is done via the fault. But that is handled by the low level
+	 * hash fault code (hash_page_do_lazy_icache()) and we should not
+	 * reach here in that case.
+	 *
+	 * For wrong accesses that can result in PROTFAULT, the above
+	 * vma->vm_flags check should handle those, and hence we fall through
+	 * to the bad_area handling correctly.
+	 *
+	 * For embedded platforms with per-page exec support but no coherent
+	 * icache, we do get PROTFAULT and we handle that D/I cache sync in
+	 * set_pte_at while taking the noexec/prot fault. Hence this WARN_ON
+	 * is conditional on the server MMU.
+	 *
+	 * For radix, we can get a prot fault in the autonuma case, because
+	 * the radix page table will have the page marked noaccess for user.
+	 */
+	if (!radix_enabled() && !is_write)
+		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
+}
+#else
+static void sanity_check_fault(bool is_write, unsigned long error_code) { }
+#endif /* CONFIG_PPC_STD_MMU */
+
+/*
+ * Define the correct "is_write" bit in error_code based
+ * on the processor family
+ */
+#if (defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
+#define page_fault_is_write(__err)	((__err) & ESR_DST)
+#define page_fault_is_bad(__err)	(0)
+#else
+#define page_fault_is_write(__err)	((__err) & DSISR_ISSTORE)
+#if defined(CONFIG_PPC_8xx)
+#define page_fault_is_bad(__err)	((__err) & DSISR_NOEXEC_OR_G)
+#elif defined(CONFIG_PPC64)
+#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_64S)
+#else
+#define page_fault_is_bad(__err)	((__err) & DSISR_BAD_FAULT_32S)
+#endif
+#endif
+
 /*
 * For 600- and 800-family processors, the error_code parameter is DSISR
 * for a data fault, SRR1 for an instruction fault. For 400-family processors
@@ -195,92 +375,56 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
 * The return value is 0 if the fault was handled, or the signal
 * number if this is a kernel fault that can't be handled here.
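 * (The body of the handler now lives in __do_page_fault(); do_page_fault()
 * is a thin wrapper that brackets it with exception_enter()/exception_exit().)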
*/ -int do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +static int __do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) { - enum ctx_state prev_state = exception_enter(); struct vm_area_struct * vma; struct mm_struct *mm = current->mm; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - int code = SEGV_MAPERR; - int is_write = 0; - int trap = TRAP(regs); - int is_exec = trap == 0x400; + int is_exec = TRAP(regs) == 0x400; int is_user = user_mode(regs); - int fault; - int rc = 0, store_update_sp = 0; + int is_write = page_fault_is_write(error_code); + int fault, major = 0; + bool store_update_sp = false; -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) - /* - * Fortunately the bit assignments in SRR1 for an instruction - * fault and DSISR for a data fault are mostly the same for the - * bits we are interested in. But there are some bits which - * indicate errors in DSISR but can validly be set in SRR1. - */ - if (is_exec) - error_code &= 0x48200000; - else - is_write = error_code & DSISR_ISSTORE; -#else - is_write = error_code & ESR_DST; -#endif /* CONFIG_4xx || CONFIG_BOOKE */ + if (notify_page_fault(regs)) + return 0; -#ifdef CONFIG_PPC_ICSWX - /* - * we need to do this early because this "data storage - * interrupt" does not update the DAR/DEAR so we don't want to - * look at it - */ - if (error_code & ICSWX_DSI_UCT) { - rc = acop_handle_fault(regs, address, error_code); - if (rc) - goto bail; + if (unlikely(page_fault_is_bad(error_code))) { + if (is_user) { + _exception(SIGBUS, regs, BUS_OBJERR, address); + return 0; + } + return SIGBUS; } -#endif /* CONFIG_PPC_ICSWX */ - - if (notify_page_fault(regs)) - goto bail; - if (unlikely(debugger_fault_handler(regs))) - goto bail; + /* Additional sanity check(s) */ + sanity_check_fault(is_write, error_code); /* * The kernel should never take an execute fault nor should it * take a page fault to a kernel address. */ - if (!is_user && (is_exec || (address >= TASK_SIZE))) { - rc = SIGSEGV; - goto bail; - } + if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address))) + return SIGSEGV; -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE) || \ - defined(CONFIG_PPC_BOOK3S_64) || defined(CONFIG_PPC_8xx)) - if (error_code & DSISR_DABRMATCH) { - /* breakpoint match */ - do_break(regs, address, error_code); - goto bail; + /* + * If we're in an interrupt, have no user context or are running + * in a region with pagefaults disabled then we must not take the fault + */ + if (unlikely(faulthandler_disabled() || !mm)) { + if (is_user) + printk_ratelimited(KERN_ERR "Page fault in user mode" + " with faulthandler_disabled()=%d" + " mm=%p\n", + faulthandler_disabled(), mm); + return bad_area_nosemaphore(regs, address); } -#endif /* We restore the interrupt state now */ if (!arch_irq_disabled_regs(regs)) local_irq_enable(); - if (faulthandler_disabled() || mm == NULL) { - if (!is_user) { - rc = SIGSEGV; - goto bail; - } - /* faulthandler_disabled() in user mode is really bad, - as is current->mm == NULL. 
*/ - printk(KERN_EMERG "Page fault in user mode with " - "faulthandler_disabled() = %d mm = %p\n", - faulthandler_disabled(), mm); - printk(KERN_EMERG "NIP = %lx MSR = %lx\n", - regs->nip, regs->msr); - die("Weird page fault", regs, SIGSEGV); - } - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); /* @@ -293,6 +437,10 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, if (is_user) flags |= FAULT_FLAG_USER; + if (is_write) + flags |= FAULT_FLAG_WRITE; + if (is_exec) + flags |= FAULT_FLAG_INSTRUCTION; /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -309,9 +457,9 @@ int do_page_fault(struct pt_regs *regs, unsigned long address, * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. */ - if (!down_read_trylock(&mm->mmap_sem)) { + if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if (!is_user && !search_exception_tables(regs->nip)) - goto bad_area_nosemaphore; + return bad_area_nosemaphore(regs, address); retry: down_read(&mm->mmap_sem); @@ -325,122 +473,24 @@ retry: } vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) + if (unlikely(!vma)) + return bad_area(regs, address); + if (likely(vma->vm_start <= address)) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) + return bad_area(regs, address); - /* - * N.B. The POWER/Open ABI allows programs to access up to - * 288 bytes below the stack pointer. - * The kernel signal delivery code writes up to about 1.5kB - * below the stack pointer (r1) before decrementing it. - * The exec code can write slightly over 640kB to the stack - * before setting the user r1. Thus we allow the stack to - * expand to 1MB without further checks. - */ - if (address + 0x100000 < vma->vm_end) { - /* get user regs even if this fault is in kernel mode */ - struct pt_regs *uregs = current->thread.regs; - if (uregs == NULL) - goto bad_area; + /* The stack is being expanded, check if it's valid */ + if (unlikely(bad_stack_expansion(regs, address, vma, store_update_sp))) + return bad_area(regs, address); - /* - * A user-mode access to an address a long way below - * the stack pointer is only valid if the instruction - * is one which would update the stack pointer to the - * address accessed if the instruction completed, - * i.e. either stwu rs,n(r1) or stwux rs,r1,rb - * (or the byte, halfword, float or double forms). - * - * If we don't check this then any write to the area - * between the last mapped region and the stack will - * expand the stack rather than segfaulting. - */ - if (address + 2048 < uregs->gpr[1] && !store_update_sp) - goto bad_area; - } - if (expand_stack(vma, address)) - goto bad_area; + /* Try to expand it */ + if (unlikely(expand_stack(vma, address))) + return bad_area(regs, address); good_area: - code = SEGV_ACCERR; -#if defined(CONFIG_6xx) - if (error_code & 0x95700000) - /* an error such as lwarx to I/O controller space, - address matching DABR, eciwx, etc. */ - goto bad_area; -#endif /* CONFIG_6xx */ -#if defined(CONFIG_8xx) - /* The MPC8xx seems to always set 0x80000000, which is - * "undefined". Of those that can be set, this is the only - * one which seems bad. - */ - if (error_code & 0x10000000) - /* Guarded storage error. 
*/
-			goto bad_area;
-#endif /* CONFIG_8xx */
-
-	if (is_exec) {
-		/*
-		 * Allow execution from readable areas if the MMU does not
-		 * provide separate controls over reading and executing.
-		 *
-		 * Note: That code used to not be enabled for 4xx/BookE.
-		 * It is now as I/D cache coherency for these is done at
-		 * set_pte_at() time and I see no reason why the test
-		 * below wouldn't be valid on those processors. This -may-
-		 * break programs compiled with a really old ABI though.
-		 */
-		if (!(vma->vm_flags & VM_EXEC) &&
-		    (cpu_has_feature(CPU_FTR_NOEXECUTE) ||
-		     !(vma->vm_flags & (VM_READ | VM_WRITE))))
-			goto bad_area;
-	/* a write */
-	} else if (is_write) {
-		if (!(vma->vm_flags & VM_WRITE))
-			goto bad_area;
-		flags |= FAULT_FLAG_WRITE;
-	/* a read */
-	} else {
-		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
-			goto bad_area;
-	}
-#ifdef CONFIG_PPC_STD_MMU
-	/*
-	 * For hash translation mode, we should never get a
-	 * PROTFAULT. Any update to pte to reduce access will result in us
-	 * removing the hash page table entry, thus resulting in a DSISR_NOHPTE
-	 * fault instead of DSISR_PROTFAULT.
-	 *
-	 * A pte update to relax the access will not result in a hash page table
-	 * entry invalidate and hence can result in DSISR_PROTFAULT.
-	 * ptep_set_access_flags() doesn't do a hpte flush. This is why we have
-	 * the special !is_write in the below conditional.
-	 *
-	 * For platforms that doesn't supports coherent icache and do support
-	 * per page noexec bit, we do setup things such that we do the
-	 * sync between D/I cache via fault. But that is handled via low level
-	 * hash fault code (hash_page_do_lazy_icache()) and we should not reach
-	 * here in such case.
-	 *
-	 * For wrong access that can result in PROTFAULT, the above vma->vm_flags
-	 * check should handle those and hence we should fall to the bad_area
-	 * handling correctly.
-	 *
-	 * For embedded with per page exec support that doesn't support coherent
-	 * icache we do get PROTFAULT and we handle that D/I cache sync in
-	 * set_pte_at while taking the noexec/prot fault. Hence this is WARN_ON
-	 * is conditional for server MMU.
-	 *
-	 * For radix, we can get prot fault for autonuma case, because radix
-	 * page table will have them marked noaccess for user.
-	 */
-	if (!radix_enabled() && !is_write)
-		WARN_ON_ONCE(error_code & DSISR_PROTFAULT);
-#endif /* CONFIG_PPC_STD_MMU */
+	if (unlikely(access_error(is_write, is_exec, vma)))
+		return bad_area(regs, address);
 
 	/*
 	 * If for any reason at all we couldn't handle the fault,
@@ -448,6 +498,7 @@ good_area:
 	 * the fault.
 	 */
 	fault = handle_mm_fault(vma, address, flags);
+	major |= fault & VM_FAULT_MAJOR;
 
 	/*
 	 * Handle the retry right now, the mmap_sem has been released in that
@@ -465,64 +516,39 @@ good_area:
 			if (!fatal_signal_pending(current))
 				goto retry;
 		}
-		/* We will enter mm_fault_error() below */
-	} else
-		up_read(&current->mm->mmap_sem);
-
-	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-		if (fault & VM_FAULT_SIGSEGV)
-			goto bad_area_nosemaphore;
-		rc = mm_fault_error(regs, address, fault);
-		if (rc >= MM_FAULT_RETURN)
-			goto bail;
-		else
-			rc = 0;
+
+		/*
+		 * User mode? Just return to handle the fatal exception otherwise
+		 * return to bad_page_fault
+		 */
+		return is_user ? 0 : SIGBUS;
 	}
+
+	up_read(&current->mm->mmap_sem);
+
+	if (unlikely(fault & VM_FAULT_ERROR))
+		return mm_fault_error(regs, address, fault);
+
 	/*
 	 * Major/minor page fault accounting.
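	 * ('major' was latched right after handle_mm_fault(), before the
	 * retry loop, so a fault that needed a retry still counts as major.)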
*/ - if (fault & VM_FAULT_MAJOR) { + if (major) { current->maj_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, - regs, address); -#ifdef CONFIG_PPC_SMLPAR - if (firmware_has_feature(FW_FEATURE_CMO)) { - u32 page_ins; - - preempt_disable(); - page_ins = be32_to_cpu(get_lppaca()->page_ins); - page_ins += 1 << PAGE_FACTOR; - get_lppaca()->page_ins = cpu_to_be32(page_ins); - preempt_enable(); - } -#endif /* CONFIG_PPC_SMLPAR */ + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address); + cmo_account_page_fault(); } else { current->min_flt++; - perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, - regs, address); - } - - goto bail; - -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses cause a SIGSEGV */ - if (is_user) { - _exception(SIGSEGV, regs, code, address); - goto bail; + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address); } + return 0; +} +NOKPROBE_SYMBOL(__do_page_fault); - if (is_exec && (error_code & DSISR_PROTFAULT)) - printk_ratelimited(KERN_CRIT "kernel tried to execute NX-protected" - " page (%lx) - exploit attempt? (uid: %d)\n", - address, from_kuid(&init_user_ns, current_uid())); - - rc = SIGSEGV; - -bail: +int do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + enum ctx_state prev_state = exception_enter(); + int rc = __do_page_fault(regs, address, error_code); exception_exit(prev_state); return rc; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 5b10b4fcbf76..1c4cd82421bc 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -508,9 +508,9 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node, printk(KERN_INFO "Huge page(16GB) memory: " "addr = 0x%lX size = 0x%lX pages = %d\n", phys_addr, block_size, expected_pages); - if (phys_addr + (16 * GB) <= memblock_end_of_DRAM()) { + if (phys_addr + block_size * expected_pages <= memblock_end_of_DRAM()) { memblock_reserve(phys_addr, block_size * expected_pages); - add_gpage(phys_addr, block_size, expected_pages); + pseries_add_gpage(phys_addr, block_size, expected_pages); } return 0; } @@ -1020,6 +1020,7 @@ void __init hash__early_init_mmu(void) __kernel_virt_size = H_KERN_VIRT_SIZE; __vmalloc_start = H_VMALLOC_START; __vmalloc_end = H_VMALLOC_END; + __kernel_io_start = H_KERN_IO_START; vmemmap = (struct page *)H_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 70a3a2bdf06c..2d4a331e498e 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -38,26 +38,6 @@ unsigned int HPAGE_SHIFT; EXPORT_SYMBOL(HPAGE_SHIFT); -/* - * Tracks gpages after the device tree is scanned and before the - * huge_boot_pages list is ready. On non-Freescale implementations, this is - * just used to track 16G pages and so is a single array. 
FSL-based - * implementations may have more than one gpage size, so we need multiple - * arrays - */ -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -#define MAX_NUMBER_GPAGES 128 -struct psize_gpages { - u64 gpage_list[MAX_NUMBER_GPAGES]; - unsigned int nr_gpages; -}; -static struct psize_gpages gpage_freearray[MMU_PAGE_COUNT]; -#else -#define MAX_NUMBER_GPAGES 1024 -static u64 gpage_freearray[MAX_NUMBER_GPAGES]; -static unsigned nr_gpages; -#endif - #define hugepd_none(hpd) (hpd_val(hpd) == 0) pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz) @@ -215,145 +195,20 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz return hugepte_offset(*hpdp, addr, pdshift); } -#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) -/* Build list of addresses of gigantic pages. This function is used in early - * boot before the buddy allocator is setup. - */ -void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) -{ - unsigned int idx = shift_to_mmu_psize(__ffs(page_size)); - int i; - - if (addr == 0) - return; - - gpage_freearray[idx].nr_gpages = number_of_pages; - - for (i = 0; i < number_of_pages; i++) { - gpage_freearray[idx].gpage_list[i] = addr; - addr += page_size; - } -} - -/* - * Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. - */ -int alloc_bootmem_huge_page(struct hstate *hstate) -{ - struct huge_bootmem_page *m; - int idx = shift_to_mmu_psize(huge_page_shift(hstate)); - int nr_gpages = gpage_freearray[idx].nr_gpages; - - if (nr_gpages == 0) - return 0; - -#ifdef CONFIG_HIGHMEM - /* - * If gpages can be in highmem we can't use the trick of storing the - * data structure in the page; allocate space for this - */ - m = memblock_virt_alloc(sizeof(struct huge_bootmem_page), 0); - m->phys = gpage_freearray[idx].gpage_list[--nr_gpages]; -#else - m = phys_to_virt(gpage_freearray[idx].gpage_list[--nr_gpages]); -#endif - - list_add(&m->list, &huge_boot_pages); - gpage_freearray[idx].nr_gpages = nr_gpages; - gpage_freearray[idx].gpage_list[nr_gpages] = 0; - m->hstate = hstate; - - return 1; -} +#ifdef CONFIG_PPC_BOOK3S_64 /* - * Scan the command line hugepagesz= options for gigantic pages; store those in - * a list that we use to allocate the memory once all options are parsed. + * Tracks gpages after the device tree is scanned and before the + * huge_boot_pages list is ready on pseries. */ - -unsigned long gpage_npages[MMU_PAGE_COUNT]; - -static int __init do_gpage_early_setup(char *param, char *val, - const char *unused, void *arg) -{ - static phys_addr_t size; - unsigned long npages; - - /* - * The hugepagesz and hugepages cmdline options are interleaved. We - * use the size variable to keep track of whether or not this was done - * properly and skip over instances where it is incorrect. Other - * command-line parsing code will issue warnings, so we don't need to. 
- * - */ - if ((strcmp(param, "default_hugepagesz") == 0) || - (strcmp(param, "hugepagesz") == 0)) { - size = memparse(val, NULL); - } else if (strcmp(param, "hugepages") == 0) { - if (size != 0) { - if (sscanf(val, "%lu", &npages) <= 0) - npages = 0; - if (npages > MAX_NUMBER_GPAGES) { - pr_warn("MMU: %lu pages requested for page " -#ifdef CONFIG_PHYS_ADDR_T_64BIT - "size %llu KB, limiting to " -#else - "size %u KB, limiting to " -#endif - __stringify(MAX_NUMBER_GPAGES) "\n", - npages, size / 1024); - npages = MAX_NUMBER_GPAGES; - } - gpage_npages[shift_to_mmu_psize(__ffs(size))] = npages; - size = 0; - } - } - return 0; -} - +#define MAX_NUMBER_GPAGES 1024 +__initdata static u64 gpage_freearray[MAX_NUMBER_GPAGES]; +__initdata static unsigned nr_gpages; /* - * This function allocates physical space for pages that are larger than the - * buddy allocator can handle. We want to allocate these in highmem because - * the amount of lowmem is limited. This means that this function MUST be - * called before lowmem_end_addr is set up in MMU_init() in order for the lmb - * allocate to grab highmem. - */ -void __init reserve_hugetlb_gpages(void) -{ - static __initdata char cmdline[COMMAND_LINE_SIZE]; - phys_addr_t size, base; - int i; - - strlcpy(cmdline, boot_command_line, COMMAND_LINE_SIZE); - parse_args("hugetlb gpages", cmdline, NULL, 0, 0, 0, - NULL, &do_gpage_early_setup); - - /* - * Walk gpage list in reverse, allocating larger page sizes first. - * Skip over unsupported sizes, or sizes that have 0 gpages allocated. - * When we reach the point in the list where pages are no longer - * considered gpages, we're done. - */ - for (i = MMU_PAGE_COUNT-1; i >= 0; i--) { - if (mmu_psize_defs[i].shift == 0 || gpage_npages[i] == 0) - continue; - else if (mmu_psize_to_shift(i) < (MAX_ORDER + PAGE_SHIFT)) - break; - - size = (phys_addr_t)(1ULL << mmu_psize_to_shift(i)); - base = memblock_alloc_base(size * gpage_npages[i], size, - MEMBLOCK_ALLOC_ANYWHERE); - add_gpage(base, size, gpage_npages[i]); - } -} - -#else /* !PPC_FSL_BOOK3E */ - -/* Build list of addresses of gigantic pages. This function is used in early + * Build list of addresses of gigantic pages. This function is used in early * boot before the buddy allocator is setup. */ -void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) +void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) { if (!addr) return; @@ -365,10 +220,7 @@ void add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages) } } -/* Moves the gigantic page addresses from the temporary list to the - * huge_boot_pages list. 
- */ -int alloc_bootmem_huge_page(struct hstate *hstate) +int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate) { struct huge_bootmem_page *m; if (nr_gpages == 0) @@ -381,6 +233,17 @@ int alloc_bootmem_huge_page(struct hstate *hstate) } #endif + +int __init alloc_bootmem_huge_page(struct hstate *h) +{ + +#ifdef CONFIG_PPC_BOOK3S_64 + if (firmware_has_feature(FW_FEATURE_LPAR) && !radix_enabled()) + return pseries_alloc_bootmem_huge_page(h); +#endif + return __alloc_bootmem_huge_page(h); +} + #if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx) #define HUGEPD_FREELIST_SIZE \ ((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t)) @@ -981,7 +844,6 @@ EXPORT_SYMBOL_GPL(__find_linux_pte); int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { - unsigned long mask; unsigned long pte_end; struct page *head, *page; pte_t pte; @@ -992,18 +854,10 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr, end = pte_end; pte = READ_ONCE(*ptep); - mask = _PAGE_PRESENT | _PAGE_READ; - /* - * On some CPUs like the 8xx, _PAGE_RW hence _PAGE_WRITE is defined - * as 0 and _PAGE_RO has to be set when a page is not writable - */ - if (write) - mask |= _PAGE_WRITE; - else - mask |= _PAGE_RO; - - if ((pte_val(pte) & mask) != mask) + if (!pte_present(pte) || !pte_read(pte)) + return 0; + if (write && !pte_write(pte)) return 0; /* hugepages are never "special" */ diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c deleted file mode 100644 index 1fa794d7d59f..000000000000 --- a/arch/powerpc/mm/icswx.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * ICSWX and ACOP Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/module.h> -#include <linux/uaccess.h> - -#include "icswx.h" - -/* - * The processor and its L2 cache cause the icswx instruction to - * generate a COP_REQ transaction on PowerBus. The transaction has no - * address, and the processor does not perform an MMU access to - * authenticate the transaction. The command portion of the PowerBus - * COP_REQ transaction includes the LPAR_ID (LPID) and the coprocessor - * Process ID (PID), which the coprocessor compares to the authorized - * LPID and PID held in the coprocessor, to determine if the process - * is authorized to generate the transaction. The data of the COP_REQ - * transaction is 128-byte or less in size and is placed in cacheable - * memory on a 128-byte cache line boundary. - * - * The task to use a coprocessor should use use_cop() to mark the use - * of the Coprocessor Type (CT) and context switching. On a server - * class processor, the PID register is used only for coprocessor - * management + * and so a coprocessor PID is allocated before - * executing icswx + * instruction. Drop_cop() is used to free the - * coprocessor PID. - * - * Example: - * Host Fabric Interface (HFI) is a PowerPC network coprocessor. - * Each HFI have multiple windows. Each HFI window serves as a - * network device sending to and receiving from HFI network. 
- * HFI immediate send function uses icswx instruction. The immediate - * send function allows small (single cache-line) packets be sent - * without using the regular HFI send FIFO and doorbell, which are - * much slower than immediate send. - * - * For each task intending to use HFI immediate send, the HFI driver - * calls use_cop() to obtain a coprocessor PID for the task. - * The HFI driver then allocate a free HFI window and save the - * coprocessor PID to the HFI window to allow the task to use the - * HFI window. - * - * The HFI driver repeatedly creates immediate send packets and - * issues icswx instruction to send data through the HFI window. - * The HFI compares the coprocessor PID in the CPU PID register - * to the PID held in the HFI window to determine if the transaction - * is allowed. - * - * When the task to release the HFI window, the HFI driver calls - * drop_cop() to release the coprocessor PID. - */ - -void switch_cop(struct mm_struct *next) -{ -#ifdef CONFIG_PPC_ICSWX_PID - mtspr(SPRN_PID, next->context.cop_pid); -#endif - mtspr(SPRN_ACOP, next->context.acop); -} - -/** - * Start using a coprocessor. - * @acop: mask of coprocessor to be used. - * @mm: The mm the coprocessor to associate with. Most likely current mm. - * - * Return a positive PID if successful. Negative errno otherwise. - * The returned PID will be fed to the coprocessor to determine if an - * icswx transaction is authenticated. - */ -int use_cop(unsigned long acop, struct mm_struct *mm) -{ - int ret; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return -ENODEV; - - if (!mm || !acop) - return -EINVAL; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - ret = get_cop_pid(mm); - if (ret < 0) - goto out; - - /* update acop */ - mm->context.acop |= acop; - - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. - */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - -out: - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); - - return ret; -} -EXPORT_SYMBOL_GPL(use_cop); - -/** - * Stop using a coprocessor. - * @acop: mask of coprocessor to be stopped. - * @mm: The mm the coprocessor associated with. - */ -void drop_cop(unsigned long acop, struct mm_struct *mm) -{ - int free_pid; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) - return; - - if (WARN_ON_ONCE(!mm)) - return; - - /* The page_table_lock ensures mm_users won't change under us */ - spin_lock(&mm->page_table_lock); - spin_lock(mm->context.cop_lockp); - - mm->context.acop &= ~acop; - - free_pid = disable_cop_pid(mm); - sync_cop(mm); - - /* - * If this is a threaded process then there might be other threads - * running. We need to send an IPI to force them to pick up any - * change in PID and ACOP. 
- */ - if (atomic_read(&mm->mm_users) > 1) - smp_call_function(sync_cop, mm, 1); - - if (free_pid != COP_PID_NONE) - free_cop_pid(free_pid); - - spin_unlock(mm->context.cop_lockp); - spin_unlock(&mm->page_table_lock); -} -EXPORT_SYMBOL_GPL(drop_cop); - -static int acop_use_cop(int ct) -{ - /* There is no alternate policy, yet */ - return -1; -} - -/* - * Get the instruction word at the NIP - */ -static u32 acop_get_inst(struct pt_regs *regs) -{ - u32 inst; - u32 __user *p; - - p = (u32 __user *)regs->nip; - if (!access_ok(VERIFY_READ, p, sizeof(*p))) - return 0; - - if (__get_user(inst, p)) - return 0; - - return inst; -} - -/** - * @regs: registers at time of interrupt - * @address: storage address - * @error_code: Fault code, usually the DSISR or ESR depending on - * processor type - * - * Return 0 if we are able to resolve the data storage fault that - * results from a CT miss in the ACOP register. - */ -int acop_handle_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) -{ - int ct; - u32 inst = 0; - - if (!cpu_has_feature(CPU_FTR_ICSWX)) { - pr_info("No coprocessors available"); - _exception(SIGILL, regs, ILL_ILLOPN, address); - } - - if (!user_mode(regs)) { - /* this could happen if the HV denies the - * kernel access, for now we just die */ - die("ICSWX from kernel failed", regs, SIGSEGV); - } - - /* Some implementations leave us a hint for the CT */ - ct = ICSWX_GET_CT_HINT(error_code); - if (ct < 0) { - /* we have to peek at the instruction word to figure out CT */ - u32 ccw; - u32 rs; - - inst = acop_get_inst(regs); - if (inst == 0) - return -1; - - rs = (inst >> (31 - 10)) & 0x1f; - ccw = regs->gpr[rs]; - ct = (ccw >> 16) & 0x3f; - } - - /* - * We could be here because another thread has enabled acop - * but the ACOP register has yet to be updated. - * - * This should have been taken care of by the IPI to sync all - * the threads (see smp_call_function(sync_cop, mm, 1)), but - * that could take forever if there are a significant amount - * of threads. - * - * Given the number of threads on some of these systems, - * perhaps this is the best way to sync ACOP rather than whack - * every thread with an IPI. - */ - if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) { - sync_cop(current->active_mm); - return 0; - } - - /* check for alternate policy */ - if (!acop_use_cop(ct)) - return 0; - - /* at this point the CT is unknown to the system */ - pr_warn("%s[%d]: Coprocessor %d is unavailable\n", - current->comm, current->pid, ct); - - /* get inst if we don't already have it */ - if (inst == 0) { - inst = acop_get_inst(regs); - if (inst == 0) - return -1; - } - - /* Check if the instruction is the "record form" */ - if (inst & 1) { - /* - * the instruction is "record" form so we can reject - * using CR0 - */ - regs->ccr &= ~(0xful << 28); - regs->ccr |= ICSWX_RC_NOT_FOUND << 28; - - /* Move on to the next instruction */ - regs->nip += 4; - } else { - /* - * There is no architected mechanism to report a bad - * CT so we could either SIGILL or report nothing. - * Since the non-record version should only bu used - * for "hints" or "don't care" we should probably do - * nothing. However, I could see how some people - * might want an SIGILL so it here if you want it. 
- */ -#ifdef CONFIG_PPC_ICSWX_USE_SIGILL - _exception(SIGILL, regs, ILL_ILLOPN, address); -#else - regs->nip += 4; -#endif - } - - return 0; -} -EXPORT_SYMBOL_GPL(acop_handle_fault); diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h deleted file mode 100644 index 6dedc08e62c8..000000000000 --- a/arch/powerpc/mm/icswx.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef _ARCH_POWERPC_MM_ICSWX_H_ -#define _ARCH_POWERPC_MM_ICSWX_H_ - -/* - * ICSWX and ACOP Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#include <asm/mmu_context.h> - -/* also used to denote that PIDs are not used */ -#define COP_PID_NONE 0 - -static inline void sync_cop(void *arg) -{ - struct mm_struct *mm = arg; - - if (mm == current->active_mm) - switch_cop(current->active_mm); -} - -#ifdef CONFIG_PPC_ICSWX_PID -extern int get_cop_pid(struct mm_struct *mm); -extern int disable_cop_pid(struct mm_struct *mm); -extern void free_cop_pid(int free_pid); -#else -#define get_cop_pid(m) (COP_PID_NONE) -#define disable_cop_pid(m) (COP_PID_NONE) -#define free_cop_pid(p) -#endif - -/* - * These are implementation bits for architected registers. If this - * ever becomes architecture the should be moved to reg.h et. al. - */ -/* UCT is the same bit for Server and Embedded */ -#define ICSWX_DSI_UCT 0x00004000 /* Unavailable Coprocessor Type */ - -#ifdef CONFIG_PPC_BOOK3E -/* Embedded implementation gives us no hints as to what the CT is */ -#define ICSWX_GET_CT_HINT(x) (-1) -#else -/* Server implementation contains the CT value in the DSISR */ -#define ICSWX_DSISR_CTMASK 0x00003f00 -#define ICSWX_GET_CT_HINT(x) (((x) & ICSWX_DSISR_CTMASK) >> 8) -#endif - -#define ICSWX_RC_STARTED 0x8 /* The request has been started */ -#define ICSWX_RC_NOT_IDLE 0x4 /* No coprocessor found idle */ -#define ICSWX_RC_NOT_FOUND 0x2 /* No coprocessor found */ -#define ICSWX_RC_UNDEFINED 0x1 /* Reserved */ - -extern int acop_handle_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code); - -static inline u64 acop_copro_type_bit(unsigned int type) -{ - return 1ULL << (63 - type); -} - -#endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */ diff --git a/arch/powerpc/mm/icswx_pid.c b/arch/powerpc/mm/icswx_pid.c deleted file mode 100644 index 91e30eb7d054..000000000000 --- a/arch/powerpc/mm/icswx_pid.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * ICSWX and ACOP/PID Management - * - * Copyright (C) 2011 Anton Blanchard, IBM Corp. <anton@samba.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- * - */ - -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/errno.h> -#include <linux/types.h> -#include <linux/mm.h> -#include <linux/spinlock.h> -#include <linux/idr.h> -#include <linux/module.h> -#include "icswx.h" - -#define COP_PID_MIN (COP_PID_NONE + 1) -#define COP_PID_MAX (0xFFFF) - -static DEFINE_SPINLOCK(mmu_context_acop_lock); -static DEFINE_IDA(cop_ida); - -static int new_cop_pid(struct ida *ida, int min_id, int max_id, - spinlock_t *lock) -{ - int index; - int err; - -again: - if (!ida_pre_get(ida, GFP_KERNEL)) - return -ENOMEM; - - spin_lock(lock); - err = ida_get_new_above(ida, min_id, &index); - spin_unlock(lock); - - if (err == -EAGAIN) - goto again; - else if (err) - return err; - - if (index > max_id) { - spin_lock(lock); - ida_remove(ida, index); - spin_unlock(lock); - return -ENOMEM; - } - - return index; -} - -int get_cop_pid(struct mm_struct *mm) -{ - int pid; - - if (mm->context.cop_pid == COP_PID_NONE) { - pid = new_cop_pid(&cop_ida, COP_PID_MIN, COP_PID_MAX, - &mmu_context_acop_lock); - if (pid >= 0) - mm->context.cop_pid = pid; - } - return mm->context.cop_pid; -} - -int disable_cop_pid(struct mm_struct *mm) -{ - int free_pid = COP_PID_NONE; - - if ((!mm->context.acop) && (mm->context.cop_pid != COP_PID_NONE)) { - free_pid = mm->context.cop_pid; - mm->context.cop_pid = COP_PID_NONE; - } - return free_pid; -} - -void free_cop_pid(int free_pid) -{ - spin_lock(&mmu_context_acop_lock); - ida_remove(&cop_ida, free_pid); - spin_unlock(&mmu_context_acop_lock); -} diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index 8a7c38b8d335..6419b33ca309 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -113,6 +113,12 @@ void __init MMU_setup(void) __map_without_bats = 1; __map_without_ltlbs = 1; } +#ifdef CONFIG_STRICT_KERNEL_RWX + if (rodata_enabled) { + __map_without_bats = 1; + __map_without_ltlbs = 1; + } +#endif } /* @@ -132,8 +138,6 @@ void __init MMU_init(void) * Reserve gigantic pages for hugetlb. This MUST occur before * lowmem_end_addr is initialized below. */ - reserve_hugetlb_gpages(); - if (memblock.memory.cnt > 1) { #ifndef CONFIG_WII memblock_enforce_memory_limit(memblock.memory.regions[0].size); diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 5b4c25d12ff3..588a521966ec 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,7 +356,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn) } EXPORT_SYMBOL_GPL(realmode_pfn_to_page); -#elif defined(CONFIG_FLATMEM) +#else struct page *realmode_pfn_to_page(unsigned long pfn) { @@ -365,7 +365,7 @@ struct page *realmode_pfn_to_page(unsigned long pfn) } EXPORT_SYMBOL_GPL(realmode_pfn_to_page); -#endif /* CONFIG_SPARSEMEM_VMEMMAP/CONFIG_FLATMEM */ +#endif /* CONFIG_SPARSEMEM_VMEMMAP */ #ifdef CONFIG_PPC_STD_MMU_64 static bool disable_radix; @@ -381,7 +381,7 @@ early_param("disable_radix", parse_disable_radix); * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do * radix. If not, we clear the radix feature bit so we fall back to hash. 
*/ -static void early_check_vec5(void) +static void __init early_check_vec5(void) { unsigned long root, chosen; int size; diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 46b4e67d2372..4362b86ef84c 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -436,7 +436,7 @@ void flush_dcache_icache_page(struct page *page) return; } #endif -#if defined(CONFIG_8xx) || defined(CONFIG_PPC64) +#if defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC64) /* On 8xx there is no need to kmap since highmem is not supported */ __flush_dcache_icache(page_address(page)); #else diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index abed1fe6992f..db9d8cb0f8ee 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -25,8 +25,6 @@ #include <asm/mmu_context.h> #include <asm/pgalloc.h> -#include "icswx.h" - static DEFINE_SPINLOCK(mmu_context_lock); static DEFINE_IDA(mmu_context_ida); @@ -164,16 +162,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) return index; mm->context.id = index; -#ifdef CONFIG_PPC_ICSWX - mm->context.cop_lockp = kmalloc(sizeof(spinlock_t), GFP_KERNEL); - if (!mm->context.cop_lockp) { - __destroy_context(index); - subpage_prot_free(mm); - mm->context.id = MMU_NO_CONTEXT; - return -ENOMEM; - } - spin_lock_init(mm->context.cop_lockp); -#endif /* CONFIG_PPC_ICSWX */ #ifdef CONFIG_PPC_64K_PAGES mm->context.pte_frag = NULL; @@ -225,12 +213,6 @@ void destroy_context(struct mm_struct *mm) #ifdef CONFIG_SPAPR_TCE_IOMMU WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif -#ifdef CONFIG_PPC_ICSWX - drop_cop(mm->context.acop, mm); - kfree(mm->context.cop_lockp); - mm->context.cop_lockp = NULL; -#endif /* CONFIG_PPC_ICSWX */ - if (radix_enabled()) { /* * Radix doesn't have a valid bit in the process table diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index d46128b22150..57fbc554c785 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -27,7 +27,7 @@ /* * On 40x and 8xx, we directly inline tlbia and tlbivax */ -#if defined(CONFIG_40x) || defined(CONFIG_8xx) +#if defined(CONFIG_40x) || defined(CONFIG_PPC_8xx) static inline void _tlbil_all(void) { asm volatile ("sync; tlbia; isync" : : : "memory"); @@ -38,7 +38,7 @@ static inline void _tlbil_pid(unsigned int pid) } #define _tlbil_pid_noind(pid) _tlbil_pid(pid) -#else /* CONFIG_40x || CONFIG_8xx */ +#else /* CONFIG_40x || CONFIG_PPC_8xx */ extern void _tlbil_all(void); extern void _tlbil_pid(unsigned int pid); #ifdef CONFIG_PPC_BOOK3E @@ -46,12 +46,12 @@ extern void _tlbil_pid_noind(unsigned int pid); #else #define _tlbil_pid_noind(pid) _tlbil_pid(pid) #endif -#endif /* !(CONFIG_40x || CONFIG_8xx) */ +#endif /* !(CONFIG_40x || CONFIG_PPC_8xx) */ /* * On 8xx, we directly inline tlbie, on others, it's extern */ -#ifdef CONFIG_8xx +#ifdef CONFIG_PPC_8xx static inline void _tlbil_va(unsigned long address, unsigned int pid, unsigned int tsize, unsigned int ind) { @@ -67,7 +67,7 @@ static inline void _tlbil_va(unsigned long address, unsigned int pid, { __tlbil_va(address, pid); } -#endif /* CONFIG_8xx */ +#endif /* CONFIG_PPC_8xx */ #if defined(CONFIG_PPC_BOOK3E) || defined(CONFIG_PPC_47x) extern void _tlbivax_bcast(unsigned long address, unsigned int pid, diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5cc50d47ce3f..5d8be076f8e5 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -28,9 +28,13 @@ static 
int native_register_process_table(unsigned long base, unsigned long pg_sz, unsigned long table_size) { - unsigned long patb1 = base | table_size | PATB_GR; + unsigned long patb0, patb1; + + patb0 = be64_to_cpu(partition_tb[0].patb0); + patb1 = base | table_size | PATB_GR; + + mmu_partition_table_set_entry(0, patb0, patb1); - partition_tb->patb1 = cpu_to_be64(patb1); return 0; } @@ -494,6 +498,7 @@ void __init radix__early_init_mmu(void) __kernel_virt_size = RADIX_KERN_VIRT_SIZE; __vmalloc_start = RADIX_VMALLOC_START; __vmalloc_end = RADIX_VMALLOC_END; + __kernel_io_start = RADIX_KERN_IO_START; vmemmap = (struct page *)RADIX_VMEMMAP_BASE; ioremap_bot = IOREMAP_BASE; @@ -804,9 +809,12 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre */ pmd = *pmdp; pmd_clear(pmdp); + /* FIXME: Verify whether we need this kick below */ kick_all_cpus_sync(); - flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + radix__flush_tlb_collapsed_pmd(vma->vm_mm, address); + return pmd; } diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a9e4bfc025bc..65eda1997c3f 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -34,6 +34,7 @@ #include <asm/fixmap.h> #include <asm/io.h> #include <asm/setup.h> +#include <asm/sections.h> #include "mmu_decl.h" @@ -242,7 +243,7 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, int flags) /* * Map in a chunk of physical memory starting at start. */ -void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) +static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top) { unsigned long v, s, f; phys_addr_t p; @@ -294,7 +295,7 @@ void __init mapin_ram(void) * Returns true (1) if PTE was found, zero otherwise. The pointer to * the PTE pointer is unmodified if PTE is not found. */ -int +static int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) { pgd_t *pgd; @@ -323,9 +324,7 @@ get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep, pmd_t **pmdp) return(retval); } -#ifdef CONFIG_DEBUG_PAGEALLOC - -static int __change_page_attr(struct page *page, pgprot_t prot) +static int __change_page_attr_noflush(struct page *page, pgprot_t prot) { pte_t *kpte; pmd_t *kpmd; @@ -339,8 +338,6 @@ static int __change_page_attr(struct page *page, pgprot_t prot) if (!get_pteptr(&init_mm, address, &kpte, &kpmd)) return -EINVAL; __set_pte_at(&init_mm, address, kpte, mk_pte(page, prot), 0); - wmb(); - flush_tlb_page(NULL, address); pte_unmap(kpte); return 0; @@ -349,44 +346,65 @@ static int __change_page_attr(struct page *page, pgprot_t prot) /* * Change the page attributes of a page in the linear mapping.
* - * THIS CONFLICTS WITH BAT MAPPINGS, DEBUG USE ONLY + * THIS DOES NOTHING WITH BAT MAPPINGS, DEBUG USE ONLY */ static int change_page_attr(struct page *page, int numpages, pgprot_t prot) { int i, err = 0; unsigned long flags; + struct page *start = page; local_irq_save(flags); for (i = 0; i < numpages; i++, page++) { - err = __change_page_attr(page, prot); + err = __change_page_attr_noflush(page, prot); if (err) break; } + wmb(); + flush_tlb_kernel_range((unsigned long)page_address(start), + (unsigned long)page_address(page)); local_irq_restore(flags); return err; } - -void __kernel_map_pages(struct page *page, int numpages, int enable) +void mark_initmem_nx(void) { - if (PageHighMem(page)) - return; + struct page *page = virt_to_page(_sinittext); + unsigned long numpages = PFN_UP((unsigned long)_einittext) - + PFN_DOWN((unsigned long)_sinittext); - change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); + change_page_attr(page, numpages, PAGE_KERNEL); } -#endif /* CONFIG_DEBUG_PAGEALLOC */ -static int fixmaps; - -void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) +#ifdef CONFIG_STRICT_KERNEL_RWX +void mark_rodata_ro(void) { - unsigned long address = __fix_to_virt(idx); + struct page *page; + unsigned long numpages; + + page = virt_to_page(_stext); + numpages = PFN_UP((unsigned long)_etext) - + PFN_DOWN((unsigned long)_stext); - if (idx >= __end_of_fixed_addresses) { - BUG(); + change_page_attr(page, numpages, PAGE_KERNEL_ROX); + /* + * mark .rodata as read only. Use __init_begin rather than __end_rodata + * to cover NOTES and EXCEPTION_TABLE. + */ + page = virt_to_page(__start_rodata); + numpages = PFN_UP((unsigned long)__init_begin) - + PFN_DOWN((unsigned long)__start_rodata); + + change_page_attr(page, numpages, PAGE_KERNEL_RO); +} +#endif + +#ifdef CONFIG_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (PageHighMem(page)) return; - } - map_kernel_page(address, phys, pgprot_val(flags)); - fixmaps++; + change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); } +#endif /* CONFIG_DEBUG_PAGEALLOC */ diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 0736e94c7615..ac0717a90ca6 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -104,6 +104,8 @@ unsigned long __vmalloc_start; EXPORT_SYMBOL(__vmalloc_start); unsigned long __vmalloc_end; EXPORT_SYMBOL(__vmalloc_end); +unsigned long __kernel_io_start; +EXPORT_SYMBOL(__kernel_io_start); struct page *vmemmap; EXPORT_SYMBOL(vmemmap); unsigned long __pte_frag_nr; diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S index bde378559d01..906a86fe457b 100644 --- a/arch/powerpc/mm/slb_low.S +++ b/arch/powerpc/mm/slb_low.S @@ -121,12 +121,25 @@ slb_miss_kernel_load_vmemmap: 1: #endif /* CONFIG_SPARSEMEM_VMEMMAP */ - /* vmalloc mapping gets the encoding from the PACA as the mapping - * can be demoted from 64K -> 4K dynamically on some machines + /* + * r10 contains the ESID, which is the original faulting EA shifted + * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28) + * which is 0xd00038000. That can't be used as an immediate, even if we + * ignored the 0xd, so we have to load it into a register, and we only + * have one register free. So we must load all of (H_VMALLOC_END >> 28) + * into a register and compare ESID against that. 
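+ * + * The same arithmetic can be sanity-checked in C (illustrative only, + * mirroring the inline values below; not from the kernel sources): + * + * uint64_t r11 = (uint64_t)(int32_t)0xd0003800; // lis+ori, sign-extended + * r11 = ((r11 << 4) | (r11 >> 60)) & 0xffffffff0ULL; // rldic r11,r11,4,28 + * assert(r11 == 0xd00038000ULL); // == H_VMALLOC_END >> 28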
+ */ + lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000 + ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800 + // Rotate left 4, then mask with 0xffffffff0 + rldic r11,r11,4,28 // r11 = 0xd00038000 + cmpld r10,r11 // if r10 >= r11 + bge 5f // goto io_mapping + + /* + * vmalloc mapping gets the encoding from the PACA as the mapping + * can be demoted from 64K -> 4K dynamically on some machines. */ - clrldi r11,r10,48 - cmpldi r11,(H_VMALLOC_SIZE >> 28) - 1 - bgt 5f lhz r11,PACAVMALLOCSLLP(r13) b 6f 5: diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 744e0164ecf5..18151e9ad694 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -54,23 +54,15 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) */ __tlbiel_pid(pid, 0, ric); - if (ric == RIC_FLUSH_ALL) - /* For the remaining sets, just flush the TLB */ - ric = RIC_FLUSH_TLB; + /* For PWC, only one flush is needed */ + if (ric == RIC_FLUSH_PWC) { + asm volatile("ptesync": : :"memory"); + return; + } + /* For the remaining sets, just flush the TLB */ for (set = 1; set < POWER9_TLB_SETS_RADIX ; set++) - __tlbiel_pid(pid, set, ric); - - asm volatile("ptesync": : :"memory"); - asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); -} - -static inline void tlbiel_pwc(unsigned long pid) -{ - asm volatile("ptesync": : :"memory"); - - /* For PWC flush, we don't look at set number */ - __tlbiel_pid(pid, 0, RIC_FLUSH_PWC); + __tlbiel_pid(pid, set, RIC_FLUSH_TLB); asm volatile("ptesync": : :"memory"); asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); @@ -146,31 +138,23 @@ void radix__local_flush_tlb_mm(struct mm_struct *mm) preempt_disable(); pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); preempt_enable(); } EXPORT_SYMBOL(radix__local_flush_tlb_mm); -void radix__local_flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +#ifndef CONFIG_SMP +static void radix__local_flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. - */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (pid != MMU_NO_CONTEXT) - tlbiel_pwc(pid); - + _tlbiel_pid(pid, RIC_FLUSH_ALL); preempt_enable(); } -EXPORT_SYMBOL(radix__local_flush_tlb_pwc); +#endif /* CONFIG_SMP */ void radix__local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) @@ -208,38 +192,35 @@ void radix__flush_tlb_mm(struct mm_struct *mm) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_ALL); + _tlbie_pid(pid, RIC_FLUSH_TLB); else - _tlbiel_pid(pid, RIC_FLUSH_ALL); + _tlbiel_pid(pid, RIC_FLUSH_TLB); no_context: preempt_enable(); } EXPORT_SYMBOL(radix__flush_tlb_mm); -void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +static void radix__flush_all_mm(struct mm_struct *mm) { unsigned long pid; - struct mm_struct *mm = tlb->mm; - /* - * If we are doing a full mm flush, we will do a tlb flush - * with RIC_FLUSH_ALL later. 
- */ - if (tlb->fullmm) - return; preempt_disable(); - pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; if (!mm_is_thread_local(mm)) - _tlbie_pid(pid, RIC_FLUSH_PWC); + _tlbie_pid(pid, RIC_FLUSH_ALL); else - tlbiel_pwc(pid); + _tlbiel_pid(pid, RIC_FLUSH_ALL); no_context: preempt_enable(); } + +void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) +{ + tlb->need_flush_all = 1; +} EXPORT_SYMBOL(radix__flush_tlb_pwc); void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, @@ -271,6 +252,8 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); +#else /* CONFIG_SMP */ +#define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -288,6 +271,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; + radix__flush_tlb_mm(mm); } EXPORT_SYMBOL(radix__flush_tlb_range); @@ -319,7 +303,10 @@ void radix__tlb_flush(struct mmu_gather *tlb) */ if (psize != -1 && !tlb->fullmm && !tlb->need_flush_all) radix__flush_tlb_range_psize(mm, tlb->start, tlb->end, psize); - else + else if (tlb->need_flush_all) { + tlb->need_flush_all = 0; + radix__flush_all_mm(mm); + } else radix__flush_tlb_mm(mm); } @@ -364,6 +351,43 @@ err_out: preempt_enable(); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) +{ + int local = mm_is_thread_local(mm); + unsigned long ap = mmu_get_ap(mmu_virtual_psize); + unsigned long pid, end; + + + pid = mm ? mm->context.id : 0; + if (unlikely(pid == MMU_NO_CONTEXT)) + goto no_context; + + /* 4k page size, just blow the world */ + if (PAGE_SIZE == 0x1000) { + radix__flush_all_mm(mm); + return; + } + + /* Otherwise first do the PWC */ + if (local) + _tlbiel_pid(pid, RIC_FLUSH_PWC); + else + _tlbie_pid(pid, RIC_FLUSH_PWC); + + /* Then iterate the pages */ + end = addr + HPAGE_PMD_SIZE; + for (; addr < end; addr += PAGE_SIZE) { + if (local) + _tlbiel_va(addr, pid, ap, RIC_FLUSH_TLB); + else + _tlbie_va(addr, pid, ap, RIC_FLUSH_TLB); + } +no_context: + preempt_enable(); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + void radix__flush_tlb_lpid_va(unsigned long lpid, unsigned long gpa, unsigned long page_size) { diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/tlb_nohash_low.S index eabecfcaef7c..048b8e9f4492 100644 --- a/arch/powerpc/mm/tlb_nohash_low.S +++ b/arch/powerpc/mm/tlb_nohash_low.S @@ -60,7 +60,7 @@ _GLOBAL(__tlbil_va) isync 1: blr -#elif defined(CONFIG_8xx) +#elif defined(CONFIG_PPC_8xx) /* * Nothing to do for 8xx, everything is inline diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 861c5af1c9c4..6ba5d253e857 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -25,11 +25,7 @@ int bpf_jit_enable __read_mostly; static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { - int *p = area; - - /* Fill whole space with trap instructions */ - while (p < (int *)((char *)area + size)) - *p++ = BREAKPOINT_INSTRUCTION; + memset32(area, BREAKPOINT_INSTRUCTION, size/4); } static inline void bpf_flush_icache(void *start, void *end) diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 4d606b99a5cb..3f3a5ce66495 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS) += 
power4-pmu.o ppc970-pmu.o power5-pmu.o \ isa207-common.o power8-pmu.o power9-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o +obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c new file mode 100644 index 000000000000..9ccac86f3463 --- /dev/null +++ b/arch/powerpc/perf/imc-pmu.c @@ -0,0 +1,1306 @@ +/* + * In-Memory Collection (IMC) Performance Monitor counter support. + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. + */ +#include <linux/perf_event.h> +#include <linux/slab.h> +#include <asm/opal.h> +#include <asm/imc-pmu.h> +#include <asm/cputhreads.h> +#include <asm/smp.h> +#include <linux/string.h> + +/* Nest IMC data structures and variables */ + +/* + * Used to avoid races in counting the nest-pmu units during hotplug + * register and unregister + */ +static DEFINE_MUTEX(nest_init_lock); +static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); +static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; +static cpumask_t nest_imc_cpumask; +struct imc_pmu_ref *nest_imc_refc; +static int nest_pmus; + +/* Core IMC data structures and variables */ + +static cpumask_t core_imc_cpumask; +struct imc_pmu_ref *core_imc_refc; +static struct imc_pmu *core_imc_pmu; + +/* Thread IMC data structures and variables */ + +static DEFINE_PER_CPU(u64 *, thread_imc_mem); +static struct imc_pmu *thread_imc_pmu; +static int thread_imc_mem_size; + +struct imc_pmu *imc_event_to_pmu(struct perf_event *event) +{ + return container_of(event->pmu, struct imc_pmu, pmu); +} + +PMU_FORMAT_ATTR(event, "config:0-40"); +PMU_FORMAT_ATTR(offset, "config:0-31"); +PMU_FORMAT_ATTR(rvalue, "config:32"); +PMU_FORMAT_ATTR(mode, "config:33-40"); +static struct attribute *imc_format_attrs[] = { + &format_attr_event.attr, + &format_attr_offset.attr, + &format_attr_rvalue.attr, + &format_attr_mode.attr, + NULL, +}; + +static struct attribute_group imc_format_group = { + .name = "format", + .attrs = imc_format_attrs, +}; + +/* Get the cpumask printed to a buffer "buf" */ +static ssize_t imc_pmu_cpumask_get_attr(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct pmu *pmu = dev_get_drvdata(dev); + struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu); + cpumask_t *active_mask; + + switch(imc_pmu->domain){ + case IMC_DOMAIN_NEST: + active_mask = &nest_imc_cpumask; + break; + case IMC_DOMAIN_CORE: + active_mask = &core_imc_cpumask; + break; + default: + return 0; + } + + return cpumap_print_to_pagebuf(true, buf, active_mask); +} + +static DEVICE_ATTR(cpumask, S_IRUGO, imc_pmu_cpumask_get_attr, NULL); + +static struct attribute *imc_pmu_cpumask_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group imc_pmu_cpumask_attr_group = { + .attrs = imc_pmu_cpumask_attrs, +}; + +/* device_str_attr_create : Populate event "name" and string "str" in attribute */ +static struct attribute *device_str_attr_create(const char *name, const char *str) +{ + struct perf_pmu_events_attr *attr; + + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return NULL; + 
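+ /* + * For illustration (hypothetical values): a caller passing name + * "some_event" and str "event=0x80" gets back an attribute that + * surfaces as the sysfs file events/some_event, whose contents + * ("event=0x80") the perf tool parses as the event encoding. + */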
sysfs_attr_init(&attr->attr.attr); + + attr->event_str = str; + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = perf_event_sysfs_show; + + return &attr->attr.attr; +} + +struct imc_events *imc_parse_event(struct device_node *np, const char *scale, + const char *unit, const char *prefix, u32 base) +{ + struct imc_events *event; + const char *s; + u32 reg; + + event = kzalloc(sizeof(struct imc_events), GFP_KERNEL); + if (!event) + return NULL; + + if (of_property_read_u32(np, "reg", ®)) + goto error; + /* Add the base_reg value to the "reg" */ + event->value = base + reg; + + if (of_property_read_string(np, "event-name", &s)) + goto error; + + event->name = kasprintf(GFP_KERNEL, "%s%s", prefix, s); + if (!event->name) + goto error; + + if (of_property_read_string(np, "scale", &s)) + s = scale; + + if (s) { + event->scale = kstrdup(s, GFP_KERNEL); + if (!event->scale) + goto error; + } + + if (of_property_read_string(np, "unit", &s)) + s = unit; + + if (s) { + event->unit = kstrdup(s, GFP_KERNEL); + if (!event->unit) + goto error; + } + + return event; +error: + kfree(event->unit); + kfree(event->scale); + kfree(event->name); + kfree(event); + + return NULL; +} + +/* + * update_events_in_group: Update the "events" information in an attr_group + * and assign the attr_group to the pmu "pmu". + */ +static int update_events_in_group(struct device_node *node, struct imc_pmu *pmu) +{ + struct attribute_group *attr_group; + struct attribute **attrs, *dev_str; + struct device_node *np, *pmu_events; + struct imc_events *ev; + u32 handle, base_reg; + int i=0, j=0, ct; + const char *prefix, *g_scale, *g_unit; + const char *ev_val_str, *ev_scale_str, *ev_unit_str; + + if (!of_property_read_u32(node, "events", &handle)) + pmu_events = of_find_node_by_phandle(handle); + else + return 0; + + /* Did not find any node with a given phandle */ + if (!pmu_events) + return 0; + + /* Get a count of number of child nodes */ + ct = of_get_child_count(pmu_events); + + /* Get the event prefix */ + if (of_property_read_string(node, "events-prefix", &prefix)) + return 0; + + /* Get a global unit and scale data if available */ + if (of_property_read_string(node, "scale", &g_scale)) + g_scale = NULL; + + if (of_property_read_string(node, "unit", &g_unit)) + g_unit = NULL; + + /* "reg" property gives out the base offset of the counters data */ + of_property_read_u32(node, "reg", &base_reg); + + /* Allocate memory for the events */ + pmu->events = kcalloc(ct, sizeof(struct imc_events), GFP_KERNEL); + if (!pmu->events) + return -ENOMEM; + + ct = 0; + /* Parse the events and update the struct */ + for_each_child_of_node(pmu_events, np) { + ev = imc_parse_event(np, g_scale, g_unit, prefix, base_reg); + if (ev) + pmu->events[ct++] = ev; + } + + /* Allocate memory for attribute group */ + attr_group = kzalloc(sizeof(*attr_group), GFP_KERNEL); + if (!attr_group) + return -ENOMEM; + + /* + * Allocate memory for attributes. + * Since we have count of events for this pmu, we also allocate + * memory for the scale and unit attribute for now. + * "ct" has the total event structs added from the events-parent node. + * So allocate three times the "ct" (this includes event, event_scale and + * event_unit). 
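+ * For example, with ct = 2 parsed events the worst case is 2 event + * attributes plus 2 ".scale" and 2 ".unit" attributes plus the NULL + * terminator, i.e. kcalloc(2 * 3 + 1, ...) = 7 slots; slots for + * missing scale/unit strings simply remain NULL.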
+ */ + attrs = kcalloc(((ct * 3) + 1), sizeof(struct attribute *), GFP_KERNEL); + if (!attrs) { + kfree(attr_group); + kfree(pmu->events); + return -ENOMEM; + } + + attr_group->name = "events"; + attr_group->attrs = attrs; + do { + ev_val_str = kasprintf(GFP_KERNEL, "event=0x%x", pmu->events[i]->value); + dev_str = device_str_attr_create(pmu->events[i]->name, ev_val_str); + if (!dev_str) + continue; + + attrs[j++] = dev_str; + if (pmu->events[i]->scale) { + ev_scale_str = kasprintf(GFP_KERNEL, "%s.scale", pmu->events[i]->name); + dev_str = device_str_attr_create(ev_scale_str, pmu->events[i]->scale); + if (!dev_str) + continue; + + attrs[j++] = dev_str; + } + + if (pmu->events[i]->unit) { + ev_unit_str = kasprintf(GFP_KERNEL, "%s.unit", pmu->events[i]->name); + dev_str = device_str_attr_create(ev_unit_str, pmu->events[i]->unit); + if (!dev_str) + continue; + + attrs[j++] = dev_str; + } + } while (++i < ct); + + /* Save the event attribute */ + pmu->attr_groups[IMC_EVENT_ATTR] = attr_group; + + kfree(pmu->events); + return 0; +} + +/* get_nest_pmu_ref: Return the imc_pmu_ref struct for the given node */ +static struct imc_pmu_ref *get_nest_pmu_ref(int cpu) +{ + return per_cpu(local_nest_imc_refc, cpu); +} + +static void nest_change_cpu_context(int old_cpu, int new_cpu) +{ + struct imc_pmu **pn = per_nest_pmu_arr; + int i; + + if (old_cpu < 0 || new_cpu < 0) + return; + + for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++) + perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); +} + +static int ppc_nest_imc_cpu_offline(unsigned int cpu) +{ + int nid, target = -1; + const struct cpumask *l_cpumask; + struct imc_pmu_ref *ref; + + /* + * Check in the designated list for this cpu. Don't bother + * if it is not one of them. + */ + if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask)) + return 0; + + /* + * Now that this cpu is one of the designated, + * find another cpu a) which is online and b) in the same chip. + */ + nid = cpu_to_node(cpu); + l_cpumask = cpumask_of_node(nid); + target = cpumask_any_but(l_cpumask, cpu); + + /* + * Update the cpumask with the target cpu and + * migrate the context if needed. + */ + if (target >= 0 && target < nr_cpu_ids) { + cpumask_set_cpu(target, &nest_imc_cpumask); + nest_change_cpu_context(cpu, target); + } else { + opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(cpu)); + /* + * If this is the last cpu in this chip, skip the reference + * count mutex lock and make the reference count on this chip zero. + */ + ref = get_nest_pmu_ref(cpu); + if (!ref) + return -EINVAL; + + ref->refc = 0; + } + return 0; +}
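+ +/* + * The election above in miniature (userspace sketch, names invented; + * cpumask_any_but() returns >= nr_cpu_ids when no other cpu qualifies): + * + * unsigned int pick_new_reader(const struct cpumask *node_mask, + * unsigned int dying_cpu) + * { + * unsigned int t = cpumask_any_but(node_mask, dying_cpu); + * + * return t < nr_cpu_ids ? t : NO_READER; // NO_READER: node now empty + * } + */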
+ +static int ppc_nest_imc_cpu_online(unsigned int cpu) +{ + const struct cpumask *l_cpumask; + static struct cpumask tmp_mask; + int res; + + /* Get the cpumask of this node */ + l_cpumask = cpumask_of_node(cpu_to_node(cpu)); + + /* + * If this is not the first online CPU on this node, then + * just return. + */ + if (cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask)) + return 0; + + /* + * If this is the first online cpu on this node, + * disable the nest counters by making an OPAL call. + */ + res = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(cpu)); + if (res) + return res; + + /* Make this CPU the designated target for counter collection */ + cpumask_set_cpu(cpu, &nest_imc_cpumask); + return 0; +} + +static int nest_pmu_cpumask_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE, + "perf/powerpc/imc:online", + ppc_nest_imc_cpu_online, + ppc_nest_imc_cpu_offline); +} + +static void nest_imc_counters_release(struct perf_event *event) +{ + int rc, node_id; + struct imc_pmu_ref *ref; + + if (event->cpu < 0) + return; + + node_id = cpu_to_node(event->cpu); + + /* + * See if we need to disable the nest PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * enable or disable the nest counters. + */ + ref = get_nest_pmu_ref(event->cpu); + if (!ref) + return; + + /* Take the mutex lock for this node and then decrement the reference count */ + mutex_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(event->cpu)); + if (rc) { + mutex_unlock(&ref->lock); + pr_err("nest-imc: Unable to stop the counters for node %d\n", node_id); + return; + } + } else if (ref->refc < 0) { + WARN(1, "nest-imc: Invalid event reference count\n"); + ref->refc = 0; + } + mutex_unlock(&ref->lock); +} + +static int nest_imc_event_init(struct perf_event *event) +{ + int chip_id, rc, node_id; + u32 l_config, config = event->attr.config; + struct imc_mem_info *pcni; + struct imc_pmu *pmu; + struct imc_pmu_ref *ref; + bool flag = false; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling not supported */ + if (event->hw.sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + pmu = imc_event_to_pmu(event); + + /* Sanity check for config (event offset) */ + if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) + return -EINVAL; + + /* + * Nest HW counter memory resides in a per-chip reserved memory region (HOMER). + * Get the base memory address for this cpu. + */ + chip_id = topology_physical_package_id(event->cpu); + pcni = pmu->mem_info; + do { + if (pcni->id == chip_id) { + flag = true; + break; + } + pcni++; + } while (pcni); + + if (!flag) + return -ENODEV; + + /* + * Add the event offset to the base address. + */ + l_config = config & IMC_EVENT_OFFSET_MASK; + event->hw.event_base = (u64)pcni->vbase + l_config; + node_id = cpu_to_node(event->cpu); + + /* + * Get the imc_pmu_ref struct for this node. + * Take the mutex lock and then increment the count of nest pmu events + * initialized. + */ + ref = get_nest_pmu_ref(event->cpu); + if (!ref) + return -EINVAL; + + mutex_lock(&ref->lock); + if (ref->refc == 0) { + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(event->cpu)); + if (rc) { + mutex_unlock(&ref->lock); + pr_err("nest-imc: Unable to start the counters for node %d\n", + node_id); + return rc; + } + } + ++ref->refc; + mutex_unlock(&ref->lock); + + event->destroy = nest_imc_counters_release; + return 0; +}
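+ +/* + * The lock/refcount dance above is the pattern every IMC domain repeats: + * the first event in starts the hardware, the last event out stops it. + * A freestanding sketch of the same idea (userspace model, hw_start() and + * hw_stop() stand in for the OPAL calls; not kernel code): + * + * static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + * static int refc; + * + * int counters_get(void) // first caller starts the hardware + * { + * int rc = 0; + * + * pthread_mutex_lock(&lock); + * if (refc == 0) + * rc = hw_start(); + * if (rc == 0) + * refc++; + * pthread_mutex_unlock(&lock); + * return rc; + * } + * + * void counters_put(void) // last caller stops the hardware + * { + * pthread_mutex_lock(&lock); + * if (--refc == 0) + * hw_stop(); + * pthread_mutex_unlock(&lock); + * } + */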
+ +/* + * core_imc_mem_init : Initializes memory for the current core. + * + * Uses alloc_pages_node() and passes the returned address as an argument to + * an opal call to configure the pdbar. The address sent as an argument is + * converted to a physical address before the opal call is made. This is the + * base address at which the core imc counters are populated. + */ +static int core_imc_mem_init(int cpu, int size) +{ + int phys_id, rc = 0, core_id = (cpu / threads_per_core); + struct imc_mem_info *mem_info; + + /* + * alloc_pages_node() will allocate memory for core in the + * local node only. + */ + phys_id = topology_physical_package_id(cpu); + mem_info = &core_imc_pmu->mem_info[core_id]; + mem_info->id = core_id; + + /* We need only vbase for core counters */ + mem_info->vbase = page_address(alloc_pages_node(phys_id, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, + get_order(size))); + if (!mem_info->vbase) + return -ENOMEM; + + /* Init the mutex */ + core_imc_refc[core_id].id = core_id; + mutex_init(&core_imc_refc[core_id].lock); + + rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE, + __pa((void *)mem_info->vbase), + get_hard_smp_processor_id(cpu)); + if (rc) { + free_pages((u64)mem_info->vbase, get_order(size)); + mem_info->vbase = NULL; + } + + return rc; +} + +static bool is_core_imc_mem_inited(int cpu) +{ + struct imc_mem_info *mem_info; + int core_id = (cpu / threads_per_core); + + mem_info = &core_imc_pmu->mem_info[core_id]; + if (!mem_info->vbase) + return false; + + return true; +} + +static int ppc_core_imc_cpu_online(unsigned int cpu) +{ + const struct cpumask *l_cpumask; + static struct cpumask tmp_mask; + int ret = 0; + + /* Get the cpumask for this core */ + l_cpumask = cpu_sibling_mask(cpu); + + /* If a cpu for this core is already set, then don't do anything */ + if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask)) + return 0; + + if (!is_core_imc_mem_inited(cpu)) { + ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size); + if (ret) { + pr_info("core_imc memory allocation for cpu %d failed\n", cpu); + return ret; + } + } + + /* set the cpu in the mask */ + cpumask_set_cpu(cpu, &core_imc_cpumask); + return 0; +} + +static int ppc_core_imc_cpu_offline(unsigned int cpu) +{ + unsigned int ncpu, core_id; + struct imc_pmu_ref *ref; + + /* + * Clear this cpu out of the mask; if it is not present in the mask, + * don't bother doing anything. + */ + if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask)) + return 0; + + /* Find any online cpu in that core except the current "cpu" */ + ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu); + + if (ncpu < nr_cpu_ids) { + cpumask_set_cpu(ncpu, &core_imc_cpumask); + perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu); + } else { + /* + * If this is the last cpu in this core, skip taking the reference + * count mutex lock for this core and directly zero "refc" for + * this core. + */ + opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(cpu)); + core_id = cpu / threads_per_core; + ref = &core_imc_refc[core_id]; + if (!ref) + return -EINVAL; + + ref->refc = 0; + } + return 0; +} + +static int core_imc_pmu_cpumask_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE, + "perf/powerpc/imc_core:online", + ppc_core_imc_cpu_online, + ppc_core_imc_cpu_offline); +}
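+ +/* + * Orientation for the core/thread code below: with SMT8 + * (threads_per_core == 8), cpus 0..7 make up core 0 and share one + * core_imc_refc slot, cpus 8..15 core 1, and so on; cpu_sibling_mask() + * is exactly that group, and e.g. cpu 11 maps to core_id 11 / 8 == 1. + * (Illustrative numbers only, not taken from this patch.) + */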
+ +static void core_imc_counters_release(struct perf_event *event) +{ + int rc, core_id; + struct imc_pmu_ref *ref; + + if (event->cpu < 0) + return; + /* + * See if we need to disable the IMC PMU. + * If no events are currently in use, then we have to take a + * mutex to ensure that we don't race with another task doing + * enable or disable the core counters. + */ + core_id = event->cpu / threads_per_core; + + /* Take the mutex lock and decrement the reference count for this core */ + ref = &core_imc_refc[core_id]; + if (!ref) + return; + + mutex_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(event->cpu)); + if (rc) { + mutex_unlock(&ref->lock); + pr_err("IMC: Unable to stop the counters for core %d\n", core_id); + return; + } + } else if (ref->refc < 0) { + WARN(1, "core-imc: Invalid event reference count\n"); + ref->refc = 0; + } + mutex_unlock(&ref->lock); +} + +static int core_imc_event_init(struct perf_event *event) +{ + int core_id, rc; + u64 config = event->attr.config; + struct imc_mem_info *pcmi; + struct imc_pmu *pmu; + struct imc_pmu_ref *ref; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling not supported */ + if (event->hw.sample_period) + return -EINVAL; + + /* unsupported modes and filters */ + if (event->attr.exclude_user || + event->attr.exclude_kernel || + event->attr.exclude_hv || + event->attr.exclude_idle || + event->attr.exclude_host || + event->attr.exclude_guest) + return -EINVAL; + + if (event->cpu < 0) + return -EINVAL; + + event->hw.idx = -1; + pmu = imc_event_to_pmu(event); + + /* Sanity check for config (event offset) */ + if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) + return -EINVAL; + + if (!is_core_imc_mem_inited(event->cpu)) + return -ENODEV; + + core_id = event->cpu / threads_per_core; + pcmi = &core_imc_pmu->mem_info[core_id]; + if (!pcmi->vbase) + return -ENODEV; + + /* Get the core_imc mutex for this core */ + ref = &core_imc_refc[core_id]; + if (!ref) + return -EINVAL; + + /* + * Core pmu units are enabled only while in use. + * See if this is triggered for the first time. + * If yes, take the mutex lock and enable the core counters. + * If not, just increment the count in core_imc_refc struct. + */ + mutex_lock(&ref->lock); + if (ref->refc == 0) { + rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(event->cpu)); + if (rc) { + mutex_unlock(&ref->lock); + pr_err("core-imc: Unable to start the counters for core %d\n", + core_id); + return rc; + } + } + ++ref->refc; + mutex_unlock(&ref->lock); + + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); + event->destroy = core_imc_counters_release; + return 0; +}
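+ +/* + * What event_base encodes: a counter read is a 64-bit big-endian load + * from (vbase + (config & IMC_EVENT_OFFSET_MASK)), and the reported + * event value is the delta between two snapshots of that free-running + * count. A userspace model of the same idea (names invented): + * + * uint64_t imc_snapshot(const void *vbase, uint64_t config) + * { + * uint64_t raw; + * + * memcpy(&raw, (const char *)vbase + (config & OFFSET_MASK), + * sizeof(raw)); + * return be64toh(raw); // counters live in memory big-endian + * } + * + * // ... count += imc_snapshot(vbase, cfg) - prev; + */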
+ +static int ppc_thread_imc_cpu_online(unsigned int cpu) +{ + return thread_imc_mem_alloc(cpu, thread_imc_mem_size); +} + +static int ppc_thread_imc_cpu_offline(unsigned int cpu) +{ + mtspr(SPRN_LDBAR, 0); + return 0; +} + +static int thread_imc_cpu_init(void) +{ + return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE, + "perf/powerpc/imc_thread:online", + ppc_thread_imc_cpu_online, + ppc_thread_imc_cpu_offline); +} + +void thread_imc_pmu_sched_task(struct perf_event_context *ctx, + bool sched_in) +{ + int core_id; + struct imc_pmu_ref *ref; + + if (!is_core_imc_mem_inited(smp_processor_id())) + return; + + core_id = smp_processor_id() / threads_per_core; + /* + * imc pmus are enabled only while in use. + * See if this is triggered for the first time. + * If yes, take the mutex lock and enable the counters. + * If not, just increment the count in the ref count struct. + */ + ref = &core_imc_refc[core_id]; + if (!ref) + return; + + if (sched_in) { + mutex_lock(&ref->lock); + if (ref->refc == 0) { + if (opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("thread-imc: Unable to start the counter for core %d\n", + core_id); + return; + } + } + ++ref->refc; + mutex_unlock(&ref->lock); + } else { + mutex_lock(&ref->lock); + ref->refc--; + if (ref->refc == 0) { + if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(smp_processor_id()))) { + mutex_unlock(&ref->lock); + pr_err("thread-imc: Unable to stop the counters for core %d\n", + core_id); + return; + } + } else if (ref->refc < 0) { + ref->refc = 0; + } + mutex_unlock(&ref->lock); + } + + return; +} + +static int thread_imc_event_init(struct perf_event *event) +{ + u32 config = event->attr.config; + struct task_struct *target; + struct imc_pmu *pmu; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* Sampling not supported */ + if (event->hw.sample_period) + return -EINVAL; + + event->hw.idx = -1; + pmu = imc_event_to_pmu(event); + + /* Sanity check for config offset */ + if ((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size) + return -EINVAL; + + target = event->hw.target; + if (!target) + return -EINVAL; + + event->pmu->task_ctx_nr = perf_sw_context; + return 0; +} + +static bool is_thread_imc_pmu(struct perf_event *event) +{ + if (!strncmp(event->pmu->name, "thread_imc", strlen("thread_imc"))) + return true; + + return false; +} + +static u64 *get_event_base_addr(struct perf_event *event) +{ + u64 addr; + + if (is_thread_imc_pmu(event)) { + addr = (u64)per_cpu(thread_imc_mem, smp_processor_id()); + return (u64 *)(addr + (event->attr.config & IMC_EVENT_OFFSET_MASK)); + } + + return (u64 *)event->hw.event_base; +} + +static void thread_imc_pmu_start_txn(struct pmu *pmu, + unsigned int txn_flags) +{ + if (txn_flags & ~PERF_PMU_TXN_ADD) + return; + perf_pmu_disable(pmu); +} + +static void thread_imc_pmu_cancel_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); +} + +static int thread_imc_pmu_commit_txn(struct pmu *pmu) +{ + perf_pmu_enable(pmu); + return 0; +} + +static u64 imc_read_counter(struct perf_event *event) +{ + u64 *addr, data; + + /* + * In-Memory
Collection (IMC) counters are free-flowing counters. + * So we take a snapshot of the counter value on enable and save it + * to calculate the delta at a later stage when presenting the event + * counter value. + */ + addr = get_event_base_addr(event); + data = be64_to_cpu(READ_ONCE(*addr)); + local64_set(&event->hw.prev_count, data); + + return data; +} + +static void imc_event_update(struct perf_event *event) +{ + u64 counter_prev, counter_new, final_count; + + counter_prev = local64_read(&event->hw.prev_count); + counter_new = imc_read_counter(event); + final_count = counter_new - counter_prev; + + /* Update the delta to the event count */ + local64_add(final_count, &event->count); +} + +static void imc_event_start(struct perf_event *event, int flags) +{ + /* + * In Memory Counters are free-flowing counters. HW or the microcode + * keeps adding to the counter offset in memory. To get the event + * counter value, we snapshot the value here and calculate the + * delta at a later point. + */ + imc_read_counter(event); +} + +static void imc_event_stop(struct perf_event *event, int flags) +{ + /* + * Take a snapshot, calculate the delta and update + * the event counter values. + */ + imc_event_update(event); +} + +static int imc_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + imc_event_start(event, flags); + + return 0; +} + +static int thread_imc_event_add(struct perf_event *event, int flags) +{ + if (flags & PERF_EF_START) + imc_event_start(event, flags); + + /* Enable the sched_task to start the engine */ + perf_sched_cb_inc(event->ctx->pmu); + return 0; +} + +static void thread_imc_event_del(struct perf_event *event, int flags) +{ + /* + * Take a snapshot, calculate the delta and update + * the event counter values. + */ + imc_event_update(event); + perf_sched_cb_dec(event->ctx->pmu); +} + +/* update_pmu_ops : Populate the appropriate operations for "pmu" */ +static int update_pmu_ops(struct imc_pmu *pmu) +{ + pmu->pmu.task_ctx_nr = perf_invalid_context; + pmu->pmu.add = imc_event_add; + pmu->pmu.del = imc_event_stop; + pmu->pmu.start = imc_event_start; + pmu->pmu.stop = imc_event_stop; + pmu->pmu.read = imc_event_update; + pmu->pmu.attr_groups = pmu->attr_groups; + pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group; + + switch (pmu->domain) { + case IMC_DOMAIN_NEST: + pmu->pmu.event_init = nest_imc_event_init; + pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group; + break; + case IMC_DOMAIN_CORE: + pmu->pmu.event_init = core_imc_event_init; + pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group; + break; + case IMC_DOMAIN_THREAD: + pmu->pmu.event_init = thread_imc_event_init; + pmu->pmu.sched_task = thread_imc_pmu_sched_task; + pmu->pmu.add = thread_imc_event_add; + pmu->pmu.del = thread_imc_event_del; + pmu->pmu.start_txn = thread_imc_pmu_start_txn; + pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn; + pmu->pmu.commit_txn = thread_imc_pmu_commit_txn; + break; + default: + break; + } + + return 0; +}
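The snapshot/delta scheme implemented by imc_read_counter() and imc_event_update() above can be modelled in a few lines of ordinary C. This userspace sketch (counter values invented) shows why a free-running counter needs only a saved snapshot to produce per-event counts:

#include <stdio.h>
#include <stdint.h>

/* The hardware counter only ever increases; perf stores a snapshot at
 * enable time and folds the difference into the event count on read. */
static uint64_t hw_counter;	/* stands in for the memory-mapped counter */
static uint64_t prev_count;
static uint64_t event_count;

static uint64_t read_counter(void)
{
	uint64_t data = hw_counter;	/* kernel: be64_to_cpu(READ_ONCE(*addr)) */

	prev_count = data;		/* kernel: local64_set(&hw.prev_count, data) */
	return data;
}

static void event_update(void)
{
	uint64_t prev = prev_count;

	event_count += read_counter() - prev;	/* fold in the delta */
}

int main(void)
{
	hw_counter = 1000;
	read_counter();		/* start: snapshot only */
	hw_counter = 1420;	/* counter free-runs meanwhile */
	event_update();		/* stop: account the delta */
	printf("count = %llu\n", (unsigned long long)event_count);	/* 420 */
	return 0;
}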
+ +/* init_nest_pmu_ref: Initialize the imc_pmu_ref struct for all the nodes */ +static int init_nest_pmu_ref(void) +{ + int nid, i, cpu; + + nest_imc_refc = kcalloc(num_possible_nodes(), sizeof(*nest_imc_refc), + GFP_KERNEL); + + if (!nest_imc_refc) + return -ENOMEM; + + i = 0; + for_each_node(nid) { + /* + * Mutex lock to avoid races while tracking the number of + * sessions using the chip's nest pmu units. + */ + mutex_init(&nest_imc_refc[i].lock); + + /* + * Loop to init the "id" with the node_id. Variable "i" is + * initialized to 0 and used as the index into the array. "i" will + * not go off the end of the array since "for_each_node" loops over + * the "N_POSSIBLE" nodes only. + */ + nest_imc_refc[i++].id = nid; + } + + /* + * Loop to init the per_cpu "local_nest_imc_refc" with the proper + * "nest_imc_refc" index. This makes get_nest_pmu_ref() a lot simpler. + */ + for_each_possible_cpu(cpu) { + nid = cpu_to_node(cpu); + for (i = 0; i < num_possible_nodes(); i++) { + if (nest_imc_refc[i].id == nid) { + per_cpu(local_nest_imc_refc, cpu) = &nest_imc_refc[i]; + break; + } + } + } + return 0; +} + +static void cleanup_all_core_imc_memory(void) +{ + int i, nr_cores = num_present_cpus() / threads_per_core; + struct imc_mem_info *ptr = core_imc_pmu->mem_info; + int size = core_imc_pmu->counter_mem_size; + + /* mem_info will never be NULL */ + for (i = 0; i < nr_cores; i++) { + if (ptr[i].vbase) + free_pages((u64)ptr[i].vbase, get_order(size)); + } + + kfree(ptr); + kfree(core_imc_refc); +} + +static void thread_imc_ldbar_disable(void *dummy) +{ + /* + * By zeroing LDBAR, we disable thread-imc + * updates. + */ + mtspr(SPRN_LDBAR, 0); +} + +void thread_imc_disable(void) +{ + on_each_cpu(thread_imc_ldbar_disable, NULL, 1); +} + +static void cleanup_all_thread_imc_memory(void) +{ + int i, order = get_order(thread_imc_mem_size); + + for_each_online_cpu(i) { + if (per_cpu(thread_imc_mem, i)) + free_pages((u64)per_cpu(thread_imc_mem, i), order); + } +} + +/* + * Common function to unregister the cpu hotplug callback and + * free the memory. + * TODO: Need to handle pmu unregistering, which will be + * done in a followup series. + */ +static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) +{ + if (pmu_ptr->domain == IMC_DOMAIN_NEST) { + mutex_lock(&nest_init_lock); + if (nest_pmus == 1) { + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_NEST_IMC_ONLINE); + kfree(nest_imc_refc); + } + + if (nest_pmus > 0) + nest_pmus--; + mutex_unlock(&nest_init_lock); + } + + /* Free core_imc memory */ + if (pmu_ptr->domain == IMC_DOMAIN_CORE) { + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE); + cleanup_all_core_imc_memory(); + } + + /* Free thread_imc memory */ + if (pmu_ptr->domain == IMC_DOMAIN_THREAD) { + cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE); + cleanup_all_thread_imc_memory(); + } + + /* Only free the attr_groups which are dynamically allocated */ + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); + kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); + kfree(pmu_ptr); + return; +} +
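init_nest_pmu_ref() above builds one refcount entry per node and then points each CPU's local_nest_imc_refc at its node's entry, so the hot path never has to search. A userspace sketch of that mapping, with made-up node ids and CPU topology:

#include <stdio.h>

struct imc_ref { int id; int refc; };

#define NR_NODES 2
#define NR_CPUS  4

static struct imc_ref nest_refc[NR_NODES] = { { .id = 0 }, { .id = 8 } };
static int cpu_node[NR_CPUS] = { 0, 0, 8, 8 };	/* cpu -> node id */
static struct imc_ref *local_refc[NR_CPUS];	/* per-cpu cached pointer */

int main(void)
{
	int cpu, i;

	/* one linear search per cpu at init time, none afterwards */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (i = 0; i < NR_NODES; i++)
			if (nest_refc[i].id == cpu_node[cpu]) {
				local_refc[cpu] = &nest_refc[i];
				break;
			}

	local_refc[3]->refc++;	/* cpu3 opens a session on node 8 */
	printf("node 8 refc = %d\n", nest_refc[1].refc);	/* prints 1 */
	return 0;
}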
+/* + * imc_mem_init : Function to support memory allocation for the nest, + * core and thread imc pmus. + */ +static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, + int pmu_index) +{ + const char *s; + int nr_cores, cpu, res; + + if (of_property_read_string(parent, "name", &s)) + return -ENODEV; + + switch (pmu_ptr->domain) { + case IMC_DOMAIN_NEST: + /* Update the pmu name */ + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s_imc", "nest_", s); + if (!pmu_ptr->pmu.name) + return -ENOMEM; + + /* Needed for hotplug/migration */ + per_nest_pmu_arr[pmu_index] = pmu_ptr; + break; + case IMC_DOMAIN_CORE: + /* Update the pmu name */ + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); + if (!pmu_ptr->pmu.name) + return -ENOMEM; + + nr_cores = num_present_cpus() / threads_per_core; + pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info), + GFP_KERNEL); + + if (!pmu_ptr->mem_info) + return -ENOMEM; + + core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref), + GFP_KERNEL); + + if (!core_imc_refc) + return -ENOMEM; + + core_imc_pmu = pmu_ptr; + break; + case IMC_DOMAIN_THREAD: + /* Update the pmu name */ + pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc"); + if (!pmu_ptr->pmu.name) + return -ENOMEM; + + thread_imc_mem_size = pmu_ptr->counter_mem_size; + for_each_online_cpu(cpu) { + res = thread_imc_mem_alloc(cpu, pmu_ptr->counter_mem_size); + if (res) + return res; + } + + thread_imc_pmu = pmu_ptr; + break; + default: + return -EINVAL; + } + + return 0; +} + +/* + * init_imc_pmu : Setup and register the IMC pmu device. + * + * @parent: Device tree unit node + * @pmu_ptr: memory allocated for this pmu + * @pmu_idx: Count of nest pmus registered + * + * init_imc_pmu() sets up the pmu cpumask and registers a cpu hotplug callback. + * It handles failure cases and frees memory accordingly. + */ +int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_idx) +{ + int ret; + + ret = imc_mem_init(pmu_ptr, parent, pmu_idx); + if (ret) + goto err_free; + + switch (pmu_ptr->domain) { + case IMC_DOMAIN_NEST: + /* + * Nest imc pmus need only one cpu per chip; we initialize the + * cpumask for the first nest imc pmu and use the same for the + * rest. To handle the cpuhotplug callback unregister, we track + * the number of nest pmus in "nest_pmus". + */ + mutex_lock(&nest_init_lock); + if (nest_pmus == 0) { + ret = init_nest_pmu_ref(); + if (ret) { + mutex_unlock(&nest_init_lock); + goto err_free; + } + /* Register for cpu hotplug notification.
*/ + ret = nest_pmu_cpumask_init(); + if (ret) { + mutex_unlock(&nest_init_lock); + goto err_free; + } + } + nest_pmus++; + mutex_unlock(&nest_init_lock); + break; + case IMC_DOMAIN_CORE: + ret = core_imc_pmu_cpumask_init(); + if (ret) { + cleanup_all_core_imc_memory(); + return ret; + } + + break; + case IMC_DOMAIN_THREAD: + ret = thread_imc_cpu_init(); + if (ret) { + cleanup_all_thread_imc_memory(); + return ret; + } + + break; + default: + return -1; /* Unknown domain */ + } + + ret = update_events_in_group(parent, pmu_ptr); + if (ret) + goto err_free; + + ret = update_pmu_ops(pmu_ptr); + if (ret) + goto err_free; + + ret = perf_pmu_register(&pmu_ptr->pmu, pmu_ptr->pmu.name, -1); + if (ret) + goto err_free; + + pr_info("%s performance monitor hardware support registered\n", + pmu_ptr->pmu.name); + + return 0; + +err_free: + imc_common_cpuhp_mem_free(pmu_ptr); + return ret; +} diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 3f3aa9a7063a..2efee3f196f5 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -99,7 +99,7 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event)) *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; else - *mmcra |= MMCRA_SDAR_MODE_TLB; + *mmcra |= MMCRA_SDAR_MODE_DCACHE; } else *mmcra |= MMCRA_SDAR_MODE_TLB; } @@ -488,8 +488,8 @@ static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int return -1; } -int isa207_get_alternatives(u64 event, u64 alt[], - const unsigned int ev_alt[][MAX_ALT], int size) +int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, + const unsigned int ev_alt[][MAX_ALT]) { int i, j, num_alt = 0; u64 alt_event; @@ -505,5 +505,30 @@ int isa207_get_alternatives(u64 event, u64 alt[], } } + if (flags & PPMU_ONLY_COUNT_RUN) { + /* + * We're only counting in RUN state, so PM_CYC is equivalent to + * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL. 
+ */ + j = num_alt; + for (i = 0; i < num_alt; ++i) { + switch (alt[i]) { + case 0x1e: /* PM_CYC */ + alt[j++] = 0x600f4; /* PM_RUN_CYC */ + break; + case 0x600f4: + alt[j++] = 0x1e; + break; + case 0x2: /* PM_INST_CMPL */ + alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ + break; + case 0x500fa: + alt[j++] = 0x2; + break; + } + } + num_alt = j; + } + + return num_alt; +} diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h index 8acbe6e802c7..6c737d675792 100644 --- a/arch/powerpc/perf/isa207-common.h +++ b/arch/powerpc/perf/isa207-common.h @@ -247,6 +247,7 @@ #define MMCRA_SDAR_MODE_SHIFT 42 #define MMCRA_SDAR_MODE_TLB (1ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_SDAR_MODE_NO_UPDATES ~(0x3ull << MMCRA_SDAR_MODE_SHIFT) +#define MMCRA_SDAR_MODE_DCACHE (2ull << MMCRA_SDAR_MODE_SHIFT) #define MMCRA_IFM_SHIFT 30 #define MMCRA_THR_CTR_MANT_SHIFT 19 #define MMCRA_THR_CTR_MANT_MASK 0x7Ful @@ -287,8 +288,8 @@ int isa207_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[]); void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]); -int isa207_get_alternatives(u64 event, u64 alt[], - const unsigned int ev_alt[][MAX_ALT], int size); +int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags, + const unsigned int ev_alt[][MAX_ALT]); void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags, struct pt_regs *regs); void isa207_get_mem_weight(u64 *weight);
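The hunk above moves the PPMU_ONLY_COUNT_RUN handling into isa207_get_alternatives(): when only RUN-state counting is requested, each PM_CYC / PM_INST_CMPL style event gains its PM_RUN_* twin as an alternative, and vice versa. A distilled, compilable version of that expansion, using the same ISA 2.07 event codes as the patch:

#include <stdio.h>
#include <stdint.h>

#define PM_CYC           0x1e
#define PM_RUN_CYC       0x600f4
#define PM_INST_CMPL     0x2
#define PM_RUN_INST_CMPL 0x500fa

/* Append the RUN-state twin of every recognised event in alt[]. */
static int expand_run_alternatives(uint64_t alt[], int num_alt)
{
	int i, j = num_alt;

	for (i = 0; i < num_alt; i++) {
		switch (alt[i]) {
		case PM_CYC:           alt[j++] = PM_RUN_CYC;       break;
		case PM_RUN_CYC:       alt[j++] = PM_CYC;           break;
		case PM_INST_CMPL:     alt[j++] = PM_RUN_INST_CMPL; break;
		case PM_RUN_INST_CMPL: alt[j++] = PM_INST_CMPL;     break;
		}
	}
	return j;	/* new number of alternatives */
}

int main(void)
{
	uint64_t alt[8] = { PM_CYC };
	int n = expand_run_alternatives(alt, 1);

	printf("%d alternatives, last = %#llx\n", n,
	       (unsigned long long)alt[n - 1]);	/* 2, 0x600f4 */
	return 0;
}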
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 5463516e369b..c9356955cab4 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -50,34 +50,11 @@ static const unsigned int event_alternatives[][MAX_ALT] = { static int power8_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { - int i, j, num_alt = 0; - - num_alt = isa207_get_alternatives(event, alt, event_alternatives, - (int)ARRAY_SIZE(event_alternatives)); - if (flags & PPMU_ONLY_COUNT_RUN) { - /* - * We're only counting in RUN state, so PM_CYC is equivalent to - * PM_RUN_CYC and PM_INST_CMPL === PM_RUN_INST_CMPL. - */ - j = num_alt; - for (i = 0; i < num_alt; ++i) { - switch (alt[i]) { - case PM_CYC: - alt[j++] = PM_RUN_CYC; - break; - case PM_RUN_CYC: - alt[j++] = PM_CYC; - break; - case PM_INST_CMPL: - alt[j++] = PM_RUN_INST_CMPL; - break; - case PM_RUN_INST_CMPL: - alt[j++] = PM_INST_CMPL; - break; - } - } - num_alt = j; - } + int num_alt = 0; + + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(event_alternatives), flags, + event_alternatives); return num_alt; } diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 50689180a6c1..e99c6bf4d391 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -16,13 +16,16 @@ EVENT(PM_CYC, 0x0001e) EVENT(PM_ICT_NOSLOT_CYC, 0x100f8) EVENT(PM_CMPLU_STALL, 0x1e054) EVENT(PM_INST_CMPL, 0x00002) -EVENT(PM_BRU_CMPL, 0x4d05e) +EVENT(PM_BR_CMPL, 0x4d05e) EVENT(PM_BR_MPRED_CMPL, 0x400f6) /* All L1 D cache load references counted at finish, gated by reject */ EVENT(PM_LD_REF_L1, 0x100fc) /* Load Missed L1 */ EVENT(PM_LD_MISS_L1_FIN, 0x2c04e) +EVENT(PM_LD_MISS_L1, 0x3e054) +/* Alternate event code for PM_LD_MISS_L1 */ +EVENT(PM_LD_MISS_L1_ALT, 0x400f0) /* Store Missed L1 */ EVENT(PM_ST_MISS_L1, 0x300f0) /* L1 cache data prefetches */ @@ -62,3 +65,7 @@ EVENT(PM_INST_DISP, 0x200f2) EVENT(PM_INST_DISP_ALT, 0x300f2) /* Alternate Branch event code */ EVENT(PM_BR_CMPL_ALT, 0x10012) +/* Branch events that are not strongly biased */ +EVENT(PM_BR_2PATH, 0x20036) +/* Alternate branch events that are not strongly biased */ +EVENT(PM_BR_2PATH_ALT, 0x40036) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index 2280cf87ff9c..24b5b5b7a206 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -109,14 +109,17 @@ static const unsigned int power9_event_alternatives[][MAX_ALT] = { { PM_INST_DISP, PM_INST_DISP_ALT }, { PM_RUN_CYC_ALT, PM_RUN_CYC }, { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, + { PM_LD_MISS_L1, PM_LD_MISS_L1_ALT }, + { PM_BR_2PATH, PM_BR_2PATH_ALT }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) { int num_alt = 0; - num_alt = isa207_get_alternatives(event, alt, power9_event_alternatives, - (int)ARRAY_SIZE(power9_event_alternatives)); + num_alt = isa207_get_alternatives(event, alt, + ARRAY_SIZE(power9_event_alternatives), flags, + power9_event_alternatives); return num_alt; } @@ -125,7 +128,7 @@ GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC); GENERIC_EVENT_ATTR(stalled-cycles-frontend, PM_ICT_NOSLOT_CYC); GENERIC_EVENT_ATTR(stalled-cycles-backend, PM_CMPLU_STALL); GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL); -GENERIC_EVENT_ATTR(branch-instructions, PM_BRU_CMPL); +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_CMPL); GENERIC_EVENT_ATTR(branch-misses, PM_BR_MPRED_CMPL); GENERIC_EVENT_ATTR(cache-references, PM_LD_REF_L1); GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1_FIN); @@ -143,7 +146,7 @@ CACHE_EVENT_ATTR(LLC-prefetches, PM_L3_PREF_ALL); CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS); CACHE_EVENT_ATTR(LLC-stores, PM_L2_ST); CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL); -CACHE_EVENT_ATTR(branch-loads, PM_BRU_CMPL); +CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL); CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS); CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS); @@ -152,7 +155,7 @@ static struct attribute *power9_events_attr[] = { GENERIC_EVENT_PTR(PM_ICT_NOSLOT_CYC), GENERIC_EVENT_PTR(PM_CMPLU_STALL), GENERIC_EVENT_PTR(PM_INST_CMPL), - GENERIC_EVENT_PTR(PM_BRU_CMPL), +
GENERIC_EVENT_PTR(PM_BR_CMPL), GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL), GENERIC_EVENT_PTR(PM_LD_REF_L1), GENERIC_EVENT_PTR(PM_LD_MISS_L1_FIN), @@ -169,7 +172,7 @@ static struct attribute *power9_events_attr[] = { CACHE_EVENT_PTR(PM_L2_ST_MISS), CACHE_EVENT_PTR(PM_L2_ST), CACHE_EVENT_PTR(PM_BR_MPRED_CMPL), - CACHE_EVENT_PTR(PM_BRU_CMPL), + CACHE_EVENT_PTR(PM_BR_CMPL), CACHE_EVENT_PTR(PM_DTLB_MISS), CACHE_EVENT_PTR(PM_ITLB_MISS), NULL @@ -244,7 +247,7 @@ static int power9_generic_events[] = { [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = PM_ICT_NOSLOT_CYC, [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = PM_CMPLU_STALL, [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BRU_CMPL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = PM_BR_CMPL, [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL, [PERF_COUNT_HW_CACHE_REFERENCES] = PM_LD_REF_L1, [PERF_COUNT_HW_CACHE_MISSES] = PM_LD_MISS_L1_FIN, @@ -370,7 +373,7 @@ static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { }, [ C(BPU) ] = { [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = PM_BRU_CMPL, + [ C(RESULT_ACCESS) ] = PM_BR_CMPL, [ C(RESULT_MISS) ] = PM_BR_MPRED_CMPL, }, [ C(OP_WRITE) ] = { @@ -459,8 +462,8 @@ static int __init init_power9_pmu(void) * Power9 DD1 should use PM_BR_CMPL_ALT event code for * "branches" to provide correct counter value. */ - EVENT_VAR(PM_BRU_CMPL, _g).id = PM_BR_CMPL_ALT; - EVENT_VAR(PM_BRU_CMPL, _c).id = PM_BR_CMPL_ALT; + EVENT_VAR(PM_BR_CMPL, _g).id = PM_BR_CMPL_ALT; + EVENT_VAR(PM_BR_CMPL, _c).id = PM_BR_CMPL_ALT; rc = register_power_pmu(&power9_isa207_pmu); } else { rc = register_power_pmu(&power9_pmu); diff --git a/arch/powerpc/platforms/44x/Makefile b/arch/powerpc/platforms/44x/Makefile index 72b824160660..2c5651992369 100644 --- a/arch/powerpc/platforms/44x/Makefile +++ b/arch/powerpc/platforms/44x/Makefile @@ -1,6 +1,6 @@ -obj-$(CONFIG_44x) += misc_44x.o +obj-y += misc_44x.o machine_check.o ifneq ($(CONFIG_PPC4xx_CPM),y) -obj-$(CONFIG_44x) += idle.o +obj-y += idle.o endif obj-$(CONFIG_PPC44x_SIMPLE) += ppc44x_simple.o obj-$(CONFIG_EBONY) += ebony.o diff --git a/arch/powerpc/platforms/44x/machine_check.c b/arch/powerpc/platforms/44x/machine_check.c new file mode 100644 index 000000000000..034d70d6d335 --- /dev/null +++ b/arch/powerpc/platforms/44x/machine_check.c @@ -0,0 +1,89 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/ptrace.h> + +#include <asm/reg.h> + +int machine_check_440A(struct pt_regs *regs) +{ + unsigned long reason = regs->dsisr; + + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } + return 0; +} + +#ifdef CONFIG_PPC_47x +int machine_check_47x(struct pt_regs *regs) +{ + unsigned long reason = regs->dsisr; + u32 mcsr; + + printk(KERN_ERR "Machine check in kernel mode.\n"); + if (reason & ESR_IMCP) { + printk(KERN_ERR "Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + return 0; + } + mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk(KERN_ERR "Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk(KERN_ERR "Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk(KERN_ERR "Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk(KERN_ERR "TLB Parity Error\n"); + if (mcsr & MCSR_ICP) { + flush_instruction_cache(); + printk(KERN_ERR "I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk(KERN_ERR "D-Cache Search Parity Error\n"); + if (mcsr & PPC47x_MCSR_GPR) + printk(KERN_ERR "GPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_FPR) + printk(KERN_ERR "FPR Parity Error\n"); + if (mcsr & PPC47x_MCSR_IPR) + printk(KERN_ERR "Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + + return 0; +} +#endif /* CONFIG_PPC_47x */ diff --git a/arch/powerpc/platforms/4xx/Makefile b/arch/powerpc/platforms/4xx/Makefile new file mode 100644 index 000000000000..9779c32db34e --- /dev/null +++ b/arch/powerpc/platforms/4xx/Makefile @@ -0,0 +1,8 @@ +obj-y += uic.o machine_check.o +obj-$(CONFIG_PPC4xx_OCM) += ocm.o +obj-$(CONFIG_4xx_SOC) += soc.o +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_PPC4xx_HSTA_MSI) += hsta_msi.o +obj-$(CONFIG_PPC4xx_MSI) += msi.o +obj-$(CONFIG_PPC4xx_CPM) += cpm.o +obj-$(CONFIG_PPC4xx_GPIO) += gpio.o diff --git a/arch/powerpc/sysdev/ppc4xx_cpm.c b/arch/powerpc/platforms/4xx/cpm.c index ba95adf81d8d..ba95adf81d8d 100644 --- a/arch/powerpc/sysdev/ppc4xx_cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c diff --git a/arch/powerpc/sysdev/ppc4xx_gpio.c b/arch/powerpc/platforms/4xx/gpio.c index 5382d04dd872..5382d04dd872 100644 --- a/arch/powerpc/sysdev/ppc4xx_gpio.c +++ b/arch/powerpc/platforms/4xx/gpio.c diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c index 9926ad67af76..9926ad67af76 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/platforms/4xx/hsta_msi.c diff --git a/arch/powerpc/platforms/4xx/machine_check.c b/arch/powerpc/platforms/4xx/machine_check.c new file mode 100644 index 000000000000..aa039dfaf82f --- /dev/null +++ b/arch/powerpc/platforms/4xx/machine_check.c @@ -0,0 +1,26 @@ +/* + * This program is free software; you
can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/ptrace.h> + +#include <asm/reg.h> + +int machine_check_4xx(struct pt_regs *regs) +{ + unsigned long reason = regs->dsisr; + + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); + + return 0; +} diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/platforms/4xx/msi.c index 590dab4f47d6..590dab4f47d6 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/platforms/4xx/msi.c diff --git a/arch/powerpc/sysdev/ppc4xx_ocm.c b/arch/powerpc/platforms/4xx/ocm.c index 85d9e37f5ccb..85d9e37f5ccb 100644 --- a/arch/powerpc/sysdev/ppc4xx_ocm.c +++ b/arch/powerpc/platforms/4xx/ocm.c diff --git a/arch/powerpc/sysdev/ppc4xx_pci.c b/arch/powerpc/platforms/4xx/pci.c index 086aca69ecae..6713edc93a55 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.c +++ b/arch/powerpc/platforms/4xx/pci.c @@ -32,7 +32,7 @@ #include <asm/dcr-regs.h> #include <mm/mmu_decl.h> -#include "ppc4xx_pci.h" +#include "pci.h" static int dma_offset_set; diff --git a/arch/powerpc/sysdev/ppc4xx_pci.h b/arch/powerpc/platforms/4xx/pci.h index bb4821938ab1..bb4821938ab1 100644 --- a/arch/powerpc/sysdev/ppc4xx_pci.h +++ b/arch/powerpc/platforms/4xx/pci.h diff --git a/arch/powerpc/sysdev/ppc4xx_soc.c b/arch/powerpc/platforms/4xx/soc.c index d41134d2f786..d41134d2f786 100644 --- a/arch/powerpc/sysdev/ppc4xx_soc.c +++ b/arch/powerpc/platforms/4xx/soc.c diff --git a/arch/powerpc/sysdev/uic.c b/arch/powerpc/platforms/4xx/uic.c index a00949f3e378..a00949f3e378 100644 --- a/arch/powerpc/sysdev/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig index 80cbcb0ad9b1..536b0c5d5ce3 100644 --- a/arch/powerpc/platforms/8xx/Kconfig +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -5,7 +5,6 @@ config CPM1 choice prompt "8xx Machine Type" depends on PPC_8xx - depends on 8xx default MPC885ADS config MPC8XXFADS @@ -92,7 +91,7 @@ endmenu # menu "MPC8xx CPM Options" - depends on 8xx + depends on PPC_8xx # This doesn't really belong here, but it is convenient to ask # 8xx specific questions. diff --git a/arch/powerpc/platforms/8xx/Makefile b/arch/powerpc/platforms/8xx/Makefile index 76a81c3350a8..f9af3218bd9c 100644 --- a/arch/powerpc/platforms/8xx/Makefile +++ b/arch/powerpc/platforms/8xx/Makefile @@ -1,7 +1,7 @@ # # Makefile for the PowerPC 8xx linux kernel. 
# -obj-$(CONFIG_PPC_8xx) += m8xx_setup.o +obj-y += m8xx_setup.o machine_check.o pic.o obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o obj-$(CONFIG_MPC86XADS) += mpc86xads_setup.o obj-$(CONFIG_PPC_EP88XC) += ep88xc.o diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index f81069f79a94..1917d69f84df 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -23,7 +23,7 @@ #include <asm/fs_pd.h> #include <mm/mmu_decl.h> -#include <sysdev/mpc8xx_pic.h> +#include "pic.h" #include "mpc8xx.h" diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c new file mode 100644 index 000000000000..402016705a39 --- /dev/null +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -0,0 +1,37 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/ptrace.h> + +#include <asm/reg.h> + +int machine_check_8xx(struct pt_regs *regs) +{ + unsigned long reason = regs->msr; + + pr_err("Machine check in kernel mode.\n"); + pr_err("Caused by (from SRR1=%lx): ", reason); + if (reason & 0x40000000) + pr_err("Fetch error at address %lx\n", regs->nip); + else + pr_err("Data access error at address %lx\n", regs->dar); + +#ifdef CONFIG_PCI + /* the qspan pci read routines can cause machine checks -- Cort + * + * yuck !!! that totally needs to go away ! There are better ways + * to deal with that than having a wart in the mcheck handler. + * -- BenH + */ + bad_page_fault(regs, regs->dar, SIGBUS); + return 1; +#else + return 0; +#endif +} diff --git a/arch/powerpc/sysdev/mpc8xx_pic.c b/arch/powerpc/platforms/8xx/pic.c index 2842f9d63d21..8d5a25d43ef3 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.c +++ b/arch/powerpc/platforms/8xx/pic.c @@ -9,7 +9,7 @@ #include <asm/io.h> #include <asm/8xx_immap.h> -#include "mpc8xx_pic.h" +#include "pic.h" #define PIC_VEC_SPURRIOUS 15 diff --git a/arch/powerpc/sysdev/mpc8xx_pic.h b/arch/powerpc/platforms/8xx/pic.h index 9fe00eebdc8b..9fe00eebdc8b 100644 --- a/arch/powerpc/sysdev/mpc8xx_pic.h +++ b/arch/powerpc/platforms/8xx/pic.h diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 2f629e0551e9..13663efc1d31 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -32,7 +32,6 @@ config PPC_85xx config PPC_8xx bool "Freescale 8xx" select FSL_SOC - select 8xx select PPC_LIB_RHEAP select SYS_SUPPORTS_HUGETLBFS @@ -149,10 +148,6 @@ config 6xx depends on PPC32 && PPC_BOOK3S select PPC_HAVE_PMU_SUPPORT -# this is temp to handle compat with arch=ppc -config 8xx - bool - config E500 select FSL_EMB_PERFMON select PPC_FSL_BOOK3E @@ -271,44 +266,6 @@ config VSX If in doubt, say Y here. -config PPC_ICSWX - bool "Support for PowerPC icswx coprocessor instruction" - depends on PPC_BOOK3S_64 - default n - ---help--- - - This option enables kernel support for the PowerPC Initiate - Coprocessor Store Word (icswx) coprocessor instruction on POWER7 - and POWER8 processors. POWER9 uses new copy/paste instructions - to invoke the coprocessor. - - This option is only useful if you have a processor that supports - the icswx coprocessor instruction. 
It does not have any effect - on processors without the icswx coprocessor instruction. - - This option slightly increases kernel memory usage. - - If in doubt, say N here. - -config PPC_ICSWX_PID - bool "icswx requires direct PID management" - depends on PPC_ICSWX - default y - ---help--- - The PID register in server is used explicitly for ICSWX. In - embedded systems PID management is done by the system. - -config PPC_ICSWX_USE_SIGILL - bool "Should a bad CT cause a SIGILL?" - depends on PPC_ICSWX - default n - ---help--- - Should a bad CT used for "non-record form ICSWX" cause an - illegal instruction signal or should it be silent as - architected. - - If in doubt, say N here. - config SPE_POSSIBLE def_bool y depends on E200 || (E500 && !PPC_E500MC) @@ -413,7 +370,7 @@ config NR_CPUS config NOT_COHERENT_CACHE bool - depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON + depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON default n if PPC_47x default y diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 469ef170d218..d7a55ecfaee5 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o obj-$(CONFIG_PPC_PMAC) += powermac/ obj-$(CONFIG_PPC_CHRP) += chrp/ +obj-$(CONFIG_4xx) += 4xx/ obj-$(CONFIG_40x) += 40x/ obj-$(CONFIG_44x) += 44x/ obj-$(CONFIG_PPC_MPC512x) += 512x/ diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c index 2b4dc6abde6c..19760712b39d 100644 --- a/arch/powerpc/platforms/chrp/pegasos_eth.c +++ b/arch/powerpc/platforms/chrp/pegasos_eth.c @@ -63,7 +63,7 @@ static struct platform_device mv643xx_eth_mvmdio_device = { .name = "orion-mdio", .id = -1, .num_resources = ARRAY_SIZE(mv643xx_eth_mvmdio_resources), - .resource = mv643xx_eth_shared_resources, + .resource = mv643xx_eth_mvmdio_resources, }; static struct resource mv643xx_eth_port1_resources[] = { diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index b5d98cb3f482..177b3d4542b5 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -2,7 +2,7 @@ obj-y += setup.o opal-wrappers.o opal.o opal-async.o idle.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-y += opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o -obj-y += opal-kmsg.o +obj-y += opal-kmsg.o opal-powercap.o opal-psr.o opal-sensor-groups.o obj-$(CONFIG_SMP) += smp.o subcore.o subcore-asm.o obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o @@ -12,3 +12,4 @@ obj-$(CONFIG_PPC_SCOM) += opal-xscom.o obj-$(CONFIG_MEMORY_FAILURE) += opal-memory-errors.o obj-$(CONFIG_TRACEPOINTS) += opal-tracepoints.o obj-$(CONFIG_OPAL_PRD) += opal-prd.o +obj-$(CONFIG_PERF_EVENTS) += opal-imc.o diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 2abee070373f..a1296e72cd2e 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -68,7 +68,7 @@ static int pnv_save_sprs_for_deep_states(void) * all cpus at boot. Get these reg values of current cpu and use the * same across all cpus. 
*/ - uint64_t lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + uint64_t lpcr_val = mfspr(SPRN_LPCR); uint64_t hid0_val = mfspr(SPRN_HID0); uint64_t hid1_val = mfspr(SPRN_HID1); uint64_t hid4_val = mfspr(SPRN_HID4); @@ -355,6 +355,14 @@ void power9_idle(void) } #ifdef CONFIG_HOTPLUG_CPU +static void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val) +{ + u64 pir = get_hard_smp_processor_id(cpu); + + mtspr(SPRN_LPCR, lpcr_val); + opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val); +} + /* * pnv_cpu_offline: A function that puts the CPU into the deepest * available platform idle state on a CPU-Offline. @@ -364,6 +372,20 @@ unsigned long pnv_cpu_offline(unsigned int cpu) { unsigned long srr1; u32 idle_states = pnv_get_supported_cpuidle_states(); + u64 lpcr_val; + + /* + * We don't want to take decrementer interrupts while we are + * offline, so clear LPCR:PECE1. We keep PECE2 (and + * LPCR_PECE_HVEE on P9) enabled as to let IPIs in. + * + * If the CPU gets woken up by a special wakeup, ensure that + * the SLW engine sets LPCR with decrementer bit cleared, else + * the CPU will come back to the kernel due to a spurious + * wakeup. + */ + lpcr_val = mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); __ppc64_runlatch_off(); @@ -394,6 +416,16 @@ unsigned long pnv_cpu_offline(unsigned int cpu) __ppc64_runlatch_on(); + /* + * Re-enable decrementer interrupts in LPCR. + * + * Further, we want stop states to be woken up by decrementer + * for non-hotplug cases. So program the LPCR via stop api as + * well. + */ + lpcr_val = mfspr(SPRN_LPCR) | (u64)LPCR_PECE1; + pnv_program_cpu_hotplug_lpcr(cpu, lpcr_val); + return srr1; } #endif diff --git a/arch/powerpc/platforms/powernv/opal-flash.c b/arch/powerpc/platforms/powernv/opal-flash.c index 4ec6219287fc..2fa3ac80cb4e 100644 --- a/arch/powerpc/platforms/powernv/opal-flash.c +++ b/arch/powerpc/platforms/powernv/opal-flash.c @@ -520,7 +520,7 @@ out: * update_flash : Flash new firmware image * */ -static struct bin_attribute image_data_attr = { +static const struct bin_attribute image_data_attr = { .attr = {.name = "image", .mode = 0200}, .size = MAX_IMAGE_SIZE, /* Limit image size */ .write = image_data_write, diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c new file mode 100644 index 000000000000..b903bf5e6006 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -0,0 +1,226 @@ +/* + * OPAL IMC interface detection driver + * Supported on POWERNV platform + * + * Copyright (C) 2017 Madhavan Srinivasan, IBM Corporation. + * (C) 2017 Anju T Sudhakar, IBM Corporation. + * (C) 2017 Hemant K Shaw, IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or later version. 
+ */ +#include <linux/kernel.h> +#include <linux/platform_device.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_platform.h> +#include <linux/crash_dump.h> +#include <asm/opal.h> +#include <asm/io.h> +#include <asm/imc-pmu.h> +#include <asm/cputhreads.h> + +/* + * imc_get_mem_addr_nest: Function to get the nest counter memory region + * for each chip + */ +static int imc_get_mem_addr_nest(struct device_node *node, + struct imc_pmu *pmu_ptr, + u32 offset) +{ + int nr_chips = 0, i; + u64 *base_addr_arr, baddr; + u32 *chipid_arr; + + nr_chips = of_property_count_u32_elems(node, "chip-id"); + if (nr_chips <= 0) + return -ENODEV; + + base_addr_arr = kcalloc(nr_chips, sizeof(u64), GFP_KERNEL); + if (!base_addr_arr) + return -ENOMEM; + + chipid_arr = kcalloc(nr_chips, sizeof(u32), GFP_KERNEL); + if (!chipid_arr) { + kfree(base_addr_arr); + return -ENOMEM; + } + + if (of_property_read_u32_array(node, "chip-id", chipid_arr, nr_chips)) + goto error; + + if (of_property_read_u64_array(node, "base-addr", base_addr_arr, + nr_chips)) + goto error; + + pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(struct imc_mem_info), + GFP_KERNEL); + if (!pmu_ptr->mem_info) + goto error; + + for (i = 0; i < nr_chips; i++) { + pmu_ptr->mem_info[i].id = chipid_arr[i]; + baddr = base_addr_arr[i] + offset; + pmu_ptr->mem_info[i].vbase = phys_to_virt(baddr); + } + + pmu_ptr->imc_counter_mmaped = true; + kfree(base_addr_arr); + kfree(chipid_arr); + return 0; + +error: + kfree(pmu_ptr->mem_info); + kfree(base_addr_arr); + kfree(chipid_arr); + return -1; +} + +/* + * imc_pmu_create : Takes the parent device which is the pmu unit, pmu_index + * and domain as the inputs. + * Allocates memory for the struct imc_pmu, sets up its domain, size and offsets + */ +static int imc_pmu_create(struct device_node *parent, int pmu_index, int domain) +{ + int ret = 0; + struct imc_pmu *pmu_ptr; + u32 offset; + + /* memory for pmu */ + pmu_ptr = kzalloc(sizeof(struct imc_pmu), GFP_KERNEL); + if (!pmu_ptr) + return -ENOMEM; + + /* Set the domain */ + pmu_ptr->domain = domain; + + ret = of_property_read_u32(parent, "size", &pmu_ptr->counter_mem_size); + if (ret) { + ret = -EINVAL; + goto free_pmu; + } + + if (!of_property_read_u32(parent, "offset", &offset)) { + if (imc_get_mem_addr_nest(parent, pmu_ptr, offset)) { + ret = -EINVAL; + goto free_pmu; + } + } + + /* Function to register the IMC pmu */ + ret = init_imc_pmu(parent, pmu_ptr, pmu_index); + if (ret) + pr_err("IMC PMU %d register failed\n", pmu_index); + + return ret; + +free_pmu: + kfree(pmu_ptr); + return ret; +} + +static void disable_nest_pmu_counters(void) +{ + int nid, cpu; + struct cpumask *l_cpumask; + + get_online_cpus(); + for_each_online_node(nid) { + l_cpumask = cpumask_of_node(nid); + cpu = cpumask_first(l_cpumask); + opal_imc_counters_stop(OPAL_IMC_COUNTERS_NEST, + get_hard_smp_processor_id(cpu)); + } + put_online_cpus(); +} + +static void disable_core_pmu_counters(void) +{ + cpumask_t cores_map; + int cpu, rc; + + get_online_cpus(); + /* Disable the IMC Core functions */ + cores_map = cpu_online_cores_map(); + for_each_cpu(cpu, &cores_map) { + rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE, + get_hard_smp_processor_id(cpu)); + if (rc) + pr_err("%s: Failed to stop Core (cpu = %d)\n", + __func__, cpu); + } + put_online_cpus(); +}
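imc_get_mem_addr_nest() above pairs the n-th "chip-id" cell with the n-th "base-addr" cell and applies the common counter offset. A userspace sketch of that pairing, with the device-tree reads replaced by static arrays (addresses, ids and the offset are invented for the demo):

#include <stdio.h>
#include <stdint.h>

struct mem_info { uint32_t id; uint64_t addr; };

int main(void)
{
	uint32_t chipid[] = { 0, 8 };			/* "chip-id" cells */
	uint64_t base_addr[] = { 0x200000000000ULL,	/* "base-addr" cells */
				 0x201000000000ULL };
	uint32_t offset = 0x180000;			/* this pmu's slice */
	struct mem_info info[2];
	int i;

	for (i = 0; i < 2; i++) {
		info[i].id = chipid[i];
		/* the kernel maps this physical address via phys_to_virt() */
		info[i].addr = base_addr[i] + offset;
		printf("chip %u -> %#llx\n", info[i].id,
		       (unsigned long long)info[i].addr);
	}
	return 0;
}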
+ +static int opal_imc_counters_probe(struct platform_device *pdev) +{ + struct device_node *imc_dev = pdev->dev.of_node; + int pmu_count = 0, domain; + u32 type; + + /* + * Check whether this is a kdump kernel. If yes, force the engines to + * stop and return. + */ + if (is_kdump_kernel()) { + disable_nest_pmu_counters(); + disable_core_pmu_counters(); + return -ENODEV; + } + + for_each_compatible_node(imc_dev, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(imc_dev, "type", &type)) { + pr_warn("IMC Device without type property\n"); + continue; + } + + switch (type) { + case IMC_TYPE_CHIP: + domain = IMC_DOMAIN_NEST; + break; + case IMC_TYPE_CORE: + domain = IMC_DOMAIN_CORE; + break; + case IMC_TYPE_THREAD: + domain = IMC_DOMAIN_THREAD; + break; + default: + pr_warn("IMC Unknown Device type\n"); + domain = -1; + break; + } + + if (!imc_pmu_create(imc_dev, pmu_count, domain)) + pmu_count++; + } + + return 0; +} + +static void opal_imc_counters_shutdown(struct platform_device *pdev) +{ + /* + * This function only stops the engines, which is the bare minimum. + * TODO: Need to handle proper memory cleanup and pmu + * unregister. + */ + disable_nest_pmu_counters(); + disable_core_pmu_counters(); +} + +static const struct of_device_id opal_imc_match[] = { + { .compatible = IMC_DTB_COMPAT }, + {}, +}; + +static struct platform_driver opal_imc_driver = { + .driver = { + .name = "opal-imc-counters", + .of_match_table = opal_imc_match, + }, + .probe = opal_imc_counters_probe, + .shutdown = opal_imc_counters_shutdown, +}; + +builtin_platform_driver(opal_imc_driver);
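The powercap, power-shift-ratio and sensor-group sysfs handlers added below all share one control flow: take an async token, serialize on a mutex, issue the OPAL call, then either consume an immediate OPAL_SUCCESS or wait for the async completion, releasing the token on every path. A standalone sketch of just that flow; every opal_* function here is a mock stub written for this illustration, not a real API:

#include <stdio.h>

#define OPAL_SUCCESS          0
#define OPAL_ASYNC_COMPLETION 1

static int opal_async_get_token(void)       { return 42; }
static void opal_async_release_token(int t) { (void)t; }
static int opal_do_request(int token)       { (void)token; return OPAL_ASYNC_COMPLETION; }
static int opal_async_wait_response(int t)  { (void)t; return 0; /* pretend it completed */ }

static int opal_request(void)
{
	int token = opal_async_get_token();
	int ret;

	if (token < 0)
		return token;
	/* a real handler takes its mutex here */
	ret = opal_do_request(token);
	switch (ret) {
	case OPAL_ASYNC_COMPLETION:
		ret = opal_async_wait_response(token);	/* block until firmware answers */
		break;
	case OPAL_SUCCESS:
		ret = 0;
		break;
	default:
		ret = -1;	/* the kernel maps this via opal_error_code() */
	}
	opal_async_release_token(token);	/* released on every path */
	return ret;
}

int main(void)
{
	printf("request -> %d\n", opal_request());
	return 0;
}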
diff --git a/arch/powerpc/platforms/powernv/opal-powercap.c b/arch/powerpc/platforms/powernv/opal-powercap.c new file mode 100644 index 000000000000..badb29bde93f --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-powercap.c @@ -0,0 +1,244 @@ +/* + * PowerNV OPAL Powercap interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "opal-powercap: " fmt + +#include <linux/of.h> +#include <linux/kobject.h> +#include <linux/slab.h> + +#include <asm/opal.h> + +DEFINE_MUTEX(powercap_mutex); + +static struct kobject *powercap_kobj; + +struct powercap_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct pcap { + struct attribute_group pg; + struct powercap_attr *pattrs; +} *pcaps; + +static ssize_t powercap_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_get_powercap(pcap_attr->handle, token, (u32 *)__pa(&pcap)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(pcap)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t powercap_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, + size_t count) +{ + struct powercap_attr *pcap_attr = container_of(attr, + struct powercap_attr, attr); + struct opal_msg msg; + u32 pcap; + int ret, token; + + ret = kstrtouint(buf, 0, &pcap); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&powercap_mutex); + if (ret) + goto out_token; + + ret = opal_set_powercap(pcap_attr->handle, token, pcap); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&powercap_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static void powercap_add_attr(int handle, const char *name, + struct powercap_attr *attr) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = name; + attr->attr.attr.mode = 0444; + attr->attr.show = powercap_show; +} + +void __init opal_powercap_init(void) +{ + struct device_node *powercap, *node; + int i = 0; + + powercap = of_find_compatible_node(NULL, NULL, "ibm,opal-powercap"); + if (!powercap) { + pr_devel("Powercap node not found\n"); + return; + } + + pcaps = kcalloc(of_get_child_count(powercap), sizeof(*pcaps), + GFP_KERNEL); + if (!pcaps) + return; + + powercap_kobj = kobject_create_and_add("powercap", opal_kobj); + if (!powercap_kobj) { + pr_warn("Failed to create powercap kobject\n"); + goto out_pcaps; + } + + i = 0; + for_each_child_of_node(powercap, node) { + u32 cur, min, max; + int j = 0; + bool has_cur = false, has_min = false, has_max = false; + + if (!of_property_read_u32(node, "powercap-min",
&min)) { + j++; + has_min = true; + } + + if (!of_property_read_u32(node, "powercap-max", &max)) { + j++; + has_max = true; + } + + if (!of_property_read_u32(node, "powercap-current", &cur)) { + j++; + has_cur = true; + } + + pcaps[i].pattrs = kcalloc(j, sizeof(struct powercap_attr), + GFP_KERNEL); + if (!pcaps[i].pattrs) + goto out_pcaps_pattrs; + + pcaps[i].pg.attrs = kcalloc(j + 1, sizeof(struct attribute *), + GFP_KERNEL); + if (!pcaps[i].pg.attrs) { + kfree(pcaps[i].pattrs); + goto out_pcaps_pattrs; + } + + j = 0; + pcaps[i].pg.name = node->name; + if (has_min) { + powercap_add_attr(min, "powercap-min", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_max) { + powercap_add_attr(max, "powercap-max", + &pcaps[i].pattrs[j]); + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (has_cur) { + powercap_add_attr(cur, "powercap-current", + &pcaps[i].pattrs[j]); + pcaps[i].pattrs[j].attr.attr.mode |= 0220; + pcaps[i].pattrs[j].attr.store = powercap_store; + pcaps[i].pg.attrs[j] = &pcaps[i].pattrs[j].attr.attr; + j++; + } + + if (sysfs_create_group(powercap_kobj, &pcaps[i].pg)) { + pr_warn("Failed to create powercap attribute group %s\n", + pcaps[i].pg.name); + goto out_pcaps_pattrs; + } + i++; + } + + return; + +out_pcaps_pattrs: + while (--i >= 0) { + kfree(pcaps[i].pattrs); + kfree(pcaps[i].pg.attrs); + } + kobject_put(powercap_kobj); +out_pcaps: + kfree(pcaps); +} diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 2d6ee1c5ad85..de4dd09f4a15 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -241,15 +241,9 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf, size = be16_to_cpu(hdr.size); - msg = kmalloc(size, GFP_KERNEL); - if (!msg) - return -ENOMEM; - - rc = copy_from_user(msg, buf, size); - if (rc) { - size = -EFAULT; - goto out_free; - } + msg = memdup_user(buf, size); + if (IS_ERR(msg)) + return PTR_ERR(msg); rc = opal_prd_msg(msg); if (rc) { @@ -257,7 +251,6 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf, size = -EIO; } -out_free: kfree(msg); return size; diff --git a/arch/powerpc/platforms/powernv/opal-psr.c b/arch/powerpc/platforms/powernv/opal-psr.c new file mode 100644 index 000000000000..7313b7fc9071 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-psr.c @@ -0,0 +1,175 @@ +/* + * PowerNV OPAL Power-Shift-Ratio interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#define pr_fmt(fmt) "opal-psr: " fmt + +#include <linux/of.h> +#include <linux/kobject.h> +#include <linux/slab.h> + +#include <asm/opal.h> + +DEFINE_MUTEX(psr_mutex); + +static struct kobject *psr_kobj; + +struct psr_attr { + u32 handle; + struct kobj_attribute attr; +} *psr_attrs; + +static ssize_t psr_show(struct kobject *kobj, struct kobj_attribute *attr, + char *buf) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_get_power_shift_ratio(psr_attr->handle, token, + (u32 *)__pa(&psr)); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) { + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + } + break; + case OPAL_SUCCESS: + ret = sprintf(buf, "%u\n", be32_to_cpu(psr)); + if (ret < 0) + ret = -EIO; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static ssize_t psr_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct psr_attr *psr_attr = container_of(attr, struct psr_attr, attr); + struct opal_msg msg; + int psr, ret, token; + + ret = kstrtoint(buf, 0, &psr); + if (ret) + return ret; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&psr_mutex); + if (ret) + goto out_token; + + ret = opal_set_power_shift_ratio(psr_attr->handle, token, psr); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&psr_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +void __init opal_psr_init(void) +{ + struct device_node *psr, *node; + int i = 0; + + psr = of_find_compatible_node(NULL, NULL, + "ibm,opal-power-shift-ratio"); + if (!psr) { + pr_devel("Power-shift-ratio node not found\n"); + return; + } + + psr_attrs = kcalloc(of_get_child_count(psr), sizeof(struct psr_attr), + GFP_KERNEL); + if (!psr_attrs) + return; + + psr_kobj = kobject_create_and_add("psr", opal_kobj); + if (!psr_kobj) { + pr_warn("Failed to create psr kobject\n"); + goto out; + } + + for_each_child_of_node(psr, node) { + if (of_property_read_u32(node, "handle", + &psr_attrs[i].handle)) + goto out_kobj; + + sysfs_attr_init(&psr_attrs[i].attr.attr); + if (of_property_read_string(node, "label", + &psr_attrs[i].attr.attr.name)) + goto out_kobj; + psr_attrs[i].attr.attr.mode = 0664; + psr_attrs[i].attr.show = psr_show; + psr_attrs[i].attr.store = psr_store; + if (sysfs_create_file(psr_kobj, &psr_attrs[i].attr.attr)) { + pr_devel("Failed to create psr sysfs file %s\n", + psr_attrs[i].attr.attr.name); + goto out_kobj; + } + i++; + } + + return; +out_kobj: + kobject_put(psr_kobj); +out: + kfree(psr_attrs); +} 
diff --git a/arch/powerpc/platforms/powernv/opal-sensor-groups.c b/arch/powerpc/platforms/powernv/opal-sensor-groups.c new file mode 100644 index 000000000000..7e5a235ebf76 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sensor-groups.c @@ -0,0 +1,212 @@ +/* + * PowerNV OPAL Sensor-groups interface + * + * Copyright 2017 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#define pr_fmt(fmt) "opal-sensor-groups: " fmt + +#include <linux/of.h> +#include <linux/kobject.h> +#include <linux/slab.h> + +#include <asm/opal.h> + +DEFINE_MUTEX(sg_mutex); + +static struct kobject *sg_kobj; + +struct sg_attr { + u32 handle; + struct kobj_attribute attr; +}; + +static struct sensor_group { + char name[20]; + struct attribute_group sg; + struct sg_attr *sgattrs; +} *sgs; + +static ssize_t sg_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + struct sg_attr *sattr = container_of(attr, struct sg_attr, attr); + struct opal_msg msg; + u32 data; + int ret, token; + + ret = kstrtouint(buf, 0, &data); + if (ret) + return ret; + + if (data != 1) + return -EINVAL; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_devel("Failed to get token\n"); + return token; + } + + ret = mutex_lock_interruptible(&sg_mutex); + if (ret) + goto out_token; + + ret = opal_sensor_group_clear(sattr->handle, token); + switch (ret) { + case OPAL_ASYNC_COMPLETION: + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_devel("Failed to wait for the async response\n"); + ret = -EIO; + goto out; + } + ret = opal_error_code(opal_get_async_rc(msg)); + if (!ret) + ret = count; + break; + case OPAL_SUCCESS: + ret = count; + break; + default: + ret = opal_error_code(ret); + } + +out: + mutex_unlock(&sg_mutex); +out_token: + opal_async_release_token(token); + return ret; +} + +static struct sg_ops_info { + int opal_no; + const char *attr_name; + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +} ops_info[] = { + { OPAL_SENSOR_GROUP_CLEAR, "clear", sg_store }, +}; + +static void add_attr(int handle, struct sg_attr *attr, int index) +{ + attr->handle = handle; + sysfs_attr_init(&attr->attr.attr); + attr->attr.attr.name = ops_info[index].attr_name; + attr->attr.attr.mode = 0220; + attr->attr.store = ops_info[index].store; +} + +static int add_attr_group(const __be32 *ops, int len, struct sensor_group *sg, + u32 handle) +{ + int i, j; + int count = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) { + add_attr(handle, &sg->sgattrs[count], j); + sg->sg.attrs[count] = + &sg->sgattrs[count].attr.attr; + count++; + } + + return sysfs_create_group(sg_kobj, &sg->sg); +} + +static int get_nr_attrs(const __be32 *ops, int len) +{ + int i, j; + int nr_attrs = 0; + + for (i = 0; i < len; i++) + for (j = 0; j < ARRAY_SIZE(ops_info); j++) + if (be32_to_cpu(ops[i]) == ops_info[j].opal_no) + nr_attrs++; + + return nr_attrs; +}
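sg_store() above recovers its per-attribute handle from nothing but the kobj_attribute pointer, via container_of() on the embedding struct; powercap_show()/powercap_store() use the same trick. A minimal userspace illustration with plain stand-in types:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct attr { const char *name; };
struct sg_attr { unsigned int handle; struct attr attr; };

static void store(struct attr *a)
{
	/* walk back from the embedded member to the enclosing struct */
	struct sg_attr *sattr = container_of(a, struct sg_attr, attr);

	printf("%s -> handle %u\n", a->name, sattr->handle);
}

int main(void)
{
	struct sg_attr clear = { .handle = 7, .attr = { "clear" } };

	store(&clear.attr);	/* sysfs would hand us only &clear.attr */
	return 0;
}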
+ if (!sgs) + return; + + sg_kobj = kobject_create_and_add("sensor_groups", opal_kobj); + if (!sg_kobj) { + pr_warn("Failed to create sensor group kobject\n"); + goto out_sgs; + } + + for_each_child_of_node(sg, node) { + const __be32 *ops; + u32 sgid, len, nr_attrs, chipid; + + ops = of_get_property(node, "ops", &len); + if (!ops) + continue; + + nr_attrs = get_nr_attrs(ops, len); + if (!nr_attrs) + continue; + + sgs[i].sgattrs = kcalloc(nr_attrs, sizeof(struct sg_attr), + GFP_KERNEL); + if (!sgs[i].sgattrs) + goto out_sgs_sgattrs; + + sgs[i].sg.attrs = kcalloc(nr_attrs + 1, + sizeof(struct attribute *), + GFP_KERNEL); + + if (!sgs[i].sg.attrs) { + kfree(sgs[i].sgattrs); + goto out_sgs_sgattrs; + } + + if (of_property_read_u32(node, "sensor-group-id", &sgid)) { + pr_warn("sensor-group-id property not found\n"); + goto out_sgs_sgattrs; + } + + if (!of_property_read_u32(node, "ibm,chip-id", &chipid)) + sprintf(sgs[i].name, "%s%d", node->name, chipid); + else + sprintf(sgs[i].name, "%s", node->name); + + sgs[i].sg.name = sgs[i].name; + if (add_attr_group(ops, len, &sgs[i], sgid)) { + pr_warn("Failed to create sensor attribute group %s\n", + sgs[i].sg.name); + goto out_sgs_sgattrs; + } + i++; + } + + return; + +out_sgs_sgattrs: + while (--i >= 0) { + kfree(sgs[i].sgattrs); + kfree(sgs[i].sg.attrs); + } + kobject_put(sg_kobj); +out_sgs: + kfree(sgs); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 4ca6c26a56d5..951fa93f881d 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -310,3 +310,12 @@ OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP); OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT); OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT); OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR); +OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT); +OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START); +OPAL_CALL(opal_imc_counters_stop, OPAL_IMC_COUNTERS_STOP); +OPAL_CALL(opal_pci_set_p2p, OPAL_PCI_SET_P2P); +OPAL_CALL(opal_get_powercap, OPAL_GET_POWERCAP); +OPAL_CALL(opal_set_powercap, OPAL_SET_POWERCAP); +OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO); +OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index cad6b57ce494..3c122d08b6c3 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -16,6 +16,7 @@ #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/of_platform.h> +#include <linux/of_address.h> #include <linux/interrupt.h> #include <linux/notifier.h> #include <linux/slab.h> @@ -30,6 +31,7 @@ #include <asm/opal.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/imc-pmu.h> #include "powernv.h" @@ -720,6 +722,15 @@ static void opal_pdev_init(const char *compatible) of_platform_device_create(np, NULL, NULL); } +static void __init opal_imc_init_dev(void) +{ + struct device_node *np; + + np = of_find_compatible_node(NULL, NULL, IMC_DTB_COMPAT); + if (np) + of_platform_device_create(np, NULL, NULL); +} + static int kopald(void *unused) { unsigned long timeout = msecs_to_jiffies(opal_heartbeat) + 1; @@ -793,6 +804,9 @@ static int __init opal_init(void) /* Setup a heartbeat thread if requested by OPAL */ opal_init_heartbeat(); + /* Detect In-Memory Collection counters and create devices */ +
opal_imc_init_dev(); + /* Create leds platform devices */ leds = of_find_node_by_path("/ibm,opal/leds"); if (leds) { @@ -836,6 +850,15 @@ static int __init opal_init(void) /* Initialise OPAL kmsg dumper for flushing console on panic */ opal_kmsg_init(); + /* Initialise OPAL powercap interface */ + opal_powercap_init(); + + /* Initialise OPAL Power-Shifting-Ratio interface */ + opal_psr_init(); + + /* Initialise OPAL sensor groups */ + opal_sensor_groups_init(); + return 0; } machine_subsys_initcall(powernv, opal_init); @@ -952,6 +975,7 @@ int opal_error_code(int rc) case OPAL_UNSUPPORTED: return -EIO; case OPAL_HARDWARE: return -EIO; case OPAL_INTERNAL_ERROR: return -EIO; + case OPAL_TIMEOUT: return -ETIMEDOUT; default: pr_err("%s: unexpected OPAL error %d\n", __func__, rc); return -EIO; diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 437613588df1..026a06c51458 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1408,7 +1408,6 @@ m64_failed: static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group, int num); -static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe) { @@ -2394,7 +2393,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group, return 0; } -static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) +void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable) { uint16_t window_id = (pe->pe_number << 1 ) + 1; int64_t rc; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 7905d179d036..5422f4a6317c 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -37,6 +37,8 @@ #include "powernv.h" #include "pci.h" +static DEFINE_MUTEX(p2p_mutex); + int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) { struct device_node *parent = np; @@ -1017,6 +1019,79 @@ void pnv_pci_dma_bus_setup(struct pci_bus *bus) } } +int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target, u64 desc) +{ + struct pci_controller *hose; + struct pnv_phb *phb_init, *phb_target; + struct pnv_ioda_pe *pe_init; + int rc; + + if (!opal_check_token(OPAL_PCI_SET_P2P)) + return -ENXIO; + + hose = pci_bus_to_host(initiator->bus); + phb_init = hose->private_data; + + hose = pci_bus_to_host(target->bus); + phb_target = hose->private_data; + + pe_init = pnv_ioda_get_pe(initiator); + if (!pe_init) + return -ENODEV; + + /* + * Configuring the initiator's PHB requires adjusting its + * TVE#1 setting. Since the same device can be an initiator + * several times for different target devices, we need to keep + * a reference count to know when we can restore the default + * bypass setting on its TVE#1 when disabling. OPAL does not + * track PE states, so we add a reference count on the PE + * in Linux. + * + * For the target, the configuration is per PHB, so we keep a + * target reference count on the PHB.
+ */ + mutex_lock(&p2p_mutex); + + if (desc & OPAL_PCI_P2P_ENABLE) { + /* always go to OPAL to validate the configuration */ + rc = opal_pci_set_p2p(phb_init->opal_id, phb_target->opal_id, + desc, pe_init->pe_number); + + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + + pe_init->p2p_initiator_count++; + phb_target->p2p_target_count++; + } else { + if (!pe_init->p2p_initiator_count || + !phb_target->p2p_target_count) { + rc = -EINVAL; + goto out; + } + + if (--pe_init->p2p_initiator_count == 0) + pnv_pci_ioda2_set_bypass(pe_init, true); + + if (--phb_target->p2p_target_count == 0) { + rc = opal_pci_set_p2p(phb_init->opal_id, + phb_target->opal_id, desc, + pe_init->pe_number); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto out; + } + } + } + rc = 0; +out: + mutex_unlock(&p2p_mutex); + return rc; +} +EXPORT_SYMBOL_GPL(pnv_pci_set_p2p); + void pnv_pci_shutdown(void) { struct pci_controller *hose; diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h index f16bc403ec03..a95273c524f6 100644 --- a/arch/powerpc/platforms/powernv/pci.h +++ b/arch/powerpc/platforms/powernv/pci.h @@ -78,6 +78,9 @@ struct pnv_ioda_pe { struct pnv_ioda_pe *master; struct list_head slaves; + /* PCI peer-to-peer */ + int p2p_initiator_count; + /* Link in list of PE#s */ struct list_head list; }; @@ -189,6 +192,7 @@ struct pnv_phb { #ifdef CONFIG_CXL_BASE struct cxl_afu *cxl_afu; #endif + int p2p_target_count; }; extern struct pci_ops pnv_pci_ops; @@ -229,6 +233,7 @@ extern void pnv_teardown_msi_irqs(struct pci_dev *pdev); extern struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev); extern void pnv_set_msi_irq_chip(struct pnv_phb *phb, unsigned int virq); extern bool pnv_pci_enable_device_hook(struct pci_dev *dev); +extern void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable); extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level, const char *fmt, ...); diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 1a9d84371a4d..c5ce3a8bd4c9 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -16,11 +16,13 @@ #include <linux/slab.h> #include <linux/smp.h> #include <asm/archrandom.h> +#include <asm/cputable.h> #include <asm/io.h> #include <asm/prom.h> #include <asm/machdep.h> #include <asm/smp.h> +#define DARN_ERR 0xFFFFFFFFFFFFFFFFul struct powernv_rng { void __iomem *regs; @@ -67,6 +69,41 @@ int powernv_get_random_real_mode(unsigned long *v) return 1; } +int powernv_get_random_darn(unsigned long *v) +{ + unsigned long val; + + /* Using DARN with L=1 - 64-bit conditioned random number */ + asm volatile(PPC_DARN(%0, 1) : "=r"(val)); + + if (val == DARN_ERR) + return 0; + + *v = val; + + return 1; +} + +static int initialise_darn(void) +{ + unsigned long val; + int i; + + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + return -ENODEV; + + for (i = 0; i < 10; i++) { + if (powernv_get_random_darn(&val)) { + ppc_md.get_random_seed = powernv_get_random_darn; + return 0; + } + } + + pr_warn("Unable to use DARN for get_random_seed()\n"); + + return -EIO; +} + int powernv_get_random_long(unsigned long *v) { struct powernv_rng *rng; @@ -150,6 +187,8 @@ static __init int rng_init(void) of_platform_device_create(dn, NULL, NULL); } + initialise_darn(); + return 0; } machine_subsys_initcall(powernv, rng_init); diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 40dae96f7e20..c17f81e433f7 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ 
b/arch/powerpc/platforms/powernv/smp.c @@ -57,7 +57,7 @@ static void pnv_smp_setup_cpu(int cpu) static int pnv_smp_kick_cpu(int nr) { - unsigned int pcpu = get_hard_smp_processor_id(nr); + unsigned int pcpu; unsigned long start_here = __pa(ppc_function_entry(generic_secondary_smp_init)); long rc; @@ -66,6 +66,7 @@ static int pnv_smp_kick_cpu(int nr) if (nr < 0 || nr >= nr_cpu_ids) return -EINVAL; + pcpu = get_hard_smp_processor_id(nr); /* * If we already started or OPAL is not supported, we just * kick the CPU via the PACA @@ -164,12 +165,6 @@ static void pnv_smp_cpu_kill_self(void) if (cpu_has_feature(CPU_FTR_ARCH_207S)) wmask = SRR1_WAKEMASK_P8; - /* We don't want to take decrementer interrupts while we are offline, - * so clear LPCR:PECE1. We keep PECE2 (and LPCR_PECE_HVEE on P9) - * enabled as to let IPIs in. - */ - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1); - while (!generic_check_cpu_restart(cpu)) { /* * Clear IPI flag, since we don't handle IPIs while @@ -219,8 +214,6 @@ static void pnv_smp_cpu_kill_self(void) } - /* Re-enable decrementer interrupts */ - mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_PECE1); DBG("CPU%d coming online...\n", cpu); } diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index ca9b2f4aaa22..9e3afd238d34 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -336,7 +336,38 @@ static struct memory_block *lmb_to_memblock(struct of_drconf_cell *lmb) return mem_block; } +static int dlpar_change_lmb_state(struct of_drconf_cell *lmb, bool online) +{ + struct memory_block *mem_block; + int rc; + + mem_block = lmb_to_memblock(lmb); + if (!mem_block) + return -EINVAL; + + if (online && mem_block->dev.offline) + rc = device_online(&mem_block->dev); + else if (!online && !mem_block->dev.offline) + rc = device_offline(&mem_block->dev); + else + rc = 0; + + put_device(&mem_block->dev); + + return rc; +} + +static int dlpar_online_lmb(struct of_drconf_cell *lmb) +{ + return dlpar_change_lmb_state(lmb, true); +} + #ifdef CONFIG_MEMORY_HOTREMOVE +static int dlpar_offline_lmb(struct of_drconf_cell *lmb) +{ + return dlpar_change_lmb_state(lmb, false); +} + static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) { unsigned long block_sz, start_pfn; @@ -431,19 +462,13 @@ static int dlpar_add_lmb(struct of_drconf_cell *); static int dlpar_remove_lmb(struct of_drconf_cell *lmb) { - struct memory_block *mem_block; unsigned long block_sz; int nid, rc; if (!lmb_is_removable(lmb)) return -EINVAL; - mem_block = lmb_to_memblock(lmb); - if (!mem_block) - return -EINVAL; - - rc = device_offline(&mem_block->dev); - put_device(&mem_block->dev); + rc = dlpar_offline_lmb(lmb); if (rc) return rc; @@ -737,20 +762,6 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index, } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int dlpar_online_lmb(struct of_drconf_cell *lmb) -{ - struct memory_block *mem_block; - int rc; - - mem_block = lmb_to_memblock(lmb); - if (!mem_block) - return -EINVAL; - - rc = device_online(&mem_block->dev); - put_device(&mem_block->dev); - return rc; -} - static int dlpar_add_lmb(struct of_drconf_cell *lmb) { unsigned long block_sz; diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 164a13d3998a..35c891aabef0 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -229,10 +229,9 
@@ static int __init pseries_energy_init(void) int cpu, err; struct device *cpu_dev; - if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) { - printk(KERN_INFO "Hypercall H_BEST_ENERGY not supported\n"); - return 0; - } + if (!firmware_has_feature(FW_FEATURE_BEST_ENERGY)) + return 0; /* H_BEST_ENERGY hcall not supported */ + /* Create the sysfs files */ err = device_create_file(cpu_subsys.dev_root, &attr_cpu_activate_hint_list); diff --git a/arch/powerpc/platforms/pseries/reconfig.c b/arch/powerpc/platforms/pseries/reconfig.c index e5bf1e84047f..e6bfff8a20a4 100644 --- a/arch/powerpc/platforms/pseries/reconfig.c +++ b/arch/powerpc/platforms/pseries/reconfig.c @@ -363,20 +363,13 @@ static int do_update_property(char *buf, size_t bufsize) static ssize_t ofdt_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { - int rv = 0; + int rv; char *kbuf; char *tmp; - if (!(kbuf = kmalloc(count + 1, GFP_KERNEL))) { - rv = -ENOMEM; - goto out; - } - if (copy_from_user(kbuf, buf, count)) { - rv = -EFAULT; - goto out; - } - - kbuf[count] = '\0'; + kbuf = memdup_user_nul(buf, count); + if (IS_ERR(kbuf)) + return PTR_ERR(kbuf); tmp = strchr(kbuf, ' '); if (!tmp) { diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index 3696ea6c4826..30277446892c 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -67,7 +67,7 @@ master: mr %r16,%r3 /* save dt address in reg16 */ li %r4,20 LWZX_BE %r6,%r3,%r4 /* fetch __be32 version number at byte 20 */ - cmpwi %r0,%r6,2 /* v2 or later? */ + cmpwi %cr0,%r6,2 /* v2 or later? */ blt 1f li %r4,28 STWX_BE %r17,%r3,%r4 /* Store my cpu as __be32 at byte 28 */ diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile index c0ae11d4f62f..79416fa2e3ba 100644 --- a/arch/powerpc/sysdev/Makefile +++ b/arch/powerpc/sysdev/Makefile @@ -36,25 +36,15 @@ obj-$(CONFIG_AXON_RAM) += axonram.o obj-$(CONFIG_PPC_INDIRECT_PCI) += indirect_pci.o obj-$(CONFIG_PPC_I8259) += i8259.o obj-$(CONFIG_IPIC) += ipic.o -obj-$(CONFIG_4xx) += uic.o -obj-$(CONFIG_PPC4xx_OCM) += ppc4xx_ocm.o -obj-$(CONFIG_4xx_SOC) += ppc4xx_soc.o obj-$(CONFIG_XILINX_VIRTEX) += xilinx_intc.o obj-$(CONFIG_XILINX_PCI) += xilinx_pci.o obj-$(CONFIG_OF_RTC) += of_rtc.o -ifeq ($(CONFIG_PCI),y) -obj-$(CONFIG_4xx) += ppc4xx_pci.o -endif -obj-$(CONFIG_PPC4xx_HSTA_MSI) += ppc4xx_hsta_msi.o -obj-$(CONFIG_PPC4xx_MSI) += ppc4xx_msi.o -obj-$(CONFIG_PPC4xx_CPM) += ppc4xx_cpm.o -obj-$(CONFIG_PPC4xx_GPIO) += ppc4xx_gpio.o obj-$(CONFIG_CPM) += cpm_common.o +obj-$(CONFIG_CPM1) += cpm1.o obj-$(CONFIG_CPM2) += cpm2.o cpm2_pic.o obj-$(CONFIG_QUICC_ENGINE) += cpm_common.o obj-$(CONFIG_PPC_DCR) += dcr.o -obj-$(CONFIG_8xx) += mpc8xx_pic.o cpm1.o obj-$(CONFIG_UCODE_PATCH) += micropatch.o obj-$(CONFIG_PPC_MPC512x) += mpc5xxx_clocks.o diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 19101f9cfcfc..1f614fb2be56 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -98,7 +98,7 @@ u32 fsl_get_sys_freq(void) } EXPORT_SYMBOL(fsl_get_sys_freq); -#if defined(CONFIG_CPM2) || defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) +#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) u32 get_brgfreq(void) { diff --git a/arch/powerpc/sysdev/fsl_soc.h b/arch/powerpc/sysdev/fsl_soc.h index d73daa4f0ccf..2640446f8bc4 100644 --- a/arch/powerpc/sysdev/fsl_soc.h +++ b/arch/powerpc/sysdev/fsl_soc.h @@ -7,7 +7,7 @@ struct spi_device; extern phys_addr_t get_immrbase(void); -#if defined(CONFIG_CPM2) || 
defined(CONFIG_QUICC_ENGINE) || defined(CONFIG_8xx) +#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) extern u32 get_brgfreq(void); extern u32 get_baudrate(void); #else diff --git a/arch/powerpc/sysdev/ipic.c b/arch/powerpc/sysdev/ipic.c index f267ee0afc08..16f1edd78c40 100644 --- a/arch/powerpc/sysdev/ipic.c +++ b/arch/powerpc/sysdev/ipic.c @@ -315,6 +315,7 @@ static struct ipic_info ipic_info[] = { .prio_mask = 7, }, [48] = { + .ack = IPIC_SEPNR, .mask = IPIC_SEMSR, .prio = IPIC_SMPRR_A, .force = IPIC_SEFCR, diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c index 330d56613c5a..f1fe17265981 100644 --- a/arch/powerpc/sysdev/mv64x60_pci.c +++ b/arch/powerpc/sysdev/mv64x60_pci.c @@ -70,7 +70,7 @@ static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj, return count; } -static struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */ +static const struct bin_attribute mv64x60_hs_reg_attr = { /* Hotswap register */ .attr = { .name = "hs_reg", .mode = S_IRUGO | S_IWUSR, diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index 6595462b1fc8..0db4c45bc561 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -190,7 +190,7 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek) * This is used to perform the magic loads from an ESB * described in xive.h */ -static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset) +static notrace u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset) { u64 val; @@ -204,7 +204,7 @@ static u8 xive_poke_esb(struct xive_irq_data *xd, u32 offset) } #ifdef CONFIG_XMON -static void xive_dump_eq(const char *name, struct xive_q *q) +static notrace void xive_dump_eq(const char *name, struct xive_q *q) { u32 i0, i1, idx; @@ -218,7 +218,7 @@ static void xive_dump_eq(const char *name, struct xive_q *q) q->toggle, i0, i1); } -void xmon_xive_do_dump(int cpu) +notrace void xmon_xive_do_dump(int cpu) { struct xive_cpu *xc = per_cpu(xive_cpu, cpu); @@ -672,6 +672,10 @@ static int xive_irq_set_affinity(struct irq_data *d, if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) return -EINVAL; + /* Don't do anything if the interrupt isn't started */ + if (!irqd_is_started(d)) + return IRQ_SET_MASK_OK; + /* * If existing target is already in the new mask, and is * online then do nothing. 
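The ofdt_write() conversion in reconfig.c above collapses an open-coded kmalloc()/copy_from_user()/terminate sequence into one call. A minimal sketch of the same memdup_user_nul() idiom in a hypothetical write handler:

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/string.h>

static ssize_t sketch_write(struct file *file, const char __user *buf,
			    size_t count, loff_t *off)
{
	char *kbuf;

	/* Copies count bytes from userspace and appends a NUL in one step */
	kbuf = memdup_user_nul(buf, count);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	pr_info("user wrote: %s\n", kbuf);

	kfree(kbuf);
	return count;
}

memdup_user_nul() returns an ERR_PTR() on failure, so the handler propagates the error with PTR_ERR() instead of hand-rolling separate -ENOMEM and -EFAULT paths.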
@@ -1395,8 +1399,8 @@ void xive_shutdown(void) xive_ops->shutdown(); } -bool xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, - u8 max_prio) +bool __init xive_core_init(const struct xive_ops *ops, void __iomem *area, u32 offset, + u8 max_prio) { xive_tima = area; xive_tima_offset = offset; diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c index 0f95476b01f6..1dbf782c9239 100644 --- a/arch/powerpc/sysdev/xive/native.c +++ b/arch/powerpc/sysdev/xive/native.c @@ -531,7 +531,7 @@ u32 xive_native_default_eq_shift(void) } EXPORT_SYMBOL_GPL(xive_native_default_eq_shift); -bool xive_native_init(void) +bool __init xive_native_init(void) { struct device_node *np; struct resource r; diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 0b2f771593eb..1dd88315cff4 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -5,6 +5,10 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror GCOV_PROFILE := n UBSAN_SANITIZE := n +# Disable ftrace for the entire directory +ORIG_CFLAGS := $(KBUILD_CFLAGS) +KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))) + ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 08e367e3e8c3..d038e7db44c4 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -89,6 +89,7 @@ static unsigned long nidump = 16; static unsigned long ncsum = 4096; static int termch; static char tmpstr[128]; +static int tracing_enabled; static long bus_error_jmp[JMP_BUF_LEN]; static int catch_memory_errors; @@ -234,6 +235,7 @@ Commands:\n\ "\ dr dump stream of raw bytes\n\ dt dump the tracing buffers (uses printk)\n\ + dtc dump the tracing buffers for current CPU (uses printk)\n\ " #ifdef CONFIG_PPC_POWERNV " dx# dump xive on CPU #\n\ @@ -461,6 +463,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi) local_irq_save(flags); hard_irq_disable(); + tracing_enabled = tracing_is_on(); + tracing_off(); + bp = in_breakpoint_table(regs->nip, &offset); if (bp != NULL) { regs->nip = bp->address + offset; @@ -981,6 +986,8 @@ cmds(struct pt_regs *excp) break; case 'x': case 'X': + if (tracing_enabled) + tracing_on(); return cmd; case EOF: printf(" <no input ...>\n"); @@ -2231,6 +2238,17 @@ static void xmon_rawdump (unsigned long adrs, long ndump) printf("\n"); } +static void dump_tracing(void) +{ + int c; + + c = inchar(); + if (c == 'c') + ftrace_dump(DUMP_ORIG); + else + ftrace_dump(DUMP_ALL); +} + #ifdef CONFIG_PPC64 static void dump_one_paca(int cpu) { @@ -2507,6 +2525,11 @@ dump(void) } #endif + if (c == 't') { + dump_tracing(); + return; + } + if (c == '\n') termch = c; @@ -2525,9 +2548,6 @@ dump(void) dump_log_buf(); } else if (c == 'o') { dump_opal_msglog(); - } else if (c == 't') { - ftrace_dump(DUMP_ALL); - tracing_on(); } else if (c == 'r') { scanhex(&ndump); if (ndump == 0) |
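The sensor-groups interface added in opal-sensor-groups.c is driven entirely through sysfs. A minimal userspace sketch: the /sys/firmware/opal location follows from opal_kobj, while the group directory name "occ-csm0" is purely hypothetical (opal_sensor_groups_init() derives the real names from the device-tree node name plus the ibm,chip-id):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Group name "occ-csm0" assumed for illustration */
	int fd = open("/sys/firmware/opal/sensor_groups/occ-csm0/clear",
		      O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* sg_store() accepts only "1": ask OPAL to clear this group */
	if (write(fd, "1", 1) != 1)
		perror("write");

	close(fd);
	return 0;
}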
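opal_imc_init_dev() only materialises a platform device for the IMC device-tree node; a platform driver elsewhere is expected to bind to it. A skeletal sketch of such a binding, where the compatible string is an assumption standing in for whatever IMC_DTB_COMPAT expands to and every imc_sketch_* name is hypothetical:

#include <linux/module.h>
#include <linux/of.h>
#include <linux/platform_device.h>

static int imc_sketch_probe(struct platform_device *pdev)
{
	/* pdev->dev.of_node is the node matched via the table below */
	dev_info(&pdev->dev, "bound to %s\n", pdev->dev.of_node->name);
	return 0;
}

static const struct of_device_id imc_sketch_match[] = {
	{ .compatible = "ibm,opal-in-memory-counters" }, /* assumed value */
	{},
};

static struct platform_driver imc_sketch_driver = {
	.driver = {
		.name = "imc-sketch",
		.of_match_table = imc_sketch_match,
	},
	.probe = imc_sketch_probe,
};
module_platform_driver(imc_sketch_driver);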
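Because pnv_pci_set_p2p() reference-counts the initiator PE and the target PHB, every enable must be paired with a disable call whose descriptor has OPAL_PCI_P2P_ENABLE cleared. A hedged sketch of how a caller might bracket a transfer; OPAL_PCI_P2P_ENABLE appears in the patch, whereas OPAL_PCI_P2P_STORE is assumed to be one of the accompanying direction flags:

static int p2p_sketch(struct pci_dev *initiator, struct pci_dev *target)
{
	u64 desc = OPAL_PCI_P2P_ENABLE | OPAL_PCI_P2P_STORE; /* STORE flag assumed */
	int rc;

	/* Bumps the initiator PE and target PHB reference counts */
	rc = pnv_pci_set_p2p(initiator, target, desc);
	if (rc)
		return rc;

	/* ... perform peer-to-peer DMA between the two devices ... */

	/*
	 * Same descriptor minus ENABLE drops the references; TVE#1
	 * bypass is restored once the initiator count reaches zero.
	 */
	return pnv_pci_set_p2p(initiator, target, desc & ~OPAL_PCI_P2P_ENABLE);
}

Exporting the function with EXPORT_SYMBOL_GPL(), as the patch does, is what allows a modular caller like this to exist.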
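initialise_darn() publishes the DARN instruction through ppc_md.get_random_seed rather than as a direct call, so consumers probe the machdep hook. A sketch of the consuming side, assuming the callback keeps the return-1-on-success convention visible in powernv_get_random_darn():

#include <asm/machdep.h>

static inline int seed_long_sketch(unsigned long *v)
{
	/* NULL until a platform (here, initialise_darn()) registers a source */
	if (ppc_md.get_random_seed)
		return ppc_md.get_random_seed(v);

	return 0;	/* no hardware entropy source available */
}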
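The xmon changes snapshot the tracer state on entry, switch tracing off for the whole session, and resume only when the user exits with 'x'. The same pause/resume discipline reduced to a sketch; the debugger_sketch_*() names are hypothetical, while tracing_is_on(), tracing_off() and tracing_on() are the real ftrace switches:

#include <linux/kernel.h>

static int sketch_tracing_was_on;

static void debugger_sketch_enter(void)
{
	/*
	 * Remember whether ftrace was recording, then pause it so the
	 * debugger's own activity does not flood the trace buffers.
	 */
	sketch_tracing_was_on = tracing_is_on();
	tracing_off();
}

static void debugger_sketch_exit(void)
{
	/* Resume only if tracing was active when the session began */
	if (sketch_tracing_was_on)
		tracing_on();
}

This is also why the 'd t' path no longer calls tracing_on() unconditionally after ftrace_dump(): resuming is left to the exit path, which knows whether tracing was on to begin with.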