diff options
Diffstat (limited to 'arch/powerpc/kernel')
94 files changed, 2493 insertions, 3208 deletions
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index d3282fbea4f2..6ac621155ec3 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -3,9 +3,6 @@ # Makefile for the linux kernel. # -ifdef CONFIG_PPC64 -CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) -endif ifdef CONFIG_PPC32 CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC @@ -73,7 +70,7 @@ obj-y := cputable.o syscalls.o switch.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ - of_platform.o prom_parse.o firmware.o \ + prom_parse.o firmware.o \ hw_breakpoint_constraints.o interrupt.o \ kdebugfs.o stacktrace.o syscall.o obj-y += ptrace/ @@ -87,6 +84,7 @@ obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_DAWR) += dawr.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += dexcr.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_64e.o obj-$(CONFIG_PPC_BARRIER_NOSPEC) += security.o @@ -125,7 +123,6 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_PPC64) += head_64.o obj-$(CONFIG_PPC_BOOK3S_32) += head_book3s_32.o -obj-$(CONFIG_40x) += head_40x.o obj-$(CONFIG_44x) += head_44x.o obj-$(CONFIG_PPC_8xx) += head_8xx.o obj-$(CONFIG_PPC_85xx) += head_85xx.o @@ -142,6 +139,7 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_OPTPROBES) += optprobes.o optprobes_head.o obj-$(CONFIG_KPROBES_ON_FTRACE) += kprobes-ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_PPC_UDBG_16550) += legacy_serial.o udbg_16550.o obj-$(CONFIG_SWIOTLB) += dma-swiotlb.o obj-$(CONFIG_ARCH_HAS_DMA_SET_MASK) += dma-mask.o @@ -154,8 +152,6 @@ obj-$(CONFIG_PCI_MSI) += msi.o obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o -obj-$(CONFIG_PPC_IO_WORKAROUNDS) += io-workarounds.o - obj-y += trace/ ifneq ($(CONFIG_PPC_INDIRECT_PIO),y) @@ -190,9 +186,6 @@ GCOV_PROFILE_kprobes-ftrace.o := n KCOV_INSTRUMENT_kprobes-ftrace.o := n KCSAN_SANITIZE_kprobes-ftrace.o := n UBSAN_SANITIZE_kprobes-ftrace.o := n -GCOV_PROFILE_syscall_64.o := n -KCOV_INSTRUMENT_syscall_64.o := n -UBSAN_SANITIZE_syscall_64.o := n UBSAN_SANITIZE_vdso.o := n # Necessary for booting with kcov enabled on book3e machines diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f029755f9e69..b3048f6d3822 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_PPC32 -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE #include "head_booke.h" #endif #endif @@ -334,7 +334,6 @@ int main(void) #endif /* ! CONFIG_PPC64 */ /* datapage offsets for use by vdso */ - OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); @@ -594,7 +593,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); @@ -674,5 +672,16 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif +#endif + return 0; } diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index f502337dd37d..0fcc463b02e2 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -735,7 +735,7 @@ static const struct sysfs_ops cache_index_ops = { .show = cache_index_show, }; -static struct kobj_type cache_index_type = { +static const struct kobj_type cache_index_type = { .release = cache_index_release, .sysfs_ops = &cache_index_ops, .default_groups = cache_index_default_groups, diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index bfd3f442e5eb..ab3ca74e6730 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -401,7 +401,7 @@ _GLOBAL(__save_cpu_setup) andi. r3,r3,0xff00 cmpwi cr0,r3,0x0200 bne 1f - mfspr r4,SPRN_HID2 + mfspr r4,SPRN_HID2_750FX stw r4,CS_HID2(r5) 1: mtcr r7 @@ -496,7 +496,7 @@ _GLOBAL(__restore_cpu_setup) bne 4f lwz r4,CS_HID2(r5) rlwinm r4,r4,0,19,17 - mtspr SPRN_HID2,r4 + mtspr SPRN_HID2_750FX,r4 sync 4: lwz r4,CS_HID1(r5) diff --git a/arch/powerpc/kernel/cpu_specs.h b/arch/powerpc/kernel/cpu_specs.h index 85ded3f77204..5ea14605bb41 100644 --- a/arch/powerpc/kernel/cpu_specs.h +++ b/arch/powerpc/kernel/cpu_specs.h @@ -1,9 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifdef CONFIG_40x -#include "cpu_specs_40x.h" -#endif - #ifdef CONFIG_PPC_47x #include "cpu_specs_47x.h" #elif defined(CONFIG_44x) diff --git a/arch/powerpc/kernel/cpu_specs_40x.h b/arch/powerpc/kernel/cpu_specs_40x.h deleted file mode 100644 index a1362a75b8c8..000000000000 --- a/arch/powerpc/kernel/cpu_specs_40x.h +++ /dev/null @@ -1,280 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) - */ - -static struct cpu_spec cpu_specs[] __initdata = { - { /* STB 04xxx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41810000, - .cpu_name = "STB04xxx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405L */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41610000, - .cpu_name = "NP405L", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP4GS3 */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x40B10000, - .cpu_name = "NP4GS3", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* NP405H */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41410000, - .cpu_name = "NP405H", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405GPr */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x50910000, - .cpu_name = "405GPr", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* STBx25xx */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51510000, - .cpu_name = "STBx25xx", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405LP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41F10000, - .cpu_name = "405LP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EP */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x51210000, - .cpu_name = "405EP", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. A/B with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910007, - .cpu_name = "405EX Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000d, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000f, - .cpu_name = "405EX Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910003, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EX Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910005, - .cpu_name = "405EX Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. A/B without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910001, - .cpu_name = "405EXr Rev. A/B", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910009, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. C with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x1291000b, - .cpu_name = "405EXr Rev. C", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D without Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910000, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* 405EXr Rev. D with Security */ - .pvr_mask = 0xffff000f, - .pvr_value = 0x12910002, - .cpu_name = "405EXr Rev. D", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { - /* 405EZ */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x41510000, - .cpu_name = "405EZ", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* APM8018X */ - .pvr_mask = 0xffff0000, - .pvr_value = 0x7ff11432, - .cpu_name = "APM8018X", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - }, - { /* default match */ - .pvr_mask = 0x00000000, - .pvr_value = 0x00000000, - .cpu_name = "(generic 40x PPC)", - .cpu_features = CPU_FTRS_40X, - .cpu_user_features = PPC_FEATURE_32 | PPC_FEATURE_HAS_MMU | - PPC_FEATURE_HAS_4xxMAC, - .mmu_features = MMU_FTR_TYPE_40x, - .icache_bsize = 32, - .dcache_bsize = 32, - .machine_check = machine_check_4xx, - .platform = "ppc405", - } -}; diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c index 2086fa6cdc25..103b6605dd68 100644 --- a/arch/powerpc/kernel/crash_dump.c +++ b/arch/powerpc/kernel/crash_dump.c @@ -13,7 +13,7 @@ #include <linux/io.h> #include <linux/memblock.h> #include <linux/of.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/kdump.h> #include <asm/firmware.h> #include <linux/uio.h> diff --git a/arch/powerpc/kernel/dexcr.c b/arch/powerpc/kernel/dexcr.c new file mode 100644 index 000000000000..3a0358e91c60 --- /dev/null +++ b/arch/powerpc/kernel/dexcr.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <linux/capability.h> +#include <linux/cpu.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/sched.h> + +#include <asm/cpu_has_feature.h> +#include <asm/cputable.h> +#include <asm/processor.h> +#include <asm/reg.h> + +static int __init init_task_dexcr(void) +{ + if (!early_cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + + current->thread.dexcr_onexec = mfspr(SPRN_DEXCR); + + return 0; +} +early_initcall(init_task_dexcr) + +/* Allow thread local configuration of these by default */ +#define DEXCR_PRCTL_EDITABLE ( \ + DEXCR_PR_IBRTPD | \ + DEXCR_PR_SRAPD | \ + DEXCR_PR_NPHIE) + +static int prctl_to_aspect(unsigned long which, unsigned int *aspect) +{ + switch (which) { + case PR_PPC_DEXCR_SBHE: + *aspect = DEXCR_PR_SBHE; + break; + case PR_PPC_DEXCR_IBRTPD: + *aspect = DEXCR_PR_IBRTPD; + break; + case PR_PPC_DEXCR_SRAPD: + *aspect = DEXCR_PR_SRAPD; + break; + case PR_PPC_DEXCR_NPHIE: + *aspect = DEXCR_PR_NPHIE; + break; + default: + return -ENODEV; + } + + return 0; +} + +int get_dexcr_prctl(struct task_struct *task, unsigned long which) +{ + unsigned int aspect; + int ret; + + ret = prctl_to_aspect(which, &aspect); + if (ret) + return ret; + + if (aspect & DEXCR_PRCTL_EDITABLE) + ret |= PR_PPC_DEXCR_CTRL_EDITABLE; + + if (aspect & mfspr(SPRN_DEXCR)) + ret |= PR_PPC_DEXCR_CTRL_SET; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR; + + if (aspect & task->thread.dexcr_onexec) + ret |= PR_PPC_DEXCR_CTRL_SET_ONEXEC; + else + ret |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC; + + return ret; +} + +int set_dexcr_prctl(struct task_struct *task, unsigned long which, unsigned long ctrl) +{ + unsigned long dexcr; + unsigned int aspect; + int err = 0; + + err = prctl_to_aspect(which, &aspect); + if (err) + return err; + + if (!(aspect & DEXCR_PRCTL_EDITABLE)) + return -EPERM; + + if (ctrl & ~PR_PPC_DEXCR_CTRL_MASK) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET && ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + return -EINVAL; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC && ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + return -EINVAL; + + /* + * We do not want an unprivileged process being able to disable + * a setuid process's hash check instructions + */ + if (aspect == DEXCR_PR_NPHIE && + ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC && + !capable(CAP_SYS_ADMIN)) + return -EPERM; + + dexcr = mfspr(SPRN_DEXCR); + + if (ctrl & PR_PPC_DEXCR_CTRL_SET) + dexcr |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) + dexcr &= ~aspect; + + if (ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC) + task->thread.dexcr_onexec |= aspect; + else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC) + task->thread.dexcr_onexec &= ~aspect; + + mtspr(SPRN_DEXCR, dexcr); + + return 0; +} diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 8920862ffd79..4d64a5db50f3 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -136,7 +136,7 @@ static bool dma_iommu_bypass_supported(struct device *dev, u64 mask) struct pci_dev *pdev = to_pci_dev(dev); struct pci_controller *phb = pci_bus_to_host(pdev->bus); - if (iommu_fixed_is_weak || !phb->controller_ops.iommu_bypass_supported) + if (!phb->controller_ops.iommu_bypass_supported) return false; return phb->controller_ops.iommu_bypass_supported(pdev, mask); } @@ -216,6 +216,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, + .alloc_pages_op = dma_common_alloc_pages, .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index af4263594eb2..3af6c06af02f 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -867,7 +867,7 @@ bool __init dt_cpu_ftrs_init(void *fdt) using_dt_cpu_ftrs = false; /* Setup and verify the FDT, if it fails we just bail */ - if (!early_init_dt_verify(fdt)) + if (!early_init_dt_verify(fdt, __pa(fdt))) return false; if (!of_scan_flat_dt(fdt_find_cpu_features, NULL)) @@ -1087,12 +1087,10 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char /* Count and allocate space for cpu features */ of_scan_flat_dt_subnodes(node, count_cpufeatures_subnodes, &nr_dt_cpu_features); - dt_cpu_features = memblock_alloc(sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, PAGE_SIZE); - if (!dt_cpu_features) - panic("%s: Failed to allocate %zu bytes align=0x%lx\n", - __func__, - sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, - PAGE_SIZE); + dt_cpu_features = + memblock_alloc_or_panic( + sizeof(struct dt_cpu_feature) * nr_dt_cpu_features, + PAGE_SIZE); cpufeatures_setup_start(isa); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index ab316e155ea9..83fe99861eb1 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -506,9 +506,18 @@ int eeh_dev_check_failure(struct eeh_dev *edev) * We will punt with the following conditions: Failure to get * PE's state, EEH not support and Permanently unavailable * state, PE is in good state. + * + * On the pSeries, after reaching the threshold, get_state might + * return EEH_STATE_NOT_SUPPORT. However, it's possible that the + * device state remains uncleared if the device is not marked + * pci_channel_io_perm_failure. Therefore, consider logging the + * event to let device removal happen. + * */ if ((ret < 0) || - (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { + (ret == EEH_STATE_NOT_SUPPORT && + dev->error_state == pci_channel_io_perm_failure) || + eeh_state_active(ret)) { eeh_stats.false_positives++; pe->false_positives++; rc = 0; @@ -1264,22 +1273,6 @@ EXPORT_SYMBOL(eeh_dev_release); #ifdef CONFIG_IOMMU_API -static int dev_has_iommu_table(struct device *dev, void *data) -{ - struct pci_dev *pdev = to_pci_dev(dev); - struct pci_dev **ppdev = data; - - if (!dev) - return 0; - - if (device_iommu_mapped(dev)) { - *ppdev = pdev; - return 1; - } - - return 0; -} - /** * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE * @group: IOMMU group @@ -1544,10 +1537,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, if (!eeh_ops || !eeh_ops->err_inject) return -ENOENT; - /* Check on PCI error type */ - if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) - return -EINVAL; - /* Check on PCI error function */ if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) return -EINVAL; @@ -1585,6 +1574,104 @@ static int proc_eeh_show(struct seq_file *m, void *v) } #endif /* CONFIG_PROC_FS */ +static int eeh_break_device(struct pci_dev *pdev) +{ + struct resource *bar = NULL; + void __iomem *mapped; + u16 old, bit; + int i, pos; + + /* Do we have an MMIO BAR to disable? */ + for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { + struct resource *r = &pdev->resource[i]; + + if (!r->flags || !r->start) + continue; + if (r->flags & IORESOURCE_IO) + continue; + if (r->flags & IORESOURCE_UNSET) + continue; + + bar = r; + break; + } + + if (!bar) { + pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); + return -ENXIO; + } + + pci_err(pdev, "Going to break: %pR\n", bar); + + if (pdev->is_virtfn) { +#ifndef CONFIG_PCI_IOV + return -ENXIO; +#else + /* + * VFs don't have a per-function COMMAND register, so the best + * we can do is clear the Memory Space Enable bit in the PF's + * SRIOV control reg. + * + * Unfortunately, this requires that we have a PF (i.e doesn't + * work for a passed-through VF) and it has the potential side + * effect of also causing an EEH on every other VF under the + * PF. Oh well. + */ + pdev = pdev->physfn; + if (!pdev) + return -ENXIO; /* passed through VFs have no PF */ + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + pos += PCI_SRIOV_CTRL; + bit = PCI_SRIOV_CTRL_MSE; +#endif /* !CONFIG_PCI_IOV */ + } else { + bit = PCI_COMMAND_MEMORY; + pos = PCI_COMMAND; + } + + /* + * Process here is: + * + * 1. Disable Memory space. + * + * 2. Perform an MMIO to the device. This should result in an error + * (CA / UR) being raised by the device which results in an EEH + * PE freeze. Using the in_8() accessor skips the eeh detection hook + * so the freeze hook so the EEH Detection machinery won't be + * triggered here. This is to match the usual behaviour of EEH + * where the HW will asynchronously freeze a PE and it's up to + * the kernel to notice and deal with it. + * + * 3. Turn Memory space back on. This is more important for VFs + * since recovery will probably fail if we don't. For normal + * the COMMAND register is reset as a part of re-initialising + * the device. + * + * Breaking stuff is the point so who cares if it's racy ;) + */ + pci_read_config_word(pdev, pos, &old); + + mapped = ioremap(bar->start, PAGE_SIZE); + if (!mapped) { + pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); + return -ENXIO; + } + + pci_write_config_word(pdev, pos, old & ~bit); + in_8(mapped); + pci_write_config_word(pdev, pos, old); + + iounmap(mapped); + + return 0; +} + +int eeh_pe_inject_mmio_error(struct pci_dev *pdev) +{ + return eeh_break_device(pdev); +} + #ifdef CONFIG_DEBUG_FS @@ -1689,7 +1776,6 @@ static ssize_t eeh_force_recover_write(struct file *filp, static const struct file_operations eeh_force_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_force_recover_write, }; @@ -1733,104 +1819,10 @@ static ssize_t eeh_dev_check_write(struct file *filp, static const struct file_operations eeh_dev_check_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_check_write, .read = eeh_debugfs_dev_usage, }; -static int eeh_debugfs_break_device(struct pci_dev *pdev) -{ - struct resource *bar = NULL; - void __iomem *mapped; - u16 old, bit; - int i, pos; - - /* Do we have an MMIO BAR to disable? */ - for (i = 0; i <= PCI_STD_RESOURCE_END; i++) { - struct resource *r = &pdev->resource[i]; - - if (!r->flags || !r->start) - continue; - if (r->flags & IORESOURCE_IO) - continue; - if (r->flags & IORESOURCE_UNSET) - continue; - - bar = r; - break; - } - - if (!bar) { - pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n"); - return -ENXIO; - } - - pci_err(pdev, "Going to break: %pR\n", bar); - - if (pdev->is_virtfn) { -#ifndef CONFIG_PCI_IOV - return -ENXIO; -#else - /* - * VFs don't have a per-function COMMAND register, so the best - * we can do is clear the Memory Space Enable bit in the PF's - * SRIOV control reg. - * - * Unfortunately, this requires that we have a PF (i.e doesn't - * work for a passed-through VF) and it has the potential side - * effect of also causing an EEH on every other VF under the - * PF. Oh well. - */ - pdev = pdev->physfn; - if (!pdev) - return -ENXIO; /* passed through VFs have no PF */ - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); - pos += PCI_SRIOV_CTRL; - bit = PCI_SRIOV_CTRL_MSE; -#endif /* !CONFIG_PCI_IOV */ - } else { - bit = PCI_COMMAND_MEMORY; - pos = PCI_COMMAND; - } - - /* - * Process here is: - * - * 1. Disable Memory space. - * - * 2. Perform an MMIO to the device. This should result in an error - * (CA / UR) being raised by the device which results in an EEH - * PE freeze. Using the in_8() accessor skips the eeh detection hook - * so the freeze hook so the EEH Detection machinery won't be - * triggered here. This is to match the usual behaviour of EEH - * where the HW will asynchronously freeze a PE and it's up to - * the kernel to notice and deal with it. - * - * 3. Turn Memory space back on. This is more important for VFs - * since recovery will probably fail if we don't. For normal - * the COMMAND register is reset as a part of re-initialising - * the device. - * - * Breaking stuff is the point so who cares if it's racy ;) - */ - pci_read_config_word(pdev, pos, &old); - - mapped = ioremap(bar->start, PAGE_SIZE); - if (!mapped) { - pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar); - return -ENXIO; - } - - pci_write_config_word(pdev, pos, old & ~bit); - in_8(mapped); - pci_write_config_word(pdev, pos, old); - - iounmap(mapped); - - return 0; -} - static ssize_t eeh_dev_break_write(struct file *filp, const char __user *user_buf, size_t count, loff_t *ppos) @@ -1842,7 +1834,7 @@ static ssize_t eeh_dev_break_write(struct file *filp, if (IS_ERR(pdev)) return PTR_ERR(pdev); - ret = eeh_debugfs_break_device(pdev); + ret = eeh_break_device(pdev); pci_dev_put(pdev); if (ret < 0) @@ -1853,7 +1845,6 @@ static ssize_t eeh_dev_break_write(struct file *filp, static const struct file_operations eeh_dev_break_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_break_write, .read = eeh_debugfs_dev_usage, }; @@ -1900,7 +1891,6 @@ static ssize_t eeh_dev_can_recover(struct file *filp, static const struct file_operations eeh_dev_can_recover_fops = { .open = simple_open, - .llseek = no_llseek, .write = eeh_dev_can_recover, .read = eeh_debugfs_dev_usage, }; diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 48773d2d9be3..7efe04c68f0f 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c @@ -865,9 +865,18 @@ void eeh_handle_normal_event(struct eeh_pe *pe) devices++; if (!devices) { - pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n", + pr_warn("EEH: Frozen PHB#%x-PE#%x is empty!\n", pe->phb->global_number, pe->addr); - goto out; /* nothing to recover */ + /* + * The device is removed, tear down its state, on powernv + * hotplug driver would take care of it but not on pseries, + * permanently disable the card as it is hot removed. + * + * In the case of powernv, note that the removal of device + * is covered by pci rescan lock, so no problem even if hotplug + * driver attempts to remove the device. + */ + goto recover_failed; } /* Log the event */ diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c index e0ce81279624..d283d281d28e 100644 --- a/arch/powerpc/kernel/eeh_pe.c +++ b/arch/powerpc/kernel/eeh_pe.c @@ -24,10 +24,10 @@ static int eeh_pe_aux_size = 0; static LIST_HEAD(eeh_phb_pe); /** - * eeh_set_pe_aux_size - Set PE auxillary data size - * @size: PE auxillary data size + * eeh_set_pe_aux_size - Set PE auxiliary data size + * @size: PE auxiliary data size in bytes * - * Set PE auxillary data size + * Set PE auxiliary data size. */ void eeh_set_pe_aux_size(int size) { @@ -527,7 +527,7 @@ EXPORT_SYMBOL_GPL(eeh_pe_state_mark); * eeh_pe_mark_isolated * @pe: EEH PE * - * Record that a PE has been isolated by marking the PE and it's children as + * Record that a PE has been isolated by marking the PE and its children as * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices * as pci_channel_io_frozen. */ @@ -849,6 +849,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) { struct eeh_dev *edev; struct pci_dev *pdev; + struct pci_bus *bus = NULL; if (pe->type & EEH_PE_PHB) return pe->phb->bus; @@ -859,9 +860,11 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe) /* Retrieve the parent PCI bus of first (top) PCI device */ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry); + pci_lock_rescan_remove(); pdev = eeh_dev_to_pci_dev(edev); if (pdev) - return pdev->bus; + bus = pdev->bus; + pci_unlock_rescan_remove(); - return NULL; + return bus; } diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 7eda33a24bb4..f4a8c9877249 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -108,7 +108,7 @@ transfer_to_syscall: stw r11, 0(r1) mflr r12 stw r12, _LINK(r1) -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ #endif lis r12,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */ @@ -158,9 +158,6 @@ syscall_exit_finish: 1: REST_GPR(2, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 3: mtcr r5 lwz r4,_CTR(r1) @@ -214,7 +211,7 @@ start_kernel_thread: .globl fast_exception_return fast_exception_return: -#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) +#ifndef CONFIG_BOOKE andi. r10,r9,MSR_RI /* check for recoverable interrupt */ beq 3f /* if not, we've got problems */ #endif @@ -237,9 +234,6 @@ fast_exception_return: REST_GPR(12, r11) REST_GPR(11, r11) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(fast_exception_return) /* aargh, a nonrecoverable interrupt, panic */ @@ -296,9 +290,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif .Lrestore_nvgprs: REST_NVGPRS(r1) @@ -346,9 +337,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) REST_GPR(0, r1) REST_GPR(1, r1) rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif 1: /* * Emulate stack store with update. New r1 value was already calculated @@ -375,12 +363,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS) mfspr r9, SPRN_SPRG_SCRATCH0 #endif rfi -#ifdef CONFIG_40x - b . /* Prevent prefetch past rfi */ -#endif _ASM_NOKPROBE_SYMBOL(interrupt_return) -#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +#ifdef CONFIG_BOOKE /* * Returning from a critical interrupt in user mode doesn't need @@ -395,17 +380,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) * time of the critical interrupt. * */ -#ifdef CONFIG_40x -#define PPC_40x_TURN_OFF_MSR_DR \ - /* avoid any possible TLB misses here by turning off MSR.DR, we \ - * assume the instructions here are mapped by a pinned TLB entry */ \ - li r10,MSR_IR; \ - mtmsr r10; \ - isync; \ - tophys(r1, r1); -#else -#define PPC_40x_TURN_OFF_MSR_DR -#endif #define RET_FROM_EXC_LEVEL(exc_lvl_srr0, exc_lvl_srr1, exc_lvl_rfi) \ REST_NVGPRS(r1); \ @@ -423,7 +397,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) mtlr r11; \ lwz r10,_CCR(r1); \ mtcrf 0xff,r10; \ - PPC_40x_TURN_OFF_MSR_DR; \ lwz r9,_DEAR(r1); \ lwz r10,_ESR(r1); \ mtspr SPRN_DEAR,r9; \ @@ -471,20 +444,6 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return) #define RESTORE_MMU_REGS #endif -#ifdef CONFIG_40x - .globl ret_from_crit_exc -ret_from_crit_exc: - lis r9,crit_srr0@ha; - lwz r9,crit_srr0@l(r9); - lis r10,crit_srr1@ha; - lwz r10,crit_srr1@l(r10); - mtspr SPRN_SRR0,r9; - mtspr SPRN_SRR1,r10; - RET_FROM_EXC_LEVEL(SPRN_CSRR0, SPRN_CSRR1, PPC_RFCI) -_ASM_NOKPROBE_SYMBOL(ret_from_crit_exc) -#endif /* CONFIG_40x */ - -#ifdef CONFIG_BOOKE .globl ret_from_crit_exc ret_from_crit_exc: RESTORE_xSRR(SRR0,SRR1); @@ -509,4 +468,3 @@ ret_from_mcheck_exc: RET_FROM_EXC_LEVEL(SPRN_MCSRR0, SPRN_MCSRR1, PPC_RFMCI) _ASM_NOKPROBE_SYMBOL(ret_from_mcheck_exc) #endif /* CONFIG_BOOKE */ -#endif /* !(CONFIG_4xx || CONFIG_BOOKE) */ diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 1a9b5ae8ccb2..6a414ed5a411 100644 --- a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -21,7 +21,7 @@ _GLOBAL(epapr_ev_idle) ori r4, r4,_TLF_NAPPING /* so when we take an exception */ PPC_STL r4, TI_LOCAL_FLAGS(r2) /* it will return to our caller */ -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE wrteei 1 #else mfmsr r4 diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c index d4b8aff20815..247ab2acaccc 100644 --- a/arch/powerpc/kernel/epapr_paravirt.c +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -9,7 +9,7 @@ #include <linux/of_fdt.h> #include <asm/epapr_hcalls.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/machdep.h> #include <asm/inst.h> diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index dcf0591ad3c2..63f6b9f513a4 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -485,8 +485,8 @@ interrupt_base_book3e: /* fake trap */ EXCEPTION_STUB(0x160, decrementer) /* 0x0900 */ EXCEPTION_STUB(0x180, fixed_interval) /* 0x0980 */ EXCEPTION_STUB(0x1a0, watchdog) /* 0x09f0 */ - EXCEPTION_STUB(0x1c0, data_tlb_miss) - EXCEPTION_STUB(0x1e0, instruction_tlb_miss) + EXCEPTION_STUB(0x1c0, data_tlb_miss_bolted) + EXCEPTION_STUB(0x1e0, instruction_tlb_miss_bolted) EXCEPTION_STUB(0x200, altivec_unavailable) EXCEPTION_STUB(0x220, altivec_assist) EXCEPTION_STUB(0x260, perfmon) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index eaf2f167c342..b7229430ca94 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1989,13 +1989,6 @@ INT_DEFINE_END(system_call) INTERRUPT_TO_KERNEL #endif -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -BEGIN_FTR_SECTION - cmpdi r0,0x1ebe - beq- 1f -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) -#endif - /* We reach here with PACA in r13, r13 in r9. */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 @@ -2015,16 +2008,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) b system_call_common #endif .endif - -#ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH - /* Fast LE/BE switch system call */ -1: mfspr r12,SPRN_SRR1 - xori r12,r12,MSR_LE - mtspr SPRN_SRR1,r12 - mr r13,r9 - RFI_TO_USER /* return to userspace */ - b . /* prevent speculative execution */ -#endif .endm EXC_REAL_BEGIN(system_call, 0xc00, 0x100) @@ -2554,27 +2537,8 @@ EXC_REAL_NONE(0x1000, 0x100) EXC_VIRT_NONE(0x5000, 0x100) EXC_REAL_NONE(0x1100, 0x100) EXC_VIRT_NONE(0x5100, 0x100) - -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_system_error) - IVEC=0x1200 - IHSRR=1 -INT_DEFINE_END(cbe_system_error) - -EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100) - GEN_INT_ENTRY cbe_system_error, virt=0 -EXC_REAL_END(cbe_system_error, 0x1200, 0x100) -EXC_VIRT_NONE(0x5200, 0x100) -EXC_COMMON_BEGIN(cbe_system_error_common) - GEN_COMMON cbe_system_error - addi r3,r1,STACK_INT_FRAME_REGS - bl CFUNC(cbe_system_error_exception) - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1200, 0x100) EXC_VIRT_NONE(0x5200, 0x100) -#endif /** * Interrupt 0x1300 - Instruction Address Breakpoint Interrupt. @@ -2725,26 +2689,8 @@ EXC_COMMON_BEGIN(denorm_exception_common) b interrupt_return_hsrr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_maintenance) - IVEC=0x1600 - IHSRR=1 -INT_DEFINE_END(cbe_maintenance) - -EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100) - GEN_INT_ENTRY cbe_maintenance, virt=0 -EXC_REAL_END(cbe_maintenance, 0x1600, 0x100) -EXC_VIRT_NONE(0x5600, 0x100) -EXC_COMMON_BEGIN(cbe_maintenance_common) - GEN_COMMON cbe_maintenance - addi r3,r1,STACK_INT_FRAME_REGS - bl CFUNC(cbe_maintenance_exception) - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1600, 0x100) EXC_VIRT_NONE(0x5600, 0x100) -#endif INT_DEFINE_BEGIN(altivec_assist) @@ -2772,26 +2718,8 @@ EXC_COMMON_BEGIN(altivec_assist_common) b interrupt_return_srr -#ifdef CONFIG_CBE_RAS -INT_DEFINE_BEGIN(cbe_thermal) - IVEC=0x1800 - IHSRR=1 -INT_DEFINE_END(cbe_thermal) - -EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100) - GEN_INT_ENTRY cbe_thermal, virt=0 -EXC_REAL_END(cbe_thermal, 0x1800, 0x100) -EXC_VIRT_NONE(0x5800, 0x100) -EXC_COMMON_BEGIN(cbe_thermal_common) - GEN_COMMON cbe_thermal - addi r3,r1,STACK_INT_FRAME_REGS - bl CFUNC(cbe_thermal_exception) - b interrupt_return_hsrr - -#else /* CONFIG_CBE_RAS */ EXC_REAL_NONE(0x1800, 0x100) EXC_VIRT_NONE(0x5800, 0x100) -#endif #ifdef CONFIG_PPC_WATCHDOG diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index d14eda1e8589..df16c7f547ab 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -33,6 +33,7 @@ #include <asm/fadump-internal.h> #include <asm/setup.h> #include <asm/interrupt.h> +#include <asm/prom.h> /* * The CPU who acquired the lock to trigger the fadump crash should @@ -53,8 +54,6 @@ static struct kobject *fadump_kobj; static atomic_t cpus_in_fadump; static DEFINE_MUTEX(fadump_mutex); -static struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false }; - #define RESERVED_RNGS_SZ 16384 /* 16K - 128 entries */ #define RESERVED_RNGS_CNT (RESERVED_RNGS_SZ / \ sizeof(struct fadump_memory_range)) @@ -80,26 +79,38 @@ static struct cma *fadump_cma; * But for some reason even if it fails we still have the memory reservation * with us and we can still continue doing fadump. */ -static int __init fadump_cma_init(void) +void __init fadump_cma_init(void) { - unsigned long long base, size; + unsigned long long base, size, end; int rc; - if (!fw_dump.fadump_enabled) - return 0; - + if (!fw_dump.fadump_supported || !fw_dump.fadump_enabled || + fw_dump.dump_active) + return; /* * Do not use CMA if user has provided fadump=nocma kernel parameter. - * Return 1 to continue with fadump old behaviour. */ - if (fw_dump.nocma) - return 1; + if (fw_dump.nocma || !fw_dump.boot_memory_size) + return; + /* + * [base, end) should be reserved during early init in + * fadump_reserve_mem(). No need to check this here as + * cma_init_reserved_mem() already checks for overlap. + * Here we give the aligned chunk of this reserved memory to CMA. + */ base = fw_dump.reserve_dump_area_start; size = fw_dump.boot_memory_size; + end = base + size; - if (!size) - return 0; + base = ALIGN(base, CMA_MIN_ALIGNMENT_BYTES); + end = ALIGN_DOWN(end, CMA_MIN_ALIGNMENT_BYTES); + size = end - base; + + if (end <= base) { + pr_warn("%s: Too less memory to give to CMA\n", __func__); + return; + } rc = cma_init_reserved_mem(base, size, 0, "fadump_cma", &fadump_cma); if (rc) { @@ -110,7 +121,7 @@ static int __init fadump_cma_init(void) * blocked from production system usage. Hence return 1, * so that we can continue with fadump. */ - return 1; + return; } /* @@ -122,17 +133,50 @@ static int __init fadump_cma_init(void) /* * So we now have successfully initialized cma area for fadump. */ - pr_info("Initialized 0x%lx bytes cma area at %ldMB from 0x%lx " + pr_info("Initialized [0x%llx, %luMB] cma area from [0x%lx, %luMB] " "bytes of memory reserved for firmware-assisted dump\n", - cma_get_size(fadump_cma), - (unsigned long)cma_get_base(fadump_cma) >> 20, - fw_dump.reserve_dump_area_size); - return 1; + cma_get_base(fadump_cma), cma_get_size(fadump_cma) >> 20, + fw_dump.reserve_dump_area_start, + fw_dump.boot_memory_size >> 20); + return; } -#else -static int __init fadump_cma_init(void) { return 1; } #endif /* CONFIG_CMA */ +/* + * Additional parameters meant for capture kernel are placed in a dedicated area. + * If this is capture kernel boot, append these parameters to bootargs. + */ +void __init fadump_append_bootargs(void) +{ + char *append_args; + size_t len; + + if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) + return; + + if (fw_dump.param_area < fw_dump.boot_mem_top) { + if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { + pr_warn("WARNING: Can't use additional parameters area!\n"); + fw_dump.param_area = 0; + return; + } + } + + append_args = (char *)fw_dump.param_area; + len = strlen(boot_command_line); + + /* + * Too late to fail even if cmdline size exceeds. Truncate additional parameters + * to cmdline size and proceed anyway. + */ + if (len + strlen(append_args) >= COMMAND_LINE_SIZE - 1) + pr_warn("WARNING: Appending parameters exceeds cmdline size. Truncating!\n"); + + pr_debug("Cmdline: %s\n", boot_command_line); + snprintf(boot_command_line + len, COMMAND_LINE_SIZE - len, " %s", append_args); + pr_info("Updated cmdline: %s\n", boot_command_line); +} + /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data) @@ -223,28 +267,6 @@ static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) } /* - * Returns true, if there are no holes in boot memory area, - * false otherwise. - */ -bool is_fadump_boot_mem_contiguous(void) -{ - unsigned long d_start, d_end; - bool ret = false; - int i; - - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - d_start = fw_dump.boot_mem_addr[i]; - d_end = d_start + fw_dump.boot_mem_sz[i]; - - ret = is_fadump_mem_area_contiguous(d_start, d_end); - if (!ret) - break; - } - - return ret; -} - -/* * Returns true, if there are no holes in reserved memory area, * false otherwise. */ @@ -373,12 +395,6 @@ static unsigned long __init get_fadump_area_size(void) size = PAGE_ALIGN(size); size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); - size += sizeof(struct elfhdr); /* ELF core header.*/ - size += sizeof(struct elf_phdr); /* place holder for cpu notes */ - /* Program headers for crash memory regions. */ - size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); - - size = PAGE_ALIGN(size); /* This is to hold kernel metadata on platforms that support it */ size += (fw_dump.ops->fadump_get_metadata_size ? @@ -389,10 +405,11 @@ static unsigned long __init get_fadump_area_size(void) static int __init add_boot_mem_region(unsigned long rstart, unsigned long rsize) { + int max_boot_mem_rgns = fw_dump.ops->fadump_max_boot_mem_rgns(); int i = fw_dump.boot_mem_regs_cnt++; - if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) { - fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS; + if (fw_dump.boot_mem_regs_cnt > max_boot_mem_rgns) { + fw_dump.boot_mem_regs_cnt = max_boot_mem_rgns; return 0; } @@ -552,13 +569,6 @@ int __init fadump_reserve_mem(void) if (!fw_dump.dump_active) { fw_dump.boot_memory_size = PAGE_ALIGN(fadump_calculate_reserve_size()); -#ifdef CONFIG_CMA - if (!fw_dump.nocma) { - fw_dump.boot_memory_size = - ALIGN(fw_dump.boot_memory_size, - CMA_MIN_ALIGNMENT_BYTES); - } -#endif bootmem_min = fw_dump.ops->fadump_get_bootmem_min(); if (fw_dump.boot_memory_size < bootmem_min) { @@ -573,22 +583,6 @@ int __init fadump_reserve_mem(void) } } - /* - * Calculate the memory boundary. - * If memory_limit is less than actual memory boundary then reserve - * the memory for fadump beyond the memory_limit and adjust the - * memory_limit accordingly, so that the running kernel can run with - * specified memory_limit. - */ - if (memory_limit && memory_limit < memblock_end_of_DRAM()) { - size = get_fadump_area_size(); - if ((memory_limit + size) < memblock_end_of_DRAM()) - memory_limit += size; - else - memory_limit = memblock_end_of_DRAM(); - printk(KERN_INFO "Adjusted memory_limit for firmware-assisted" - " dump, now %#016llx\n", memory_limit); - } if (memory_limit) mem_boundary = memory_limit; else @@ -647,8 +641,6 @@ int __init fadump_reserve_mem(void) pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n", (size >> 20), base, (memblock_phys_mem_size() >> 20)); - - ret = fadump_cma_init(); } return ret; @@ -705,7 +697,7 @@ void crash_fadump(struct pt_regs *regs, const char *str) * old_cpu == -1 means this is the first CPU which has come here, * go ahead and trigger fadump. * - * old_cpu != -1 means some other CPU has already on it's way + * old_cpu != -1 means some other CPU has already on its way * to trigger fadump, just keep looping here. */ this_cpu = smp_processor_id(); @@ -760,7 +752,7 @@ u32 *__init fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs) * prstatus.pr_pid = ???? */ elf_core_copy_regs(&prstatus.pr_reg, regs); - buf = append_elf_note(buf, CRASH_CORE_NOTE_NAME, NT_PRSTATUS, + buf = append_elf_note(buf, NN_PRSTATUS, NT_PRSTATUS, &prstatus, sizeof(prstatus)); return buf; } @@ -931,36 +923,6 @@ static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info, return 0; } -static int fadump_exclude_reserved_area(u64 start, u64 end) -{ - u64 ra_start, ra_end; - int ret = 0; - - ra_start = fw_dump.reserve_dump_area_start; - ra_end = ra_start + fw_dump.reserve_dump_area_size; - - if ((ra_start < end) && (ra_end > start)) { - if ((start < ra_start) && (end > ra_end)) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - if (ret) - return ret; - - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } else if (start < ra_start) { - ret = fadump_add_mem_range(&crash_mrange_info, - start, ra_start); - } else if (ra_end < end) { - ret = fadump_add_mem_range(&crash_mrange_info, - ra_end, end); - } - } else - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - - return ret; -} - static int fadump_init_elfcore_header(char *bufp) { struct elfhdr *elf; @@ -998,52 +960,6 @@ static int fadump_init_elfcore_header(char *bufp) } /* - * Traverse through memblock structure and setup crash memory ranges. These - * ranges will be used create PT_LOAD program headers in elfcore header. - */ -static int fadump_setup_crash_memory_ranges(void) -{ - u64 i, start, end; - int ret; - - pr_debug("Setup crash memory ranges.\n"); - crash_mrange_info.mem_range_cnt = 0; - - /* - * Boot memory region(s) registered with firmware are moved to - * different location at the time of crash. Create separate program - * header(s) for this memory chunk(s) with the correct offset. - */ - for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { - start = fw_dump.boot_mem_addr[i]; - end = start + fw_dump.boot_mem_sz[i]; - ret = fadump_add_mem_range(&crash_mrange_info, start, end); - if (ret) - return ret; - } - - for_each_mem_range(i, &start, &end) { - /* - * skip the memory chunk that is already added - * (0 through boot_memory_top). - */ - if (start < fw_dump.boot_mem_top) { - if (end > fw_dump.boot_mem_top) - start = fw_dump.boot_mem_top; - else - continue; - } - - /* add this range excluding the reserved dump area. */ - ret = fadump_exclude_reserved_area(start, end); - if (ret) - return ret; - } - - return 0; -} - -/* * If the given physical address falls within the boot memory region then * return the relocated address that points to the dump region reserved * for saving initial boot memory contents. @@ -1073,36 +989,50 @@ static inline unsigned long fadump_relocate(unsigned long paddr) return raddr; } -static int fadump_create_elfcore_headers(char *bufp) +static void __init populate_elf_pt_load(struct elf_phdr *phdr, u64 start, + u64 size, unsigned long long offset) { - unsigned long long raddr, offset; - struct elf_phdr *phdr; + phdr->p_align = 0; + phdr->p_memsz = size; + phdr->p_filesz = size; + phdr->p_paddr = start; + phdr->p_offset = offset; + phdr->p_type = PT_LOAD; + phdr->p_flags = PF_R|PF_W|PF_X; + phdr->p_vaddr = (unsigned long)__va(start); +} + +static void __init fadump_populate_elfcorehdr(struct fadump_crash_info_header *fdh) +{ + char *bufp; struct elfhdr *elf; - int i, j; + struct elf_phdr *phdr; + u64 boot_mem_dest_offset; + unsigned long long i, ra_start, ra_end, ra_size, mstart, mend; + bufp = (char *) fw_dump.elfcorehdr_addr; fadump_init_elfcore_header(bufp); elf = (struct elfhdr *)bufp; bufp += sizeof(struct elfhdr); /* - * setup ELF PT_NOTE, place holder for cpu notes info. The notes info - * will be populated during second kernel boot after crash. Hence - * this PT_NOTE will always be the first elf note. + * Set up ELF PT_NOTE, a placeholder for CPU notes information. + * The notes info will be populated later by platform-specific code. + * Hence, this PT_NOTE will always be the first ELF note. * * NOTE: Any new ELF note addition should be placed after this note. */ phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); phdr->p_type = PT_NOTE; - phdr->p_flags = 0; - phdr->p_vaddr = 0; - phdr->p_align = 0; - - phdr->p_offset = 0; - phdr->p_paddr = 0; - phdr->p_filesz = 0; - phdr->p_memsz = 0; - + phdr->p_flags = 0; + phdr->p_vaddr = 0; + phdr->p_align = 0; + phdr->p_offset = 0; + phdr->p_paddr = 0; + phdr->p_filesz = 0; + phdr->p_memsz = 0; + /* Increment number of program headers. */ (elf->e_phnum)++; /* setup ELF PT_NOTE for vmcoreinfo */ @@ -1112,55 +1042,66 @@ static int fadump_create_elfcore_headers(char *bufp) phdr->p_flags = 0; phdr->p_vaddr = 0; phdr->p_align = 0; - - phdr->p_paddr = fadump_relocate(paddr_vmcoreinfo_note()); - phdr->p_offset = phdr->p_paddr; - phdr->p_memsz = phdr->p_filesz = VMCOREINFO_NOTE_SIZE; - + phdr->p_paddr = phdr->p_offset = fdh->vmcoreinfo_raddr; + phdr->p_memsz = phdr->p_filesz = fdh->vmcoreinfo_size; /* Increment number of program headers. */ (elf->e_phnum)++; - /* setup PT_LOAD sections. */ - j = 0; - offset = 0; - raddr = fw_dump.boot_mem_addr[0]; - for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) { - u64 mbase, msize; - - mbase = crash_mrange_info.mem_ranges[i].base; - msize = crash_mrange_info.mem_ranges[i].size; - if (!msize) - continue; - + /* + * Setup PT_LOAD sections. first include boot memory regions + * and then add rest of the memory regions. + */ + boot_mem_dest_offset = fw_dump.boot_mem_dest_addr; + for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) { phdr = (struct elf_phdr *)bufp; bufp += sizeof(struct elf_phdr); - phdr->p_type = PT_LOAD; - phdr->p_flags = PF_R|PF_W|PF_X; - phdr->p_offset = mbase; - - if (mbase == raddr) { - /* - * The entire real memory region will be moved by - * firmware to the specified destination_address. - * Hence set the correct offset. - */ - phdr->p_offset = fw_dump.boot_mem_dest_addr + offset; - if (j < (fw_dump.boot_mem_regs_cnt - 1)) { - offset += fw_dump.boot_mem_sz[j]; - raddr = fw_dump.boot_mem_addr[++j]; - } + populate_elf_pt_load(phdr, fw_dump.boot_mem_addr[i], + fw_dump.boot_mem_sz[i], + boot_mem_dest_offset); + /* Increment number of program headers. */ + (elf->e_phnum)++; + boot_mem_dest_offset += fw_dump.boot_mem_sz[i]; + } + + /* Memory reserved for fadump in first kernel */ + ra_start = fw_dump.reserve_dump_area_start; + ra_size = get_fadump_area_size(); + ra_end = ra_start + ra_size; + + phdr = (struct elf_phdr *)bufp; + for_each_mem_range(i, &mstart, &mend) { + /* Boot memory regions already added, skip them now */ + if (mstart < fw_dump.boot_mem_top) { + if (mend > fw_dump.boot_mem_top) + mstart = fw_dump.boot_mem_top; + else + continue; } - phdr->p_paddr = mbase; - phdr->p_vaddr = (unsigned long)__va(mbase); - phdr->p_filesz = msize; - phdr->p_memsz = msize; - phdr->p_align = 0; + /* Handle memblock regions overlaps with fadump reserved area */ + if ((ra_start < mend) && (ra_end > mstart)) { + if ((mstart < ra_start) && (mend > ra_end)) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + /* Increment number of program headers. */ + (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *)bufp; + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } else if (mstart < ra_start) { + populate_elf_pt_load(phdr, mstart, ra_start - mstart, mstart); + } else if (ra_end < mend) { + populate_elf_pt_load(phdr, ra_end, mend - ra_end, ra_end); + } + } else { + /* No overlap with fadump reserved memory region */ + populate_elf_pt_load(phdr, mstart, mend - mstart, mstart); + } /* Increment number of program headers. */ (elf->e_phnum)++; + bufp += sizeof(struct elf_phdr); + phdr = (struct elf_phdr *) bufp; } - return 0; } static unsigned long init_fadump_header(unsigned long addr) @@ -1175,14 +1116,25 @@ static unsigned long init_fadump_header(unsigned long addr) memset(fdh, 0, sizeof(struct fadump_crash_info_header)); fdh->magic_number = FADUMP_CRASH_INFO_MAGIC; - fdh->elfcorehdr_addr = addr; + fdh->version = FADUMP_HEADER_VERSION; /* We will set the crashing cpu id in crash_fadump() during crash. */ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN; + + /* + * The physical address and size of vmcoreinfo are required in the + * second kernel to prepare elfcorehdr. + */ + fdh->vmcoreinfo_raddr = fadump_relocate(paddr_vmcoreinfo_note()); + fdh->vmcoreinfo_size = VMCOREINFO_NOTE_SIZE; + + + fdh->pt_regs_sz = sizeof(struct pt_regs); /* * When LPAR is terminated by PYHP, ensure all possible CPUs' * register data is processed while exporting the vmcore. */ fdh->cpu_mask = *cpu_possible_mask; + fdh->cpu_mask_sz = sizeof(struct cpumask); return addr; } @@ -1190,8 +1142,6 @@ static unsigned long init_fadump_header(unsigned long addr) static int register_fadump(void) { unsigned long addr; - void *vaddr; - int ret; /* * If no memory is reserved then we can not register for firmware- @@ -1200,18 +1150,10 @@ static int register_fadump(void) if (!fw_dump.reserve_dump_area_size) return -ENODEV; - ret = fadump_setup_crash_memory_ranges(); - if (ret) - return ret; - addr = fw_dump.fadumphdr_addr; /* Initialize fadump crash info header. */ addr = init_fadump_header(addr); - vaddr = __va(addr); - - pr_debug("Creating ELF core headers at %#016lx\n", addr); - fadump_create_elfcore_headers(vaddr); /* register the future kernel dump with firmware. */ pr_debug("Registering for firmware-assisted kernel dump...\n"); @@ -1230,7 +1172,6 @@ void fadump_cleanup(void) } else if (fw_dump.dump_registered) { /* Un-register Firmware-assisted dump if it was registered. */ fw_dump.ops->fadump_unregister(&fw_dump); - fadump_free_mem_ranges(&crash_mrange_info); } if (fw_dump.ops->fadump_cleanup) @@ -1416,6 +1357,22 @@ static void fadump_release_memory(u64 begin, u64 end) fadump_release_reserved_area(tstart, end); } +static void fadump_free_elfcorehdr_buf(void) +{ + if (fw_dump.elfcorehdr_addr == 0 || fw_dump.elfcorehdr_size == 0) + return; + + /* + * Before freeing the memory of `elfcorehdr`, reset the global + * `elfcorehdr_addr` to prevent modules like `vmcore` from accessing + * invalid memory. + */ + elfcorehdr_addr = ELFCORE_ADDR_ERR; + fadump_free_buffer(fw_dump.elfcorehdr_addr, fw_dump.elfcorehdr_size); + fw_dump.elfcorehdr_addr = 0; + fw_dump.elfcorehdr_size = 0; +} + static void fadump_invalidate_release_mem(void) { mutex_lock(&fadump_mutex); @@ -1427,6 +1384,7 @@ static void fadump_invalidate_release_mem(void) fadump_cleanup(); mutex_unlock(&fadump_mutex); + fadump_free_elfcorehdr_buf(); fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM()); fadump_free_cpu_notes_buf(); @@ -1484,6 +1442,18 @@ static ssize_t enabled_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.fadump_enabled); } +/* + * /sys/kernel/fadump/hotplug_ready sysfs node returns 1, which inidcates + * to usersapce that fadump re-registration is not required on memory + * hotplug events. + */ +static ssize_t hotplug_ready_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", 1); +} + static ssize_t mem_reserved_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -1498,6 +1468,43 @@ static ssize_t registered_show(struct kobject *kobj, return sprintf(buf, "%d\n", fw_dump.dump_registered); } +static ssize_t bootargs_append_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%s\n", (char *)__va(fw_dump.param_area)); +} + +static ssize_t bootargs_append_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + char *params; + + if (!fw_dump.fadump_enabled || fw_dump.dump_active) + return -EPERM; + + if (count >= COMMAND_LINE_SIZE) + return -EINVAL; + + /* + * Fail here instead of handling this scenario with + * some silly workaround in capture kernel. + */ + if (saved_command_line_len + count >= COMMAND_LINE_SIZE) { + pr_err("Appending parameters exceeds cmdline size!\n"); + return -ENOSPC; + } + + params = __va(fw_dump.param_area); + strscpy_pad(params, buf, COMMAND_LINE_SIZE); + /* Remove newline character at the end. */ + if (params[count-1] == '\n') + params[count-1] = '\0'; + + return count; +} + static ssize_t registered_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) @@ -1556,11 +1563,14 @@ static struct kobj_attribute release_attr = __ATTR_WO(release_mem); static struct kobj_attribute enable_attr = __ATTR_RO(enabled); static struct kobj_attribute register_attr = __ATTR_RW(registered); static struct kobj_attribute mem_reserved_attr = __ATTR_RO(mem_reserved); +static struct kobj_attribute hotplug_ready_attr = __ATTR_RO(hotplug_ready); +static struct kobj_attribute bootargs_append_attr = __ATTR_RW(bootargs_append); static struct attribute *fadump_attrs[] = { &enable_attr.attr, ®ister_attr.attr, &mem_reserved_attr.attr, + &hotplug_ready_attr.attr, NULL, }; @@ -1578,6 +1588,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); @@ -1632,6 +1648,150 @@ static void __init fadump_init_files(void) return; } +static int __init fadump_setup_elfcorehdr_buf(void) +{ + int elf_phdr_cnt; + unsigned long elfcorehdr_size; + + /* + * Program header for CPU notes comes first, followed by one for + * vmcoreinfo, and the remaining program headers correspond to + * memory regions. + */ + elf_phdr_cnt = 2 + fw_dump.boot_mem_regs_cnt + memblock_num_regions(memory); + elfcorehdr_size = sizeof(struct elfhdr) + (elf_phdr_cnt * sizeof(struct elf_phdr)); + elfcorehdr_size = PAGE_ALIGN(elfcorehdr_size); + + fw_dump.elfcorehdr_addr = (u64)fadump_alloc_buffer(elfcorehdr_size); + if (!fw_dump.elfcorehdr_addr) { + pr_err("Failed to allocate %lu bytes for elfcorehdr\n", + elfcorehdr_size); + return -ENOMEM; + } + fw_dump.elfcorehdr_size = elfcorehdr_size; + return 0; +} + +/* + * Check if the fadump header of crashed kernel is compatible with fadump kernel. + * + * It checks the magic number, endianness, and size of non-primitive type + * members of fadump header to ensure safe dump collection. + */ +static bool __init is_fadump_header_compatible(struct fadump_crash_info_header *fdh) +{ + if (fdh->magic_number == FADUMP_CRASH_INFO_MAGIC_OLD) { + pr_err("Old magic number, can't process the dump.\n"); + return false; + } + + if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) { + if (fdh->magic_number == swab64(FADUMP_CRASH_INFO_MAGIC)) + pr_err("Endianness mismatch between the crashed and fadump kernels.\n"); + else + pr_err("Fadump header is corrupted.\n"); + + return false; + } + + /* + * Dump collection is not safe if the size of non-primitive type members + * of the fadump header do not match between crashed and fadump kernel. + */ + if (fdh->pt_regs_sz != sizeof(struct pt_regs) || + fdh->cpu_mask_sz != sizeof(struct cpumask)) { + pr_err("Fadump header size mismatch.\n"); + return false; + } + + return true; +} + +static void __init fadump_process(void) +{ + struct fadump_crash_info_header *fdh; + + fdh = (struct fadump_crash_info_header *) __va(fw_dump.fadumphdr_addr); + if (!fdh) { + pr_err("Crash info header is empty.\n"); + goto err_out; + } + + /* Avoid processing the dump if fadump header isn't compatible */ + if (!is_fadump_header_compatible(fdh)) + goto err_out; + + /* Allocate buffer for elfcorehdr */ + if (fadump_setup_elfcorehdr_buf()) + goto err_out; + + fadump_populate_elfcorehdr(fdh); + + /* Let platform update the CPU notes in elfcorehdr */ + if (fw_dump.ops->fadump_process(&fw_dump) < 0) + goto err_out; + + /* + * elfcorehdr is now ready to be exported. + * + * set elfcorehdr_addr so that vmcore module will export the + * elfcorehdr through '/proc/vmcore'. + */ + elfcorehdr_addr = virt_to_phys((void *)fw_dump.elfcorehdr_addr); + return; + +err_out: + fadump_invalidate_release_mem(); +} + +/* + * Reserve memory to store additional parameters to be passed + * for fadump/capture kernel. + */ +void __init fadump_setup_param_area(void) +{ + phys_addr_t range_start, range_end; + + if (!fw_dump.param_area_supported || fw_dump.dump_active) + return; + + /* This memory can't be used by PFW or bootloader as it is shared across kernels */ + if (early_radix_enabled()) { + /* + * Anywhere in the upper half should be good enough as all memory + * is accessible in real mode. + */ + range_start = memblock_end_of_DRAM() / 2; + range_end = memblock_end_of_DRAM(); + } else { + /* + * Memory range for passing additional parameters for HASH MMU + * must meet the following conditions: + * 1. The first memory block size must be higher than the + * minimum RMA (MIN_RMA) size. Bootloader can use memory + * upto RMA size. So it should be avoided. + * 2. The range should be between MIN_RMA and RMA size (ppc64_rma_size) + * 3. It must not overlap with the fadump reserved area. + */ + if (ppc64_rma_size < MIN_RMA*1024*1024) + return; + + range_start = MIN_RMA * 1024 * 1024; + range_end = min(ppc64_rma_size, fw_dump.boot_mem_top); + } + + fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE, + COMMAND_LINE_SIZE, + range_start, + range_end); + if (!fw_dump.param_area) { + pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); + return; + } + + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); +} + /* * Prepare for firmware-assisted dump. */ @@ -1651,12 +1811,7 @@ int __init setup_fadump(void) * saving it to the disk. */ if (fw_dump.dump_active) { - /* - * if dump process fails then invalidate the registration - * and release memory before proceeding for re-registration. - */ - if (fw_dump.ops->fadump_process(&fw_dump) < 0) - fadump_invalidate_release_mem(); + fadump_process(); } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { @@ -1735,8 +1890,3 @@ static void __init fadump_reserve_crash_area(u64 base) memblock_reserve(mstart, msize); } } - -unsigned long __init arch_reserved_kernel_pages(void) -{ - return memblock_reserved_size() / PAGE_SIZE; -} diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index f8e2911478a7..9cba7dbf58dd 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -21,17 +21,9 @@ mtspr SPRN_SPRG_SCRATCH1,r11 mfspr r10, SPRN_SPRG_THREAD .if \handle_dar_dsisr -#ifdef CONFIG_40x - mfspr r11, SPRN_DEAR -#else mfspr r11, SPRN_DAR -#endif stw r11, DAR(r10) -#ifdef CONFIG_40x - mfspr r11, SPRN_ESR -#else mfspr r11, SPRN_DSISR -#endif stw r11, DSISR(r10) .endif mfspr r11, SPRN_SRR0 @@ -96,9 +88,7 @@ .endif lwz r9, SRR1(r12) lwz r12, SRR0(r12) -#ifdef CONFIG_40x - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ -#elif defined(CONFIG_PPC_8xx) +#ifdef CONFIG_PPC_8xx mtspr SPRN_EID, r2 /* Set MSR_RI */ #else li r10, MSR_KERNEL /* can take exceptions */ diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S deleted file mode 100644 index 9fc90410b385..000000000000 --- a/arch/powerpc/kernel/head_40x.S +++ /dev/null @@ -1,721 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> - * Initial PowerPC version. - * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> - * Rewritten for PReP - * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> - * Low-level exception handers, MMU support, and rewrite. - * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> - * PowerPC 8xx modifications. - * Copyright (c) 1998-1999 TiVo, Inc. - * PowerPC 403GCX modifications. - * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> - * PowerPC 403GCX/405GP modifications. - * Copyright 2000 MontaVista Software Inc. - * PPC405 modifications - * PowerPC 403GCX/405GP modifications. - * Author: MontaVista Software, Inc. - * frank_rowand@mvista.com or source@mvista.com - * debbie_chu@mvista.com - * - * Module name: head_4xx.S - * - * Description: - * Kernel execution entry point code. - */ - -#include <linux/init.h> -#include <linux/pgtable.h> -#include <linux/sizes.h> -#include <linux/linkage.h> - -#include <asm/processor.h> -#include <asm/page.h> -#include <asm/mmu.h> -#include <asm/cputable.h> -#include <asm/thread_info.h> -#include <asm/ppc_asm.h> -#include <asm/asm-offsets.h> -#include <asm/ptrace.h> - -#include "head_32.h" - -/* As with the other PowerPC ports, it is expected that when code - * execution begins here, the following registers contain valid, yet - * optional, information: - * - * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) - * r4 - Starting address of the init RAM disk - * r5 - Ending address of the init RAM disk - * r6 - Start of kernel command line string (e.g. "mem=96m") - * r7 - End of kernel command line string - * - * This is all going to change RSN when we add bi_recs....... -- Dan - */ - __HEAD -_GLOBAL(_stext); -_GLOBAL(_start); - - mr r31,r3 /* save device tree ptr */ - - /* We have to turn on the MMU right away so we get cache modes - * set correctly. - */ - bl initial_mmu - -/* We now have the lower 16 Meg mapped into TLB entries, and the caches - * ready to work. - */ -turn_on_mmu: - lis r0,MSR_KERNEL@h - ori r0,r0,MSR_KERNEL@l - mtspr SPRN_SRR1,r0 - lis r0,start_here@h - ori r0,r0,start_here@l - mtspr SPRN_SRR0,r0 - rfi /* enables MMU */ - b . /* prevent prefetch past rfi */ - -/* - * This area is used for temporarily saving registers during the - * critical exception prolog. - */ - . = 0xc0 -crit_save: -_GLOBAL(crit_r10) - .space 4 -_GLOBAL(crit_r11) - .space 4 -_GLOBAL(crit_srr0) - .space 4 -_GLOBAL(crit_srr1) - .space 4 -_GLOBAL(crit_r1) - .space 4 -_GLOBAL(crit_dear) - .space 4 -_GLOBAL(crit_esr) - .space 4 - -/* - * Exception prolog for critical exceptions. This is a little different - * from the normal exception prolog above since a critical exception - * can potentially occur at any point during normal exception processing. - * Thus we cannot use the same SPRG registers as the normal prolog above. - * Instead we use a couple of words of memory at low physical addresses. - * This is OK since we don't support SMP on these processors. - */ -.macro CRITICAL_EXCEPTION_PROLOG trapno name - stw r10,crit_r10@l(0) /* save two registers to work with */ - stw r11,crit_r11@l(0) - mfspr r10,SPRN_SRR0 - mfspr r11,SPRN_SRR1 - stw r10,crit_srr0@l(0) - stw r11,crit_srr1@l(0) - mfspr r10,SPRN_DEAR - mfspr r11,SPRN_ESR - stw r10,crit_dear@l(0) - stw r11,crit_esr@l(0) - mfcr r10 /* save CR in r10 for now */ - mfspr r11,SPRN_SRR3 /* check whether user or kernel */ - andi. r11,r11,MSR_PR - lis r11,(critirq_ctx-PAGE_OFFSET)@ha - lwz r11,(critirq_ctx-PAGE_OFFSET)@l(r11) - beq 1f - /* COMING FROM USER MODE */ - mfspr r11,SPRN_SPRG_THREAD /* if from user, start at top of */ - lwz r11,TASK_STACK-THREAD(r11) /* this thread's kernel stack */ -1: stw r1,crit_r1@l(0) - addi r1,r11,THREAD_SIZE-INT_FRAME_SIZE /* Alloc an excpt frm */ - LOAD_REG_IMMEDIATE(r11, MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)) /* re-enable MMU */ - mtspr SPRN_SRR1, r11 - lis r11, 1f@h - ori r11, r11, 1f@l - mtspr SPRN_SRR0, r11 - rfi - - .text -1: -\name\()_virt: - lwz r11,crit_r1@l(0) - stw r11,GPR1(r1) - stw r11,0(r1) - mr r11,r1 - stw r10,_CCR(r11) /* save various registers */ - stw r12,GPR12(r11) - stw r9,GPR9(r11) - mflr r10 - stw r10,_LINK(r11) - lis r9,PAGE_OFFSET@ha - lwz r10,crit_r10@l(r9) - lwz r12,crit_r11@l(r9) - stw r10,GPR10(r11) - stw r12,GPR11(r11) - lwz r12,crit_dear@l(r9) - lwz r9,crit_esr@l(r9) - stw r12,_DEAR(r11) /* since they may have had stuff */ - stw r9,_ESR(r11) /* exception was taken */ - mfspr r12,SPRN_SRR2 - mfspr r9,SPRN_SRR3 - rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */ - COMMON_EXCEPTION_PROLOG_END \trapno + 2 -_ASM_NOKPROBE_SYMBOL(\name\()_virt) -.endm - - /* - * State at this point: - * r9 saved in stack frame, now saved SRR3 & ~MSR_WE - * r10 saved in crit_r10 and in stack frame, trashed - * r11 saved in crit_r11 and in stack frame, - * now phys stack/exception frame pointer - * r12 saved in stack frame, now saved SRR2 - * CR saved in stack frame, CR0.EQ = !SRR3.PR - * LR, DEAR, ESR in stack frame - * r1 saved in stack frame, now virt stack/excframe pointer - * r0, r3-r8 saved in stack frame - */ - -/* - * Exception vectors. - */ -#define CRITICAL_EXCEPTION(n, label, hdlr) \ - START_EXCEPTION(n, label); \ - CRITICAL_EXCEPTION_PROLOG n label; \ - prepare_transfer_to_handler; \ - bl hdlr; \ - b ret_from_crit_exc - -/* - * 0x0100 - Critical Interrupt Exception - */ - CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, unknown_exception) - -/* - * 0x0200 - Machine Check Exception - */ - CRITICAL_EXCEPTION(0x0200, MachineCheck, machine_check_exception) - -/* - * 0x0300 - Data Storage Exception - * This happens for just a few reasons. U0 set (but we don't do that), - * or zone protection fault (user violation, write to protected page). - * The other Data TLB exceptions bail out to this point - * if they can't resolve the lightweight TLB fault. - */ - START_EXCEPTION(0x0300, DataStorage) - EXCEPTION_PROLOG 0x300 DataStorage handle_dar_dsisr=1 - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* - * 0x0400 - Instruction Storage Exception - * This is caused by a fetch from non-execute or guarded pages. - */ - START_EXCEPTION(0x0400, InstructionAccess) - EXCEPTION_PROLOG 0x400 InstructionAccess - li r5,0 - stw r5, _ESR(r11) /* Zero ESR */ - stw r12, _DEAR(r11) /* SRR0 as DEAR */ - prepare_transfer_to_handler - bl do_page_fault - b interrupt_return - -/* 0x0500 - External Interrupt Exception */ - EXCEPTION(0x0500, HardwareInterrupt, do_IRQ) - -/* 0x0600 - Alignment Exception */ - START_EXCEPTION(0x0600, Alignment) - EXCEPTION_PROLOG 0x600 Alignment handle_dar_dsisr=1 - prepare_transfer_to_handler - bl alignment_exception - REST_NVGPRS(r1) - b interrupt_return - -/* 0x0700 - Program Exception */ - START_EXCEPTION(0x0700, ProgramCheck) - EXCEPTION_PROLOG 0x700 ProgramCheck handle_dar_dsisr=1 - prepare_transfer_to_handler - bl program_check_exception - REST_NVGPRS(r1) - b interrupt_return - - EXCEPTION(0x0800, Trap_08, unknown_exception) - EXCEPTION(0x0900, Trap_09, unknown_exception) - EXCEPTION(0x0A00, Trap_0A, unknown_exception) - EXCEPTION(0x0B00, Trap_0B, unknown_exception) - -/* 0x0C00 - System Call Exception */ - START_EXCEPTION(0x0C00, SystemCall) - SYSCALL_ENTRY 0xc00 -/* Trap_0D is commented out to get more space for system call exception */ - -/* EXCEPTION(0x0D00, Trap_0D, unknown_exception) */ - EXCEPTION(0x0E00, Trap_0E, unknown_exception) - EXCEPTION(0x0F00, Trap_0F, unknown_exception) - -/* 0x1000 - Programmable Interval Timer (PIT) Exception */ - START_EXCEPTION(0x1000, DecrementerTrap) - b Decrementer - -/* 0x1010 - Fixed Interval Timer (FIT) Exception */ - START_EXCEPTION(0x1010, FITExceptionTrap) - b FITException - -/* 0x1020 - Watchdog Timer (WDT) Exception */ - START_EXCEPTION(0x1020, WDTExceptionTrap) - b WDTException - -/* 0x1100 - Data TLB Miss Exception - * As the name implies, translation is not in the MMU, so search the - * page tables and fix it. The only purpose of this function is to - * load TLB entries from the page table if they exist. - */ - START_EXCEPTION(0x1100, DTLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_DEAR /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ - and r9, r9, r11 /* hwwrite = dirty & w */ - rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b DataStorage - -/* 0x1200 - Instruction TLB Miss Exception - * Nearly the same as above, except we get our information from different - * registers and bailout to a different point. - */ - START_EXCEPTION(0x1200, ITLBMiss) - mtspr SPRN_SPRG_SCRATCH5, r10 /* Save some working registers */ - mtspr SPRN_SPRG_SCRATCH6, r11 - mtspr SPRN_SPRG_SCRATCH3, r12 - mtspr SPRN_SPRG_SCRATCH4, r9 - mfcr r12 - mfspr r9, SPRN_PID - rlwimi r12, r9, 0, 0xff - mfspr r10, SPRN_SRR0 /* Get faulting address */ - - /* If we are faulting a kernel address, we have to use the - * kernel page tables. - */ - lis r11, PAGE_OFFSET@h - cmplw r10, r11 - blt+ 3f - lis r11, swapper_pg_dir@h - ori r11, r11, swapper_pg_dir@l - li r9, 0 - mtspr SPRN_PID, r9 /* TLB will have 0 TID */ - b 4f - - /* Get the PGD for the current thread. - */ -3: - mfspr r11,SPRN_SPRG_THREAD - lwz r11,PGDIR(r11) -#ifdef CONFIG_PPC_KUAP - rlwinm. r9, r9, 0, 0xff - beq 5f /* Kuap fault */ -#endif -4: - tophys(r11, r11) - rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ - lwz r11, 0(r11) /* Get L1 entry */ - andi. r9, r11, _PMD_PRESENT /* Check if it points to a PTE page */ - beq 2f /* Bail if no table */ - - rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */ - lwz r11, 0(r11) /* Get Linux PTE */ - li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC - andc. r9, r9, r11 /* Check permission */ - bne 5f - - rlwinm r9, r11, 1, _PAGE_WRITE /* dirty => w */ - and r9, r9, r11 /* hwwrite = dirty & w */ - rlwimi r11, r9, 0, _PAGE_WRITE /* replace w by hwwrite */ - - /* Create TLB tag. This is the faulting address plus a static - * set of bits. These are size, valid, E, U0. - */ - li r9, 0x00c0 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -2: /* Check for possible large-page pmd entry */ - rlwinm. r9, r11, 2, 22, 24 - beq 5f - - /* Create TLB tag. This is the faulting address, plus a static - * set of bits (valid, E, U0) plus the size from the PMD. - */ - ori r9, r9, 0x40 - rlwimi r10, r9, 0, 20, 31 - - b finish_tlb_load - -5: - /* The bailout. Restore registers to pre-exception conditions - * and call the heavyweights to help us out. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - b InstructionAccess - - EXCEPTION(0x1300, Trap_13, unknown_exception) - EXCEPTION(0x1400, Trap_14, unknown_exception) - EXCEPTION(0x1500, Trap_15, unknown_exception) - EXCEPTION(0x1600, Trap_16, unknown_exception) - EXCEPTION(0x1700, Trap_17, unknown_exception) - EXCEPTION(0x1800, Trap_18, unknown_exception) - EXCEPTION(0x1900, Trap_19, unknown_exception) - EXCEPTION(0x1A00, Trap_1A, unknown_exception) - EXCEPTION(0x1B00, Trap_1B, unknown_exception) - EXCEPTION(0x1C00, Trap_1C, unknown_exception) - EXCEPTION(0x1D00, Trap_1D, unknown_exception) - EXCEPTION(0x1E00, Trap_1E, unknown_exception) - EXCEPTION(0x1F00, Trap_1F, unknown_exception) - -/* Check for a single step debug exception while in an exception - * handler before state has been saved. This is to catch the case - * where an instruction that we are trying to single step causes - * an exception (eg ITLB/DTLB miss) and thus the first instruction of - * the exception handler generates a single step debug exception. - * - * If we get a debug trap on the first instruction of an exception handler, - * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is - * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). - * The exception handler was handling a non-critical interrupt, so it will - * save (and later restore) the MSR via SPRN_SRR1, which will still have - * the MSR_DE bit set. - */ - /* 0x2000 - Debug Exception */ - START_EXCEPTION(0x2000, DebugTrap) - CRITICAL_EXCEPTION_PROLOG 0x2000 DebugTrap - - /* - * If this is a single step or branch-taken exception in an - * exception entry sequence, it was probably meant to apply to - * the code where the exception occurred (since exception entry - * doesn't turn off DE automatically). We simulate the effect - * of turning off DE on entry to an exception handler by turning - * off DE in the SRR3 value and clearing the debug status. - */ - mfspr r10,SPRN_DBSR /* check single-step/branch taken */ - andis. r10,r10,DBSR_IC@h - beq+ 2f - - andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ - beq 1f /* branch and fix it up */ - - mfspr r10,SPRN_SRR2 /* Faulting instruction address */ - cmplwi r10,0x2100 - bgt+ 2f /* address above exception vectors */ - - /* here it looks like we got an inappropriate debug exception. */ -1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ - lis r10,DBSR_IC@h /* clear the IC event */ - mtspr SPRN_DBSR,r10 - /* restore state and get out */ - lwz r10,_CCR(r11) - lwz r0,GPR0(r11) - lwz r1,GPR1(r11) - mtcrf 0x80,r10 - mtspr SPRN_SRR2,r12 - mtspr SPRN_SRR3,r9 - lwz r9,GPR9(r11) - lwz r12,GPR12(r11) - lwz r10,crit_r10@l(0) - lwz r11,crit_r11@l(0) - rfci - b . - - /* continue normal handling for a critical exception... */ -2: mfspr r4,SPRN_DBSR - stw r4,_ESR(r11) /* DebugException takes DBSR in _ESR */ - prepare_transfer_to_handler - bl DebugException - b ret_from_crit_exc - - /* Programmable Interval Timer (PIT) Exception. (from 0x1000) */ - __HEAD -Decrementer: - EXCEPTION_PROLOG 0x1000 Decrementer - lis r0,TSR_PIS@h - mtspr SPRN_TSR,r0 /* Clear the PIT exception */ - prepare_transfer_to_handler - bl timer_interrupt - b interrupt_return - - /* Fixed Interval Timer (FIT) Exception. (from 0x1010) */ - __HEAD -FITException: - EXCEPTION_PROLOG 0x1010 FITException - prepare_transfer_to_handler - bl unknown_exception - b interrupt_return - - /* Watchdog Timer (WDT) Exception. (from 0x1020) */ - __HEAD -WDTException: - CRITICAL_EXCEPTION_PROLOG 0x1020 WDTException - prepare_transfer_to_handler - bl WatchdogException - b ret_from_crit_exc - -/* Other PowerPC processors, namely those derived from the 6xx-series - * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. - * However, for the 4xx-series processors these are neither defined nor - * reserved. - */ - - __HEAD - /* Damn, I came up one instruction too many to fit into the - * exception space :-). Both the instruction and data TLB - * miss get to this point to load the TLB. - * r10 - TLB_TAG value - * r11 - Linux PTE - * r9 - available to use - * PID - loaded with proper value when we get here - * Upon exit, we reload everything and RFI. - * Actually, it will fit now, but oh well.....a common place - * to load the TLB. - */ -tlb_4xx_index: - .long 0 -finish_tlb_load: - /* - * Clear out the software-only bits in the PTE to generate the - * TLB_DATA value. These are the bottom 2 bits of the RPM, the - * 4 bits of the zone field, and M. - */ - li r9, 0x0cf2 - andc r11, r11, r9 - rlwimi r11, r10, 8, 24, 27 /* Copy 4 upper address bit into zone */ - - /* load the next available TLB index. */ - lwz r9, tlb_4xx_index@l(0) - addi r9, r9, 1 - andi. r9, r9, PPC40X_TLB_SIZE - 1 - stw r9, tlb_4xx_index@l(0) - - tlbwe r11, r9, TLB_DATA /* Load TLB LO */ - tlbwe r10, r9, TLB_TAG /* Load TLB HI */ - - /* Done...restore registers and get out of here. - */ - mtspr SPRN_PID, r12 - mtcrf 0x80, r12 - mfspr r9, SPRN_SPRG_SCRATCH4 - mfspr r12, SPRN_SPRG_SCRATCH3 - mfspr r11, SPRN_SPRG_SCRATCH6 - mfspr r10, SPRN_SPRG_SCRATCH5 - rfi /* Should sync shadow TLBs */ - b . /* prevent prefetch past rfi */ - -/* This is where the main kernel code starts. - */ -start_here: - - /* ptr to current */ - lis r2,init_task@h - ori r2,r2,init_task@l - - /* ptr to phys current thread */ - tophys(r4,r2) - addi r4,r4,THREAD /* init task's THREAD */ - mtspr SPRN_SPRG_THREAD,r4 - - /* stack */ - lis r1,init_thread_union@ha - addi r1,r1,init_thread_union@l - li r0,0 - stwu r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1) - - bl early_init /* We have to do this with MMU on */ - -/* - * Decide what sort of machine this is and initialize the MMU. - */ -#ifdef CONFIG_KASAN - bl kasan_early_init -#endif - li r3,0 - mr r4,r31 - bl machine_init - bl MMU_init - -/* Go back to running unmapped so we can load up new values - * and change to using our exception vectors. - * On the 4xx, all we have to do is invalidate the TLB to clear - * the old 16M byte TLB mappings. - */ - lis r4,2f@h - ori r4,r4,2f@l - tophys(r4,r4) - lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h - ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l - mtspr SPRN_SRR0,r4 - mtspr SPRN_SRR1,r3 - rfi - b . /* prevent prefetch past rfi */ - -/* Load up the kernel context */ -2: - sync /* Flush to memory before changing TLB */ - tlbia - isync /* Flush shadow TLBs */ - - /* set up the PTE pointers for the Abatron bdiGDB. - */ - lis r6, swapper_pg_dir@h - ori r6, r6, swapper_pg_dir@l - lis r5, abatron_pteptrs@h - ori r5, r5, abatron_pteptrs@l - stw r5, 0xf0(0) /* Must match your Abatron config file */ - tophys(r5,r5) - stw r6, 0(r5) - -/* Now turn on the MMU for real! */ - lis r4,MSR_KERNEL@h - ori r4,r4,MSR_KERNEL@l - lis r3,start_kernel@h - ori r3,r3,start_kernel@l - mtspr SPRN_SRR0,r3 - mtspr SPRN_SRR1,r4 - rfi /* enable MMU and jump to start_kernel */ - b . /* prevent prefetch past rfi */ - -/* Set up the initial MMU state so we can do the first level of - * kernel initialization. This maps the first 32 MBytes of memory 1:1 - * virtual to physical and more importantly sets the cache mode. - */ -SYM_FUNC_START_LOCAL(initial_mmu) - tlbia /* Invalidate all TLB entries */ - isync - - /* We should still be executing code at physical address 0x0000xxxx - * at this point. However, start_here is at virtual address - * 0xC000xxxx. So, set up a TLB mapping to cover this once - * translation is enabled. - */ - - lis r3,KERNELBASE@h /* Load the kernel virtual address */ - ori r3,r3,KERNELBASE@l - tophys(r4,r3) /* Load the kernel physical address */ - - iccci r0,r3 /* Invalidate the i-cache before use */ - - /* Load the kernel PID. - */ - li r0,0 - mtspr SPRN_PID,r0 - sync - - /* Configure and load one entry into TLB slots 63 */ - clrrwi r4,r4,10 /* Mask off the real page number */ - ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ - - clrrwi r3,r3,10 /* Mask off the effective page number */ - ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) - - li r0,63 /* TLB slot 63 */ - - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - li r0,62 /* TLB slot 62 */ - addis r4,r4,SZ_16M@h - addis r3,r3,SZ_16M@h - tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ - tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ - - isync - - /* Establish the exception vector base - */ - lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ - tophys(r0,r4) /* Use the physical address */ - mtspr SPRN_EVPR,r0 - - blr -SYM_FUNC_END(initial_mmu) - -_GLOBAL(abort) - mfspr r13,SPRN_DBCR0 - oris r13,r13,DBCR0_RST_SYSTEM@h - mtspr SPRN_DBCR0,r13 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 4690c219bfa4..63432a33ec49 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -647,8 +647,9 @@ __after_prom_start: * Note: This process overwrites the OF exception vectors. */ LOAD_REG_IMMEDIATE(r3, PAGE_OFFSET) - mr. r4,r26 /* In some cases the loader may */ - beq 9f /* have already put us at zero */ + mr r4,r26 /* Load the virtual source address into r4 */ + cmpld r3,r4 /* Check if source == dest */ + beq 9f /* If so skip the copy */ li r6,0x100 /* Start offset, the first 0x100 */ /* bytes were copied earlier. */ diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index 39724ff5ae1f..f9a73fae6464 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -294,9 +294,10 @@ set_ivor: /* Macros to hide the PTE size differences * * FIND_PTE -- walks the page tables given EA & pgdir pointer - * r10 -- EA of fault + * r10 -- free * r11 -- PGDIR pointer * r12 -- free + * r13 -- EA of fault * label 2: is the bailout case * * if we find the pte (fall through): @@ -307,34 +308,34 @@ set_ivor: #ifdef CONFIG_PTE_64BIT #ifdef CONFIG_HUGETLB_PAGE #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ + rlwinm. r10, r11, 32 - _PAGE_PSIZE_SHIFT, 0x1e; /* get tsize*/ \ + bne 1000f; /* Huge page (leaf entry) */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ - blt 1000f; /* Normal non-huge page */ \ beq 2f; /* Bail if no table */ \ - oris r11, r11, PD_HUGE@h; /* Put back address bit */ \ - andi. r10, r11, HUGEPD_SHIFT_MASK@l; /* extract size field */ \ - xor r12, r10, r11; /* drop size bits from pointer */ \ - b 1001f; \ -1000: rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ li r10, 0; /* clear r10 */ \ -1001: lwz r11, 4(r12); /* Get pte entry */ + lwz r11, 4(r12); /* Get pte entry */ \ +1000: #else #define FIND_PTE \ - rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ - lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm r12, r13, 14, 18, 28; /* Compute pgdir/pmd offset */ \ + add r12, r11, r12; \ + lwz r11, 4(r12); /* Get pgd/pmd entry */ \ rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + rlwimi r12, r13, 23, 20, 28; /* Compute pte address */ \ lwz r11, 4(r12); /* Get pte entry */ #endif /* HUGEPAGE */ #else /* !PTE_64BIT */ #define FIND_PTE \ - rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + rlwimi r11, r13, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ lwz r11, 0(r11); /* Get L1 entry */ \ rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ beq 2f; /* Bail if no table */ \ - rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + rlwimi r12, r13, 22, 20, 29; /* Compute PTE address */ \ lwz r11, 0(r12); /* Get Linux PTE */ #endif @@ -441,13 +442,13 @@ START_BTB_FLUSH_SECTION BTB_FLUSH(r10) 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_DEAR /* Get faulting address */ + mfspr r13, SPRN_DEAR /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -470,29 +471,14 @@ END_BTB_FLUSH_SECTION #endif 4: - /* Mask of required permission bits. Note that while we - * do copy ESR:ST to _PAGE_WRITE position as trying to write - * to an RO page is pretty common, we don't do it with - * _PAGE_DIRTY. We could do it, but it's a fairly rare - * event so I'd rather take the overhead when it happens - * rather than adding an instruction here. We should measure - * whether the whole thing is worth it in the first place - * as we could avoid loading SPRN_ESR completely in the first - * place... - * - * TODO: Is it worth doing that mfspr & rlwimi in the first - * place or can we save a couple of instructions here ? - */ - mfspr r12,SPRN_ESR + FIND_PTE + #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT|_PAGE_BAP_SR oris r13,r13,_PAGE_ACCESSED@h #else li r13,_PAGE_PRESENT|_PAGE_READ|_PAGE_ACCESSED #endif - rlwimi r13,r12,11,29,29 - - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -549,13 +535,13 @@ START_BTB_FLUSH_SECTION 1: END_BTB_FLUSH_SECTION - mfspr r10, SPRN_SRR0 /* Get faulting address */ + mfspr r13, SPRN_SRR0 /* Get faulting address */ /* If we are faulting a kernel address, we have to use the * kernel page tables. */ lis r11, PAGE_OFFSET@h - cmplw 5, r10, r11 + cmplw 5, r13, r11 blt 5, 3f lis r11, swapper_pg_dir@h ori r11, r11, swapper_pg_dir@l @@ -564,6 +550,7 @@ END_BTB_FLUSH_SECTION rlwinm r12,r12,0,16,1 mtspr SPRN_MAS1,r12 + FIND_PTE /* Make up the required permissions for kernel code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_SX @@ -584,6 +571,7 @@ END_BTB_FLUSH_SECTION beq 2f /* KUAP fault */ #endif + FIND_PTE /* Make up the required permissions for user code */ #ifdef CONFIG_PTE_64BIT li r13,_PAGE_PRESENT | _PAGE_BAP_UX @@ -593,7 +581,6 @@ END_BTB_FLUSH_SECTION #endif 4: - FIND_PTE andc. r13,r13,r11 /* Check permission */ #ifdef CONFIG_PTE_64BIT @@ -746,17 +733,12 @@ finish_tlb_load: lwz r15, 0(r14) 100: stw r15, 0(r17) - /* - * Calc MAS1_TSIZE from r10 (which has pshift encoded) - * tlb_enc = (pshift - 10). - */ - subi r15, r10, 10 mfspr r16, SPRN_MAS1 - rlwimi r16, r15, 7, 20, 24 + rlwimi r16, r10, MAS1_TSIZE_SHIFT, MAS1_TSIZE_MASK mtspr SPRN_MAS1, r16 /* copy the pshift for use later */ - mr r14, r10 + addi r14, r10, _PAGE_PSIZE_SHIFT_OFFSET /* fall through */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 647b0b445e89..56c5ebe21b99 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -40,16 +40,6 @@ #include "head_32.h" -.macro compare_to_kernel_boundary scratch, addr -#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000 -/* By simply checking Address >= 0x80000000, we know if its a kernel address */ - not. \scratch, \addr -#else - rlwinm \scratch, \addr, 16, 0xfff8 - cmpli cr0, \scratch, PAGE_OFFSET@h -#endif -.endm - #define PAGE_SHIFT_512K 19 #define PAGE_SHIFT_8M 23 @@ -199,18 +189,7 @@ instruction_counter: mfspr r10, SPRN_SRR0 /* Get effective address of fault */ INVALIDATE_ADJACENT_PAGES_CPU15(r10, r11) mtspr SPRN_MD_EPN, r10 -#ifdef CONFIG_MODULES - mfcr r11 - compare_to_kernel_boundary r10, r10 -#endif mfspr r10, SPRN_M_TWB /* Get level 1 table */ -#ifdef CONFIG_MODULES - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 -#endif lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 mfspr r10, SPRN_MD_TWC @@ -248,19 +227,12 @@ instruction_counter: START_EXCEPTION(INTERRUPT_DATA_TLB_MISS_8xx, DataStoreTLBMiss) mtspr SPRN_SPRG_SCRATCH2, r10 mtspr SPRN_M_TW, r11 - mfcr r11 /* If we are faulting a kernel address, we have to use the * kernel page tables. */ mfspr r10, SPRN_MD_EPN - compare_to_kernel_boundary r10, r10 mfspr r10, SPRN_M_TWB /* Get level 1 table */ - blt+ 3f - rlwinm r10, r10, 0, 20, 31 - oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha -3: - mtcr r11 lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */ mtspr SPRN_MD_TWC, r11 @@ -332,15 +304,19 @@ instruction_counter: cmpwi cr1, r11, RPN_PATTERN beq- cr1, FixupDAR /* must be a buggy dcbX, icbi insn. */ DARFixed:/* Return from dcbx instruction bug workaround */ + mfspr r11, SPRN_DSISR + rlwinm r11, r11, 0, DSISR_NOHPTE + cmpwi cr1, r11, 0 + beq+ cr1, .Ldtlbie + mfspr r11, SPRN_DAR + tlbie r11 + rlwinm r11, r11, 16, 0xffff + cmplwi cr1, r11, TASK_SIZE@h + bge- cr1, FixupPGD +.Ldtlbie: EXCEPTION_PROLOG_1 /* 0x300 is DataAccess exception, needed by bad_page_fault() */ EXCEPTION_PROLOG_2 INTERRUPT_DATA_STORAGE DataTLBError handle_dar_dsisr=1 - lwz r4, _DAR(r11) - lwz r5, _DSISR(r11) - andis. r10,r5,DSISR_NOHPTE@h - beq+ .Ldtlbie - tlbie r4 -.Ldtlbie: prepare_transfer_to_handler bl do_page_fault b interrupt_return @@ -394,6 +370,30 @@ DARFixed:/* Return from dcbx instruction bug workaround */ __HEAD . = 0x2000 +FixupPGD: + mtspr SPRN_M_TW, r10 + mfspr r10, SPRN_DAR + mtspr SPRN_MD_EPN, r10 + mfspr r11, SPRN_M_TWB /* Get level 1 table */ + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + bne cr1, 1f + + rlwinm r10, r11, 0, 20, 31 + oris r10, r10, (swapper_pg_dir - PAGE_OFFSET)@ha + lwz r10, (swapper_pg_dir - PAGE_OFFSET)@l(r10) /* Get the level 1 entry */ + cmpwi cr1, r10, 0 + beq cr1, 1f + stw r10, (swapper_pg_dir - PAGE_OFFSET)@l(r11) /* Set the level 1 entry */ + mfspr r10, SPRN_M_TW + mtcr r10 + mfspr r10, SPRN_SPRG_SCRATCH0 + mfspr r11, SPRN_SPRG_SCRATCH1 + rfi +1: + mfspr r10, SPRN_M_TW + b .Ldtlbie + /* This is the procedure to calculate the data EA for buggy dcbx,dcbi instructions * by decoding the registers used by the dcbx instruction and adding them. * DAR is set to the calculated address. @@ -404,7 +404,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ mfspr r10, SPRN_SRR0 mtspr SPRN_MD_EPN, r10 rlwinm r11, r10, 16, 0xfff8 - cmpli cr1, r11, PAGE_OFFSET@h + cmpli cr1, r11, TASK_SIZE@h mfspr r11, SPRN_M_TWB /* Get level 1 table */ blt+ cr1, 3f @@ -415,14 +415,13 @@ FixupDAR:/* Entry point for dcbx workaround. */ oris r11, r11, (swapper_pg_dir - PAGE_OFFSET)@ha 3: lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */ + rlwinm r11, r11, 0, ~_PMD_PAGE_8M mtspr SPRN_MD_TWC, r11 - mtcrf 0x01, r11 mfspr r11, SPRN_MD_TWC lwz r11, 0(r11) /* Get the pte */ - bt 28,200f /* bit 28 = Large page (8M) */ /* concat physical page address(r11) and page offset(r10) */ rlwimi r11, r10, 0, 32 - PAGE_SHIFT, 31 -201: lwz r11,0(r11) + lwz r11,0(r11) /* Check if it really is a dcbx instruction. */ /* dcbt and dcbtst does not generate DTLB Misses/Errors, * no need to include them here */ @@ -441,11 +440,6 @@ FixupDAR:/* Entry point for dcbx workaround. */ 141: mfspr r10,SPRN_M_TW b DARFixed /* Nope, go back to normal TLB processing */ -200: - /* concat physical page address(r11) and page offset(r10) */ - rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31 - b 201b - 144: mfspr r10, SPRN_DSISR rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */ mtspr SPRN_DSISR, r10 @@ -500,6 +494,7 @@ FixupDAR:/* Entry point for dcbx workaround. */ bctr /* jump into table */ 152: mfdar r11 + mtdar r10 mtctr r11 /* restore ctr reg from DAR */ mfspr r11, SPRN_SPRG_THREAD stw r10, DAR(r11) @@ -593,6 +588,10 @@ start_here: lis r0, (MD_TWAM | MD_RSV4I)@h mtspr SPRN_MD_CTR, r0 #endif +#ifndef CONFIG_PIN_TLB_TEXT + li r0, 0 + mtspr SPRN_MI_CTR, r0 +#endif #if !defined(CONFIG_PIN_TLB_DATA) && !defined(CONFIG_PIN_TLB_IMMR) lis r0, MD_TWAM@h mtspr SPRN_MD_CTR, r0 @@ -689,6 +688,7 @@ SYM_FUNC_START_LOCAL(initial_mmu) blr SYM_FUNC_END(initial_mmu) +#ifdef CONFIG_PIN_TLB _GLOBAL(mmu_pin_tlb) lis r9, (1f - PAGE_OFFSET)@h ori r9, r9, (1f - PAGE_OFFSET)@l @@ -710,6 +710,7 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_MD_CTR, r6 tlbia +#ifdef CONFIG_PIN_TLB_TEXT LOAD_REG_IMMEDIATE(r5, 28 << 8) LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET) LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED) @@ -730,6 +731,7 @@ _GLOBAL(mmu_pin_tlb) bdnzt lt, 2b lis r0, MI_RSV4I@h mtspr SPRN_MI_CTR, r0 +#endif LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM) #ifdef CONFIG_PIN_TLB_DATA @@ -789,3 +791,4 @@ _GLOBAL(mmu_pin_tlb) mtspr SPRN_SRR1, r10 mtspr SPRN_SRR0, r11 rfi +#endif diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c1d89764dd22..cb2bca76be53 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -411,39 +411,26 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE) */ . = INTERRUPT_INST_TLB_MISS_603 InstructionTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_IMISS -#ifdef CONFIG_MODULES - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 -#endif + mfspr r0,SPRN_IMISS mfspr r2, SPRN_SDR1 li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC rlwinm r2, r2, 28, 0xfffff000 -#ifdef CONFIG_MODULES - li r0, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -#endif -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ +#ifdef CONFIG_EXECMEM + rlwinm r3, r0, 4, 0xf + subi r3, r3, (TASK_SIZE >> 28) & 0xf +#endif rlwinm. r2,r2,0,0,19 /* extract address of pte page */ beq- InstructionAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ andc. r1,r1,r2 /* check access & ~permission */ bne- InstructionAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ -#ifdef CONFIG_MODULES - rlwimi r2, r0, 0, 31, 31 /* userspace ? -> PP lsb */ +#ifdef CONFIG_EXECMEM + rlwimi r2, r3, 1, 31, 31 /* userspace ? -> PP lsb */ #endif ori r1, r1, 0xe06 /* clear out reserved bits */ andc r1, r2, r1 /* PP = user? 1 : 0 */ @@ -451,7 +438,7 @@ BEGIN_FTR_SECTION rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 - tlbli r3 + tlbli r0 mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r3 rfi @@ -480,35 +467,24 @@ InstructionAddressInvalid: */ . = INTERRUPT_DATA_LOAD_TLB_MISS_603 DataLoadTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ - rlwinm r2, r2, 28, 0xfffff000 - li r0, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_READ andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ rlwinm r1,r2,32-9,30,30 /* _PAGE_WRITE -> PP msb */ - rlwimi r2,r0,0,30,31 /* userspace ? -> PP */ + rlwimi r2,r3,2,30,31 /* userspace ? -> PP */ rlwimi r1,r2,32-3,24,24 /* _PAGE_WRITE -> _PAGE_DIRTY */ xori r1,r1,_PAGE_DIRTY /* clear dirty when not rw */ ori r1,r1,0xe04 /* clear out reserved bits */ @@ -518,25 +494,35 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mtspr SPRN_RPA,r1 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) + +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + b 1b DataAddressInvalid: mfspr r3,SPRN_SRR1 rlwinm r1,r3,9,6,6 /* Get load/store bit */ @@ -560,34 +546,24 @@ DataAddressInvalid: */ . = INTERRUPT_DATA_STORE_TLB_MISS_603 DataStoreTLBMiss: -/* - * r0: userspace flag (later scratch) - * r1: linux style pte ( later becomes ppc hardware pte ) - * r2: ptr to linux-style pte - * r3: fault address - */ /* Get PTE (linux-style) and check access */ - mfspr r3,SPRN_DMISS - lis r1, TASK_SIZE@h /* check if kernel address */ - cmplw 0,r1,r3 + mfspr r0,SPRN_DMISS mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED - rlwinm r2, r2, 28, 0xfffff000 - li r0, 3 - bgt- 112f - lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ - li r0, 0 - addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ -112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ - lwz r2,0(r2) /* get pmd entry */ + rlwinm r1, r2, 28, 0xfffff000 + rlwimi r1,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r1) /* get pmd entry */ + rlwinm r3, r0, 4, 0xf rlwinm. r2,r2,0,0,19 /* extract address of pte page */ - beq- DataAddressInvalid /* return if no mapping */ - rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + subi r3, r3, (TASK_SIZE >> 28) & 0xf + beq- 2f /* bail if no mapping */ +1: + rlwimi r2,r0,22,20,29 /* insert next 10 bits of address */ lwz r2,0(r2) /* get linux-style pte */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED andc. r1,r1,r2 /* check access & ~permission */ bne- DataAddressInvalid /* return if access not permitted */ /* Convert linux-style PTE to low word of PPC-style PTE */ - rlwimi r2,r0,0,31,31 /* userspace ? -> PP lsb */ + rlwimi r2,r3,1,31,31 /* userspace ? -> PP lsb */ li r1,0xe06 /* clear out reserved bits & PP msb */ andc r1,r2,r1 /* PP = user? 1: 0 */ BEGIN_FTR_SECTION @@ -597,26 +573,36 @@ END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT) mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 BEGIN_MMU_FTR_SECTION - li r0,1 + li r3,1 mfspr r1,SPRN_SPRG_603_LRU - rlwinm r2,r3,20,27,31 /* Get Address bits 15:19 */ - slw r0,r0,r2 - xor r1,r0,r1 - srw r0,r1,r2 + rlwinm r2,r0,20,27,31 /* Get Address bits 15:19 */ + slw r3,r3,r2 + xor r1,r3,r1 + srw r3,r1,r2 mtspr SPRN_SPRG_603_LRU,r1 mfspr r2,SPRN_SRR1 - rlwimi r2,r0,31-14,14,14 + rlwimi r2,r3,31-14,14,14 mtspr SPRN_SRR1,r2 mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi MMU_FTR_SECTION_ELSE mfspr r2,SPRN_SRR1 /* Need to restore CR0 */ mtcrf 0x80,r2 - tlbld r3 + tlbld r0 rfi ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) +2: lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha + addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ + rlwimi r2,r0,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + cmpwi cr0,r2,0 + beq- DataAddressInvalid /* return if no mapping */ + stw r2,0(r1) + rlwinm r2,r2,0,0,19 /* extract address of pte page */ + b 1b + #ifndef CONFIG_ALTIVEC #define altivec_assist_exception unknown_exception #endif diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index b6b5b01a173c..0b5c1993809e 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -145,10 +145,9 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV) b transfer_to_syscall /* jump to handler */ .endm -/* To handle the additional exception priority levels on 40x and Book-E +/* To handle the additional exception priority levels on Book-E * processors we allocate a stack per additional priority level. * - * On 40x critical is the only additional level * On 44x/e500 we have critical and machine check * * Additionally we reserve a SPRG for each priority level so we can free up a diff --git a/arch/powerpc/kernel/idle.c b/arch/powerpc/kernel/idle.c index 30b56c67fa61..e527cd3ef128 100644 --- a/arch/powerpc/kernel/idle.c +++ b/arch/powerpc/kernel/idle.c @@ -97,7 +97,7 @@ void power4_idle(void) /* * Register the sysctl to set/clear powersave_nap. */ -static struct ctl_table powersave_nap_ctl_table[] = { +static const struct ctl_table powersave_nap_ctl_table[] = { { .procname = "powersave-nap", .data = &powersave_nap, diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c index eca293794a1e..8f4acc55407b 100644 --- a/arch/powerpc/kernel/interrupt.c +++ b/arch/powerpc/kernel/interrupt.c @@ -185,7 +185,7 @@ again: ti_flags = read_thread_flags(); while (unlikely(ti_flags & (_TIF_USER_WORK_MASK & ~_TIF_RESTORE_TM))) { local_irq_enable(); - if (ti_flags & _TIF_NEED_RESCHED) { + if (ti_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY)) { schedule(); } else { /* @@ -266,7 +266,7 @@ notrace unsigned long syscall_exit_prepare(unsigned long r3, unsigned long ret = 0; bool is_not_scv = !IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !scv; - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap_assert_locked(); @@ -344,7 +344,7 @@ notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs) BUG_ON(regs_is_unrecoverable(regs)); BUG_ON(arch_irq_disabled_regs(regs)); - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); /* * We don't need to restore AMR on the way back to userspace for KUAP. @@ -386,7 +386,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PPC_BOOK3E_64) && TRAP(regs) != INTERRUPT_PROGRAM && TRAP(regs) != INTERRUPT_PERFMON) - CT_WARN_ON(ct_state() == CONTEXT_USER); + CT_WARN_ON(ct_state() == CT_STATE_USER); kuap = kuap_get_and_assert_locked(); @@ -396,7 +396,7 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs) /* Returning to a kernel context with local irqs enabled. */ WARN_ON_ONCE(!(regs->msr & MSR_EE)); again: - if (IS_ENABLED(CONFIG_PREEMPT)) { + if (IS_ENABLED(CONFIG_PREEMPTION)) { /* Return to preemptible kernel context */ if (unlikely(read_thread_flags() & _TIF_NEED_RESCHED)) { if (preempt_count() == 0) diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c deleted file mode 100644 index c877f074d174..000000000000 --- a/arch/powerpc/kernel/io-workarounds.c +++ /dev/null @@ -1,197 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Support PCI IO workaround - * - * Copyright (C) 2006 Benjamin Herrenschmidt <benh@kernel.crashing.org> - * IBM, Corp. - * (C) Copyright 2007-2008 TOSHIBA CORPORATION - */ -#undef DEBUG - -#include <linux/kernel.h> -#include <linux/sched/mm.h> /* for init_mm */ -#include <linux/pgtable.h> - -#include <asm/io.h> -#include <asm/machdep.h> -#include <asm/ppc-pci.h> -#include <asm/io-workarounds.h> -#include <asm/pte-walk.h> - - -#define IOWA_MAX_BUS 8 - -static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; -static unsigned int iowa_bus_count; - -static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) -{ - int i, j; - struct resource *res; - unsigned long vstart, vend; - - for (i = 0; i < iowa_bus_count; i++) { - struct iowa_bus *bus = &iowa_busses[i]; - struct pci_controller *phb = bus->phb; - - if (vaddr) { - vstart = (unsigned long)phb->io_base_virt; - vend = vstart + phb->pci_io_size - 1; - if ((vaddr >= vstart) && (vaddr <= vend)) - return bus; - } - - if (paddr) - for (j = 0; j < 3; j++) { - res = &phb->mem_resources[j]; - if (paddr >= res->start && paddr <= res->end) - return bus; - } - } - - return NULL; -} - -#ifdef CONFIG_PPC_INDIRECT_MMIO -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - struct iowa_bus *bus; - int token; - - token = PCI_GET_ADDR_TOKEN(addr); - - if (token && token <= iowa_bus_count) - bus = &iowa_busses[token - 1]; - else { - unsigned long vaddr, paddr; - - vaddr = (unsigned long)PCI_FIX_ADDR(addr); - if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) - return NULL; - - paddr = ppc_find_vmap_phys(vaddr); - - bus = iowa_pci_find(vaddr, paddr); - - if (bus == NULL) - return NULL; - } - - return bus; -} -#else /* CONFIG_PPC_INDIRECT_MMIO */ -struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) -{ - return NULL; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -#ifdef CONFIG_PPC_INDIRECT_PIO -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - unsigned long vaddr = (unsigned long)pci_io_base + port; - return iowa_pci_find(vaddr, 0); -} -#else -struct iowa_bus *iowa_pio_find_bus(unsigned long port) -{ - return NULL; -} -#endif - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ -static ret iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) \ - return bus->ops->name al; \ - return __do_##name al; \ -} - -#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ -static void iowa_##name at \ -{ \ - struct iowa_bus *bus; \ - bus = iowa_##space##_find_bus(aa); \ - if (bus && bus->ops && bus->ops->name) { \ - bus->ops->name al; \ - return; \ - } \ - __do_##name al; \ -} - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -static const struct ppc_pci_io iowa_pci_io = { - -#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, -#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, - -#include <asm/io-defs.h> - -#undef DEF_PCI_AC_RET -#undef DEF_PCI_AC_NORET - -}; - -#ifdef CONFIG_PPC_INDIRECT_MMIO -void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size, - pgprot_t prot, void *caller) -{ - struct iowa_bus *bus; - void __iomem *res = __ioremap_caller(addr, size, prot, caller); - int busno; - - bus = iowa_pci_find(0, (unsigned long)addr); - if (bus != NULL) { - busno = bus - iowa_busses; - PCI_SET_ADDR_TOKEN(res, busno + 1); - } - return res; -} -#endif /* !CONFIG_PPC_INDIRECT_MMIO */ - -bool io_workaround_inited; - -/* Enable IO workaround */ -static void io_workaround_init(void) -{ - if (io_workaround_inited) - return; - ppc_pci_io = iowa_pci_io; - io_workaround_inited = true; -} - -/* Register new bus to support workaround */ -void iowa_register_bus(struct pci_controller *phb, struct ppc_pci_io *ops, - int (*initfunc)(struct iowa_bus *, void *), void *data) -{ - struct iowa_bus *bus; - struct device_node *np = phb->dn; - - io_workaround_init(); - - if (iowa_bus_count >= IOWA_MAX_BUS) { - pr_err("IOWA:Too many pci bridges, " - "workarounds disabled for %pOF\n", np); - return; - } - - bus = &iowa_busses[iowa_bus_count]; - bus->phb = phb; - bus->ops = ops; - bus->private = data; - - if (initfunc) - if ((*initfunc)(bus, data)) - return; - - iowa_bus_count++; - - pr_debug("IOWA:[%d]Add bus, %pOF.\n", iowa_bus_count-1, np); -} - diff --git a/arch/powerpc/kernel/io.c b/arch/powerpc/kernel/io.c index 6af535905984..bcc201c01514 100644 --- a/arch/powerpc/kernel/io.c +++ b/arch/powerpc/kernel/io.c @@ -31,13 +31,14 @@ void _insb(const volatile u8 __iomem *port, void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { tmp = *(const volatile u8 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } EXPORT_SYMBOL(_insb); @@ -47,75 +48,80 @@ void _outsb(volatile u8 __iomem *port, const void *buf, long count) if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { *(volatile u8 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } EXPORT_SYMBOL(_outsb); -void _insw_ns(const volatile u16 __iomem *port, void *buf, long count) +void _insw(const volatile u16 __iomem *port, void *buf, long count) { u16 *tbuf = buf; u16 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { tmp = *(const volatile u16 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insw_ns); +EXPORT_SYMBOL(_insw); -void _outsw_ns(volatile u16 __iomem *port, const void *buf, long count) +void _outsw(volatile u16 __iomem *port, const void *buf, long count) { const u16 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { *(volatile u16 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsw_ns); +EXPORT_SYMBOL(_outsw); -void _insl_ns(const volatile u32 __iomem *port, void *buf, long count) +void _insl(const volatile u32 __iomem *port, void *buf, long count) { u32 *tbuf = buf; u32 tmp; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { tmp = *(const volatile u32 __force *)port; eieio(); *tbuf++ = tmp; } while (--count != 0); - asm volatile("twi 0,%0,0; isync" : : "r" (tmp)); + data_barrier(tmp); } -EXPORT_SYMBOL(_insl_ns); +EXPORT_SYMBOL(_insl); -void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count) +void _outsl(volatile u32 __iomem *port, const void *buf, long count) { const u32 *tbuf = buf; if (unlikely(count <= 0)) return; - asm volatile("sync"); + + mb(); do { *(volatile u32 __force *)port = *tbuf++; } while (--count != 0); - asm volatile("sync"); + mb(); } -EXPORT_SYMBOL(_outsl_ns); +EXPORT_SYMBOL(_outsl); #define IO_CHECK_ALIGN(v,a) ((((unsigned long)(v)) & ((a) - 1)) == 0) @@ -127,7 +133,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) lc |= lc << 8; lc |= lc << 16; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && !IO_CHECK_ALIGN(p, 4)) { *((volatile u8 *)p) = c; p++; @@ -143,7 +149,7 @@ _memset_io(volatile void __iomem *addr, int c, unsigned long n) p++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memset_io); @@ -152,7 +158,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, { void *vsrc = (void __force *) src; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vsrc, 4) || !IO_CHECK_ALIGN(dest, 4))) { *((u8 *)dest) = *((volatile u8 *)vsrc); eieio(); @@ -174,7 +180,7 @@ void _memcpy_fromio(void *dest, const volatile void __iomem *src, dest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_fromio); @@ -182,7 +188,7 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) { void *vdest = (void __force *) dest; - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); while(n && (!IO_CHECK_ALIGN(vdest, 4) || !IO_CHECK_ALIGN(src, 4))) { *((volatile u8 *)vdest) = *((u8 *)src); src++; @@ -201,6 +207,6 @@ void _memcpy_toio(volatile void __iomem *dest, const void *src, unsigned long n) vdest++; n--; } - __asm__ __volatile__ ("sync" : : : "memory"); + mb(); } EXPORT_SYMBOL(_memcpy_toio); diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 29a8c8e18585..0ebae6e4c19d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -26,6 +26,7 @@ #include <linux/iommu.h> #include <linux/sched.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/io.h> #include <asm/iommu.h> #include <asm/pci-bridge.h> @@ -642,7 +643,7 @@ void ppc_iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, tbl->it_ops->flush(tbl); } -static void iommu_table_clear(struct iommu_table *tbl) +void iommu_table_clear(struct iommu_table *tbl) { /* * In case of firmware assisted dump system goes through clean @@ -683,10 +684,10 @@ static void iommu_table_clear(struct iommu_table *tbl) #endif } -static void iommu_table_reserve_pages(struct iommu_table *tbl, +void iommu_table_reserve_pages(struct iommu_table *tbl, unsigned long res_start, unsigned long res_end) { - int i; + unsigned long i; WARN_ON_ONCE(res_end < res_start); /* @@ -987,6 +988,23 @@ unsigned long iommu_direction_to_tce_perm(enum dma_data_direction dir) EXPORT_SYMBOL_GPL(iommu_direction_to_tce_perm); #ifdef CONFIG_IOMMU_API + +int dev_has_iommu_table(struct device *dev, void *data) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct pci_dev **ppdev = data; + + if (!dev) + return 0; + + if (device_iommu_mapped(dev)) { + *ppdev = pdev; + return 1; + } + + return 0; +} + /* * SPAPR TCE API */ @@ -1101,59 +1119,6 @@ void iommu_tce_kill(struct iommu_table *tbl, } EXPORT_SYMBOL_GPL(iommu_tce_kill); -#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) -static int iommu_take_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - int ret = 0; - - /* - * VFIO does not control TCE entries allocation and the guest - * can write new TCEs on top of existing ones so iommu_tce_build() - * must be able to release old pages. This functionality - * requires exchange() callback defined so if it is not - * implemented, we disallow taking ownership over the table. - */ - if (!tbl->it_ops->xchg_no_kill) - return -EINVAL; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - if (iommu_table_in_use(tbl)) { - pr_err("iommu_tce: it_map is not empty"); - ret = -EBUSY; - } else { - memset(tbl->it_map, 0xff, sz); - } - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); - - return ret; -} - -static void iommu_release_ownership(struct iommu_table *tbl) -{ - unsigned long flags, i, sz = (tbl->it_size + 7) >> 3; - - spin_lock_irqsave(&tbl->large_pool.lock, flags); - for (i = 0; i < tbl->nr_pools; i++) - spin_lock_nest_lock(&tbl->pools[i].lock, &tbl->large_pool.lock); - - memset(tbl->it_map, 0, sz); - - iommu_table_reserve_pages(tbl, tbl->it_reserved_start, - tbl->it_reserved_end); - - for (i = 0; i < tbl->nr_pools; i++) - spin_unlock(&tbl->pools[i].lock); - spin_unlock_irqrestore(&tbl->large_pool.lock, flags); -} -#endif - int iommu_add_device(struct iommu_table_group *table_group, struct device *dev) { /* @@ -1186,98 +1151,6 @@ EXPORT_SYMBOL_GPL(iommu_add_device); #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* - * A simple iommu_table_group_ops which only allows reusing the existing - * iommu_table. This handles VFIO for POWER7 or the nested KVM. - * The ops does not allow creating windows and only allows reusing the existing - * one if it matches table_group->tce32_start/tce32_size/page_shift. - */ -static unsigned long spapr_tce_get_table_size(__u32 page_shift, - __u64 window_size, __u32 levels) -{ - unsigned long size; - - if (levels > 1) - return ~0U; - size = window_size >> (page_shift - 3); - return size; -} - -static long spapr_tce_create_table(struct iommu_table_group *table_group, int num, - __u32 page_shift, __u64 window_size, __u32 levels, - struct iommu_table **ptbl) -{ - struct iommu_table *tbl = table_group->tables[0]; - - if (num > 0) - return -EPERM; - - if (tbl->it_page_shift != page_shift || - tbl->it_size != (window_size >> page_shift) || - tbl->it_indirect_levels != levels - 1) - return -EINVAL; - - *ptbl = iommu_tce_table_get(tbl); - return 0; -} - -static long spapr_tce_set_window(struct iommu_table_group *table_group, - int num, struct iommu_table *tbl) -{ - return tbl == table_group->tables[num] ? 0 : -EPERM; -} - -static long spapr_tce_unset_window(struct iommu_table_group *table_group, int num) -{ - return 0; -} - -static long spapr_tce_take_ownership(struct iommu_table_group *table_group) -{ - int i, j, rc = 0; - - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl || !tbl->it_map) - continue; - - rc = iommu_take_ownership(tbl); - if (!rc) - continue; - - for (j = 0; j < i; ++j) - iommu_release_ownership(table_group->tables[j]); - return rc; - } - return 0; -} - -static void spapr_tce_release_ownership(struct iommu_table_group *table_group) -{ - int i; - - for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { - struct iommu_table *tbl = table_group->tables[i]; - - if (!tbl) - continue; - - iommu_table_clear(tbl); - if (tbl->it_map) - iommu_release_ownership(tbl); - } -} - -struct iommu_table_group_ops spapr_tce_table_group_ops = { - .get_table_size = spapr_tce_get_table_size, - .create_table = spapr_tce_create_table, - .set_window = spapr_tce_set_window, - .unset_window = spapr_tce_unset_window, - .take_ownership = spapr_tce_take_ownership, - .release_ownership = spapr_tce_release_ownership, -}; - -/* * A simple iommu_ops to allow less cruft in generic VFIO code. */ static int @@ -1298,7 +1171,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, * The domain being set to PLATFORM from earlier * BLOCKED. The table_group ownership has to be released. */ - table_group->ops->release_ownership(table_group); + table_group->ops->release_ownership(table_group, dev); iommu_group_put(grp); return 0; @@ -1326,7 +1199,7 @@ spapr_tce_blocked_iommu_attach_dev(struct iommu_domain *platform_domain, * also sets the dma_api ops */ table_group = iommu_group_get_iommudata(grp); - ret = table_group->ops->take_ownership(table_group); + ret = table_group->ops->take_ownership(table_group, dev); iommu_group_put(grp); return ret; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 7504ceec5c58..a0e8b998c9b5 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,69 +89,69 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif @@ -333,7 +333,7 @@ void __init init_IRQ(void) static_call_update(ppc_get_irq, ppc_md.get_irq); } -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void *critirq_ctx[NR_CPUS] __read_mostly; void *dbgirq_ctx[NR_CPUS] __read_mostly; void *mcheckirq_ctx[NR_CPUS] __read_mostly; diff --git a/arch/powerpc/kernel/jump_label.c b/arch/powerpc/kernel/jump_label.c index 5277cf582c16..2659e1ac8604 100644 --- a/arch/powerpc/kernel/jump_label.c +++ b/arch/powerpc/kernel/jump_label.c @@ -5,7 +5,7 @@ #include <linux/kernel.h> #include <linux/jump_label.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/inst.h> void arch_jump_label_transform(struct jump_entry *entry, diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c index ebe4d1645ca1..5081334b7bd2 100644 --- a/arch/powerpc/kernel/kgdb.c +++ b/arch/powerpc/kernel/kgdb.c @@ -21,7 +21,7 @@ #include <asm/processor.h> #include <asm/machdep.h> #include <asm/debug.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/slab.h> #include <asm/inst.h> @@ -45,7 +45,7 @@ static struct hard_trap_info { 0x0800, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x0900, 0x0e /* SIGALRM */ }, /* decrementer */ { 0x0c00, 0x14 /* SIGCHLD */ }, /* system call */ -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE { 0x2002, 0x05 /* SIGTRAP */ }, /* debug */ #if defined(CONFIG_PPC_85xx) { 0x2010, 0x08 /* SIGFPE */ }, /* spe unavailable */ @@ -64,7 +64,7 @@ static struct hard_trap_info { 0x2010, 0x08 /* SIGFPE */ }, /* fp unavailable */ { 0x2020, 0x08 /* SIGFPE */ }, /* ap unavailable */ #endif -#else /* !CONFIG_BOOKE_OR_40x */ +#else /* !CONFIG_BOOKE */ { 0x0d00, 0x05 /* SIGTRAP */ }, /* single-step */ #if defined(CONFIG_PPC_8xx) { 0x1000, 0x04 /* SIGILL */ }, /* software emulation */ diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 072ebe7f290b..f8208c027148 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -21,6 +21,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct pt_regs *regs; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(nip, parent_nip); if (bit < 0) return; diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index bbca90a5e2ec..c0d9f12cb441 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -19,9 +19,9 @@ #include <linux/extable.h> #include <linux/kdebug.h> #include <linux/slab.h> -#include <linux/moduleloader.h> #include <linux/set_memory.h> -#include <asm/code-patching.h> +#include <linux/execmem.h> +#include <asm/text-patching.h> #include <asm/cacheflush.h> #include <asm/sstep.h> #include <asm/sections.h> @@ -105,47 +105,25 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } -void *alloc_insn_page(void) -{ - void *page; - - page = module_alloc(PAGE_SIZE); - if (!page) - return NULL; - - if (strict_module_rwx_enabled()) { - int err = set_memory_rox((unsigned long)page, 1); - - if (err) - goto error; - } - return page; -error: - module_memfree(page); - return NULL; -} - int arch_prepare_kprobe(struct kprobe *p) { int ret = 0; @@ -248,16 +226,6 @@ static nokprobe_inline void set_current_kprobe(struct kprobe *p, struct pt_regs kcb->kprobe_saved_msr = regs->msr; } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->link; - ri->fp = NULL; - - /* Replace the return addr with trampoline addr */ - regs->link = (unsigned long)__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static int try_to_emulate(struct kprobe *p, struct pt_regs *regs) { int ret; @@ -415,49 +383,6 @@ no_kprobe: NOKPROBE_SYMBOL(kprobe_handler); /* - * Function return probe trampoline: - * - init_kprobes() establishes a probepoint here - * - When the probed function returns, this probe - * causes the handlers to fire - */ -asm(".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" - "nop\n" - "blr\n" - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"); - -/* - * Called when the probe at kretprobe trampoline is hit - */ -static int trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) -{ - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - /* - * We get here through one of two paths: - * 1. by taking a trap -> kprobe_handler() -> here - * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here - * - * When going back through (1), we need regs->nip to be setup properly - * as it is used to determine the return address from the trap. - * For (2), since nip is not honoured with optprobes, we instead setup - * the link register properly so that the subsequent 'blr' in - * __kretprobe_trampoline jumps back to the right instruction. - * - * For nip, we should set the address to the previous instruction since - * we end up emulating it in kprobe_handler(), which increments the nip - * again. - */ - regs_set_return_ip(regs, orig_ret_address - 4); - regs->link = orig_ret_address; - - return 0; -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - -/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" * instruction. To avoid the SMP problems that can occur when we @@ -559,19 +484,9 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr) } NOKPROBE_SYMBOL(kprobe_fault_handler); -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &__kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - -int __init arch_init_kprobes(void) -{ - return register_kprobe(&trampoline_p); -} - int arch_trampoline_kprobe(struct kprobe *p) { - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return 1; return 0; diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 2eabb15687a6..acb727f54e9d 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -28,32 +28,6 @@ .text /* - * This returns the high 64 bits of the product of two 64-bit numbers. - */ -_GLOBAL(mulhdu) - cmpwi r6,0 - cmpwi cr1,r3,0 - mr r10,r4 - mulhwu r4,r4,r5 - beq 1f - mulhwu r0,r10,r6 - mullw r7,r10,r5 - addc r7,r0,r7 - addze r4,r4 -1: beqlr cr1 /* all done if high part of A is 0 */ - mullw r9,r3,r5 - mulhwu r10,r3,r5 - beq 2f - mullw r0,r3,r6 - mulhwu r8,r3,r6 - addc r7,r0,r7 - adde r4,r4,r8 - addze r10,r10 -2: addc r4,r4,r9 - addze r3,r10 - blr - -/* * reloc_got2 runs through the .got2 section adding an offset * to each entry. */ @@ -176,46 +150,6 @@ _GLOBAL(low_choose_7447a_dfs) #endif /* CONFIG_CPU_FREQ_PMAC && CONFIG_PPC_BOOK3S_32 */ -#ifdef CONFIG_40x - -/* - * Do an IO access in real mode - */ -_GLOBAL(real_readb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - lbz r3,0(r3) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_readb) - - /* - * Do an IO access in real mode - */ -_GLOBAL(real_writeb) - mfmsr r7 - rlwinm r0,r7,0,~MSR_DR - sync - mtmsr r0 - sync - isync - stb r3,0(r4) - sync - mtmsr r7 - sync - isync - blr -_ASM_NOKPROBE_SYMBOL(real_writeb) - -#endif /* CONFIG_40x */ - /* * Copy a whole page. We use the dcbz instruction on the destination * to reduce memory traffic (it eliminates the unnecessary reads of diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 1a8cdafd68e8..a997c7f43dc0 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -74,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -137,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ +#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -174,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -192,7 +192,7 @@ _GLOBAL(scom970_read) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd, * and finally or in RW bit */ @@ -226,7 +226,7 @@ _GLOBAL(scom970_write) xori r0,r0,MSR_EE mtmsrd r0,1 - /* rotate 24 bits SCOM address 8 bits left and mask out it's low 8 bits + /* rotate 24 bits SCOM address 8 bits left and mask out its low 8 bits * (including parity). On current CPUs they must be 0'd. */ @@ -243,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index f6d6ae0a1692..baeb24c102c8 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -7,7 +7,6 @@ #include <linux/elf.h> #include <linux/moduleloader.h> #include <linux/err.h> -#include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/bug.h> #include <asm/module.h> @@ -17,8 +16,6 @@ #include <asm/setup.h> #include <asm/sections.h> -static LIST_HEAD(module_bug_list); - static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) @@ -88,40 +85,3 @@ int module_finalize(const Elf_Ehdr *hdr, return 0; } - -static __always_inline void * -__module_alloc(unsigned long size, unsigned long start, unsigned long end, bool nowarn) -{ - pgprot_t prot = strict_module_rwx_enabled() ? PAGE_KERNEL : PAGE_KERNEL_EXEC; - gfp_t gfp = GFP_KERNEL | (nowarn ? __GFP_NOWARN : 0); - - /* - * Don't do huge page allocations for modules yet until more testing - * is done. STRICT_MODULE_RWX may require extra work to support this - * too. - */ - return __vmalloc_node_range(size, 1, start, end, gfp, prot, - VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, __builtin_return_address(0)); -} - -void *module_alloc(unsigned long size) -{ -#ifdef MODULES_VADDR - unsigned long limit = (unsigned long)_etext - SZ_32M; - void *ptr = NULL; - - BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR); - - /* First try within 32M limit from _etext to avoid branch trampolines */ - if (MODULES_VADDR < PAGE_OFFSET && MODULES_END > limit) - ptr = __module_alloc(size, limit, MODULES_END, true); - - if (!ptr) - ptr = __module_alloc(size, MODULES_VADDR, MODULES_END, false); - - return ptr; -#else - return __module_alloc(size, VMALLOC_START, VMALLOC_END, false); -#endif -} diff --git a/arch/powerpc/kernel/module_32.c b/arch/powerpc/kernel/module_32.c index 816a63fd71fb..f930e3395a7f 100644 --- a/arch/powerpc/kernel/module_32.c +++ b/arch/powerpc/kernel/module_32.c @@ -18,7 +18,7 @@ #include <linux/bug.h> #include <linux/sort.h> #include <asm/setup.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> /* Count how many different relocations (different symbol, different addend) */ diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 7112adc597a8..34a5aec4908f 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -17,7 +17,7 @@ #include <linux/kernel.h> #include <asm/module.h> #include <asm/firmware.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <linux/sort.h> #include <asm/setup.h> #include <asm/sections.h> @@ -205,7 +205,9 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; @@ -241,13 +243,25 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } + if (i == hdr->e_shnum) { + pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); + return -ENOEXEC; + } #endif pr_debug("Looks like a total of %lu stubs, max\n", relocs); @@ -355,6 +369,24 @@ static void dedotify_versions(struct modversion_info *vers, } } +/* Same as normal versions, remove a leading dot if present. */ +static void dedotify_ext_version_names(char *str_seq, unsigned long size) +{ + unsigned long out = 0; + unsigned long in; + char last = '\0'; + + for (in = 0; in < size; in++) { + /* Skip one leading dot */ + if (last == '\0' && str_seq[in] == '.') + in++; + last = str_seq[in]; + str_seq[out++] = last; + } + /* Zero the trailing portion of the names table for robustness */ + memset(&str_seq[out], 0, size - out); +} + /* * Undefined symbols which refer to .funcname, hack to funcname. Make .TOC. * seem to be defined (value set later). @@ -424,10 +456,12 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, me->arch.toc_section = i; if (sechdrs[i].sh_addralign < 8) sechdrs[i].sh_addralign = 8; - } - else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0) + } else if (strcmp(secstrings + sechdrs[i].sh_name, "__versions") == 0) dedotify_versions((void *)hdr + sechdrs[i].sh_offset, sechdrs[i].sh_size); + else if (strcmp(secstrings + sechdrs[i].sh_name, "__version_ext_names") == 0) + dedotify_ext_version_names((void *)hdr + sechdrs[i].sh_offset, + sechdrs[i].sh_size); if (sechdrs[i].sh_type == SHT_SYMTAB) dedotify((void *)hdr + sechdrs[i].sh_offset, @@ -460,7 +494,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #endif /* Override the stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); return 0; } @@ -651,12 +685,11 @@ static inline int create_stub(const Elf64_Shdr *sechdrs, // func_desc_t is 8 bytes if ABIv2, else 16 bytes desc = func_desc(addr); for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) { - if (patch_instruction(((u32 *)&entry->funcdata) + i, - ppc_inst(((u32 *)(&desc))[i]))) + if (patch_u32(((u32 *)&entry->funcdata) + i, ((u32 *)&desc)[i])) return 0; } - if (patch_instruction(&entry->magic, ppc_inst(STUB_MAGIC))) + if (patch_u32(&entry->magic, STUB_MAGIC)) return 0; return 1; @@ -1086,6 +1119,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int i, total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + /* Find the next available entry */ + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stub_func_addr(stub[i].funcdata); i++) + if (WARN_ON(i >= total_stubs)) + return -1; + + if (WARN_ON(i + num_stubs > total_stubs)) + return -1; + + stub += i; + me->arch.ool_stubs = (struct ftrace_ool_stub *)stub; + + /* reserve stubs */ + for (i = 0; i < num_stubs; i++) + if (patch_u32((void *)&stub->funcdata, PPC_RAW_NOP())) + return -1; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -1104,6 +1168,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index e385d3164648..f9c6568a9137 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -73,7 +73,7 @@ static const char *nvram_os_partitions[] = { }; static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason); + struct kmsg_dump_detail *detail); static struct kmsg_dumper nvram_kmsg_dumper = { .dump = oops_to_nvram @@ -643,7 +643,7 @@ void __init nvram_init_oops_partition(int rtas_partition_exists) * partition. If that's too much, go back and capture uncompressed text. */ static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; @@ -655,7 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; int rc = -1; - switch (reason) { + switch (detail->reason) { case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; @@ -671,7 +671,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, break; default: pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __func__, (int) reason); + __func__, (int) detail->reason); return; } diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c deleted file mode 100644 index adc76fa58d1e..000000000000 --- a/arch/powerpc/kernel/of_platform.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corp. - * <benh@kernel.crashing.org> - * and Arnd Bergmann, IBM Corp. - */ - -#undef DEBUG - -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/export.h> -#include <linux/mod_devicetable.h> -#include <linux/pci.h> -#include <linux/platform_device.h> -#include <linux/atomic.h> - -#include <asm/errno.h> -#include <asm/topology.h> -#include <asm/pci-bridge.h> -#include <asm/ppc-pci.h> -#include <asm/eeh.h> - -#ifdef CONFIG_PPC_OF_PLATFORM_PCI - -/* The probing of PCI controllers from of_platform is currently - * 64 bits only, mostly due to gratuitous differences between - * the 32 and 64 bits PCI code on PowerPC and the 32 bits one - * lacking some bits needed here. - */ - -static int of_pci_phb_probe(struct platform_device *dev) -{ - struct pci_controller *phb; - - /* Check if we can do that ... */ - if (ppc_md.pci_setup_phb == NULL) - return -ENODEV; - - pr_info("Setting up PCI bus %pOF\n", dev->dev.of_node); - - /* Alloc and setup PHB data structure */ - phb = pcibios_alloc_controller(dev->dev.of_node); - if (!phb) - return -ENODEV; - - /* Setup parent in sysfs */ - phb->parent = &dev->dev; - - /* Setup the PHB using arch provided callback */ - if (ppc_md.pci_setup_phb(phb)) { - pcibios_free_controller(phb); - return -ENODEV; - } - - /* Process "ranges" property */ - pci_process_bridge_OF_ranges(phb, dev->dev.of_node, 0); - - /* Init pci_dn data structures */ - pci_devs_phb_init_dynamic(phb); - - /* Create EEH PE for the PHB */ - eeh_phb_pe_create(phb); - - /* Scan the bus */ - pcibios_scan_phb(phb); - if (phb->bus == NULL) - return -ENXIO; - - /* Claim resources. This might need some rework as well depending - * whether we are doing probe-only or not, like assigning unassigned - * resources etc... - */ - pcibios_claim_one_bus(phb->bus); - - /* Add probed PCI devices to the device model */ - pci_bus_add_devices(phb->bus); - - return 0; -} - -static const struct of_device_id of_pci_phb_ids[] = { - { .type = "pci", }, - { .type = "pcix", }, - { .type = "pcie", }, - { .type = "pciex", }, - { .type = "ht", }, - {} -}; - -static struct platform_driver of_pci_phb_driver = { - .probe = of_pci_phb_probe, - .driver = { - .name = "of-pci", - .of_match_table = of_pci_phb_ids, - }, -}; - -builtin_platform_driver(of_pci_phb_driver); - -#endif /* CONFIG_PPC_OF_PLATFORM_PCI */ diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 004fae2044a3..2e83702bf9ba 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -13,7 +13,7 @@ #include <asm/kprobes.h> #include <asm/ptrace.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/sstep.h> #include <asm/ppc-opcode.h> #include <asm/inst.h> @@ -56,7 +56,7 @@ static unsigned long can_optimize(struct kprobe *p) * has a 'nop' instruction, which can be emulated. * So further checks can be skipped. */ - if (p->addr == (kprobe_opcode_t *)&__kretprobe_trampoline) + if (p->addr == (kprobe_opcode_t *)&arch_rethook_trampoline) return addr + sizeof(kprobe_opcode_t); /* diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index d95a48eff412..eac84d687b53 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -517,7 +517,7 @@ int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma) } /* - * This one is used by /dev/mem and fbdev who have no clue about the + * This one is used by /dev/mem and video who have no clue about the * PCI device, it tries to find the PCI device first and calls the * above routine */ diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c index 0fe251c6ac2c..9ea74973d78d 100644 --- a/arch/powerpc/kernel/pci-hotplug.c +++ b/arch/powerpc/kernel/pci-hotplug.c @@ -93,6 +93,36 @@ void pci_hp_remove_devices(struct pci_bus *bus) } EXPORT_SYMBOL_GPL(pci_hp_remove_devices); +static void traverse_siblings_and_scan_slot(struct device_node *start, struct pci_bus *bus) +{ + struct device_node *dn; + int slotno; + + u32 class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot for non-bridge/EP case */ + if (!((class >> 8) == PCI_CLASS_BRIDGE_PCI)) { + slotno = PCI_SLOT(PCI_DN(start->child)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + return; + } + } + + /* Iterate all siblings */ + for_each_child_of_node(start, dn) { + class = 0; + + if (!of_property_read_u32(start->child, "class-code", &class)) { + /* Call of pci_scan_slot on each sibling-nodes/bridge-ports */ + if ((class >> 8) == PCI_CLASS_BRIDGE_PCI) { + slotno = PCI_SLOT(PCI_DN(dn)->devfn); + pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + } + } + } +} + /** * pci_hp_add_devices - adds new pci devices to bus * @bus: the indicated PCI bus @@ -106,7 +136,7 @@ EXPORT_SYMBOL_GPL(pci_hp_remove_devices); */ void pci_hp_add_devices(struct pci_bus *bus) { - int slotno, mode, max; + int mode, max; struct pci_dev *dev; struct pci_controller *phb; struct device_node *dn = pci_bus_to_OF_node(bus); @@ -129,8 +159,7 @@ void pci_hp_add_devices(struct pci_bus *bus) * order for fully rescan all the way down to pick them up. * They can have been removed during partial hotplug. */ - slotno = PCI_SLOT(PCI_DN(dn->child)->devfn); - pci_scan_slot(bus, PCI_DEVFN(slotno, 0)); + traverse_siblings_and_scan_slot(dn, bus); max = bus->busn_res.start; /* * Scan bridges that are already configured. We don't touch diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c index ce0c8623e563..f8a3bd8cfae4 100644 --- a/arch/powerpc/kernel/pci_32.c +++ b/arch/powerpc/kernel/pci_32.c @@ -213,11 +213,8 @@ pci_create_OF_bus_map(void) struct property* of_prop; struct device_node *dn; - of_prop = memblock_alloc(sizeof(struct property) + 256, + of_prop = memblock_alloc_or_panic(sizeof(struct property) + 256, SMP_CACHE_BYTES); - if (!of_prop) - panic("%s: Failed to allocate %zu bytes\n", __func__, - sizeof(struct property) + 256); dn = of_find_node_by_path("/"); if (dn) { memset(of_prop, -1, sizeof(struct property) + 256); diff --git a/arch/powerpc/kernel/proc_powerpc.c b/arch/powerpc/kernel/proc_powerpc.c index b109cd7b5d01..3816a2bf2b84 100644 --- a/arch/powerpc/kernel/proc_powerpc.c +++ b/arch/powerpc/kernel/proc_powerpc.c @@ -4,6 +4,7 @@ */ #include <linux/init.h> +#include <linux/memblock.h> #include <linux/mm.h> #include <linux/proc_fs.h> #include <linux/kernel.h> @@ -12,9 +13,10 @@ #include <asm/machdep.h> #include <asm/vdso_datapage.h> #include <asm/rtas.h> +#include <asm/systemcfg.h> #include <linux/uaccess.h> -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG static loff_t page_map_seek(struct file *file, loff_t off, int whence) { @@ -33,10 +35,9 @@ static int page_map_mmap( struct file *file, struct vm_area_struct *vma ) if ((vma->vm_end - vma->vm_start) > PAGE_SIZE) return -EINVAL; - remap_pfn_range(vma, vma->vm_start, - __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, - PAGE_SIZE, vma->vm_page_prot); - return 0; + return remap_pfn_range(vma, vma->vm_start, + __pa(pde_data(file_inode(file))) >> PAGE_SHIFT, + PAGE_SIZE, vma->vm_page_prot); } static const struct proc_ops page_map_proc_ops = { @@ -45,13 +46,35 @@ static const struct proc_ops page_map_proc_ops = { .proc_mmap = page_map_mmap, }; +static union { + struct systemcfg data; + u8 page[PAGE_SIZE]; +} systemcfg_data_store __page_aligned_data; +struct systemcfg *systemcfg = &systemcfg_data_store.data; static int __init proc_ppc64_init(void) { struct proc_dir_entry *pde; + strcpy((char *)systemcfg->eye_catcher, "SYSTEMCFG:PPC64"); + systemcfg->version.major = SYSTEMCFG_MAJOR; + systemcfg->version.minor = SYSTEMCFG_MINOR; + systemcfg->processor = mfspr(SPRN_PVR); + /* + * Fake the old platform number for pSeries and add + * in LPAR bit if necessary + */ + systemcfg->platform = 0x100; + if (firmware_has_feature(FW_FEATURE_LPAR)) + systemcfg->platform |= 1; + systemcfg->physicalMemorySize = memblock_phys_mem_size(); + systemcfg->dcache_size = ppc64_caches.l1d.size; + systemcfg->dcache_line_size = ppc64_caches.l1d.line_size; + systemcfg->icache_size = ppc64_caches.l1i.size; + systemcfg->icache_line_size = ppc64_caches.l1i.line_size; + pde = proc_create_data("powerpc/systemcfg", S_IFREG | 0444, NULL, - &page_map_proc_ops, vdso_data); + &page_map_proc_ops, systemcfg); if (!pde) return 1; proc_set_size(pde, PAGE_SIZE); @@ -60,7 +83,7 @@ static int __init proc_ppc64_init(void) } __initcall(proc_ppc64_init); -#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_PPC64_PROC_SYSTEMCFG */ /* * Create the ppc64 and ppc64/rtas directories early. This allows us to diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9452a54d356c..ef91f71e07c4 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -54,7 +54,7 @@ #include <asm/firmware.h> #include <asm/hw_irq.h> #endif -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/exec.h> #include <asm/livepatch.h> #include <asm/cpu_has_feature.h> @@ -72,8 +72,6 @@ #define TM_DEBUG(x...) do { } while(0) #endif -extern unsigned long _get_SP(void); - #ifdef CONFIG_PPC_TRANSACTIONAL_MEM /* * Are we running in "Suspend disabled" mode? If so we have to block any @@ -1185,6 +1183,9 @@ static inline void save_sprs(struct thread_struct *t) if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) t->hashkeyr = mfspr(SPRN_HASHKEYR); + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + t->dexcr = mfspr(SPRN_DEXCR); #endif } @@ -1267,6 +1268,10 @@ static inline void restore_sprs(struct thread_struct *old_thread, if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && old_thread->hashkeyr != new_thread->hashkeyr) mtspr(SPRN_HASHKEYR, new_thread->hashkeyr); + + if (cpu_has_feature(CPU_FTR_ARCH_31) && + old_thread->dexcr != new_thread->dexcr) + mtspr(SPRN_DEXCR, new_thread->dexcr); #endif } @@ -1566,7 +1571,7 @@ static void __show_regs(struct pt_regs *regs) if (trap == INTERRUPT_MACHINE_CHECK || trap == INTERRUPT_DATA_STORAGE || trap == INTERRUPT_ALIGNMENT) { - if (IS_ENABLED(CONFIG_4xx) || IS_ENABLED(CONFIG_BOOKE)) + if (IS_ENABLED(CONFIG_BOOKE)) pr_cont("DEAR: "REG" ESR: "REG" ", regs->dear, regs->esr); else pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr); @@ -1634,6 +1639,13 @@ void arch_setup_new_exec(void) current->thread.regs->amr = default_amr; current->thread.regs->iamr = default_iamr; #endif + +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_31)) { + current->thread.dexcr = current->thread.dexcr_onexec; + mtspr(SPRN_DEXCR, current->thread.dexcr); + } +#endif /* CONFIG_PPC_BOOK3S_64 */ } #ifdef CONFIG_PPC64 @@ -1647,7 +1659,7 @@ void arch_setup_new_exec(void) * cases will happen: * * 1. The correct thread is running, the wrong thread is not - * In this situation, the correct thread is woken and proceeds to pass it's + * In this situation, the correct thread is woken and proceeds to pass its * condition check. * * 2. Neither threads are running @@ -1657,15 +1669,15 @@ void arch_setup_new_exec(void) * for the wrong thread, or they will execute the condition check immediately. * * 3. The wrong thread is running, the correct thread is not - * The wrong thread will be woken, but will fail it's condition check and + * The wrong thread will be woken, but will fail its condition check and * re-execute wait. The correct thread, when scheduled, will execute either - * it's condition check (which will pass), or wait, which returns immediately - * when called the first time after the thread is scheduled, followed by it's + * its condition check (which will pass), or wait, which returns immediately + * when called the first time after the thread is scheduled, followed by its * condition check (which will pass). * * 4. Both threads are running - * Both threads will be woken. The wrong thread will fail it's condition check - * and execute another wait, while the correct thread will pass it's condition + * Both threads will be woken. The wrong thread will fail its condition check + * and execute another wait, while the correct thread will pass its condition * check. * * @t: the task to set the thread ID for @@ -1861,7 +1873,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP) p->thread.kuap = KUAP_NONE; #endif -#if defined(CONFIG_BOOKE_OR_40x) && defined(CONFIG_PPC_KUAP) +#if defined(CONFIG_BOOKE) && defined(CONFIG_PPC_KUAP) p->thread.pid = MMU_NO_CONTEXT; #endif @@ -1878,6 +1890,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE)) p->thread.hashkeyr = current->thread.hashkeyr; + + if (cpu_has_feature(CPU_FTR_ARCH_31)) + p->thread.dexcr = mfspr(SPRN_DEXCR); #endif return 0; } @@ -1945,8 +1960,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp) * address of _start and the second entry is the TOC * value we need to use. */ - __get_user(entry, (unsigned long __user *)start); - __get_user(toc, (unsigned long __user *)start+1); + get_user(entry, (unsigned long __user *)start); + get_user(toc, (unsigned long __user *)start+1); /* Check whether the e_entry function descriptor entries * need to be relocated before we can use them. @@ -2160,10 +2175,10 @@ static inline int valid_irq_stack(unsigned long sp, struct task_struct *p, return 0; } +#ifdef CONFIG_PPC64 static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, unsigned long nbytes) { -#ifdef CONFIG_PPC64 unsigned long stack_page; unsigned long cpu = task_cpu(p); @@ -2191,10 +2206,26 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) return 1; # endif -#endif return 0; } +#else +static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page; + unsigned long cpu = task_cpu(p); + + if (!IS_ENABLED(CONFIG_VMAP_STACK)) + return 0; + + stack_page = (unsigned long)emergency_ctx[cpu] - THREAD_SIZE; + if (sp >= stack_page && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + return 0; +} +#endif /* * validate the stack frame of a particular minimum size, used for when we are diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index cd8d8883de90..9ed9dde7d231 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -331,6 +331,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, void *data) { const char *type = of_get_flat_dt_prop(node, "device_type", NULL); + const __be32 *cpu_version = NULL; const __be32 *prop; const __be32 *intserv; int i, nthreads; @@ -420,7 +421,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, prop = of_get_flat_dt_prop(node, "cpu-version", NULL); if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) { identify_cpu(0, be32_to_cpup(prop)); - seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(prop)); + cpu_version = prop; } check_cpu_feature_properties(node); @@ -431,6 +432,12 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } identical_pvr_fixup(node); + + // We can now add the CPU name & PVR to the hardware description + seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); + if (cpu_version) + seq_buf_printf(&ppc_hw_desc, "0x%04x ", be32_to_cpup(cpu_version)); + init_mmu_slb_size(node); #ifdef CONFIG_PPC64 @@ -779,12 +786,12 @@ static inline void save_fscr_to_task(void) {} void __init early_init_devtree(void *params) { - phys_addr_t limit; + phys_addr_t int_vector_size; DBG(" -> early_init_devtree(%px)\n", params); /* Too early to BUG_ON(), do it by hand */ - if (!early_init_dt_verify(params)) + if (!early_init_dt_verify(params, __pa(params))) panic("BUG: Failed verifying flat device tree, bad version?"); of_scan_flat_dt(early_init_dt_scan_model, NULL); @@ -813,6 +820,9 @@ void __init early_init_devtree(void *params) */ of_scan_flat_dt(early_init_dt_scan_chosen_ppc, boot_command_line); + /* Append additional parameters passed for fadump capture kernel */ + fadump_append_bootargs(); + /* Scan memory nodes and rebuild MEMBLOCKs */ early_init_dt_scan_root(); early_init_dt_scan_memory_ppc(); @@ -832,9 +842,16 @@ void __init early_init_devtree(void *params) setup_initial_memory_limit(memstart_addr, first_memblock_size); /* Reserve MEMBLOCK regions used by kernel, initrd, dt, etc... */ memblock_reserve(PHYSICAL_START, __pa(_end) - PHYSICAL_START); +#ifdef CONFIG_PPC64 + /* If relocatable, reserve at least 32k for interrupt vectors etc. */ + int_vector_size = __end_interrupts - _stext; + int_vector_size = max_t(phys_addr_t, SZ_32K, int_vector_size); +#else /* If relocatable, reserve first 32k for interrupt vectors etc. */ + int_vector_size = SZ_32K; +#endif if (PHYSICAL_START > MEMORY_START) - memblock_reserve(MEMORY_START, 0x8000); + memblock_reserve(MEMORY_START, int_vector_size); reserve_kdump_trampoline(); #if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP) /* @@ -843,12 +860,15 @@ void __init early_init_devtree(void *params) */ if (fadump_reserve_mem() == 0) #endif - reserve_crashkernel(); + arch_reserve_crashkernel(); early_reserve_mem(); - /* Ensure that total memory size is page-aligned. */ - limit = ALIGN(memory_limit ?: memblock_phys_mem_size(), PAGE_SIZE); - memblock_enforce_memory_limit(limit); + if (memory_limit > memblock_phys_mem_size()) + memory_limit = 0; + + /* Align down to 16 MB which is large page size with hash page translation */ + memory_limit = ALIGN_DOWN(memory_limit ?: memblock_phys_mem_size(), SZ_16M); + memblock_enforce_memory_limit(memory_limit); #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_4K_PAGES) if (!early_radix_enabled()) @@ -868,9 +888,6 @@ void __init early_init_devtree(void *params) dt_cpu_ftrs_scan(); - // We can now add the CPU name & PVR to the hardware description - seq_buf_printf(&ppc_hw_desc, "%s 0x%04lx ", cur_cpu_spec->cpu_name, mfspr(SPRN_PVR)); - /* Retrieve CPU related informations from the flat tree * (altivec support, boot CPU ID, ...) */ @@ -891,6 +908,9 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index 0ef358285337..827c958677f8 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -817,8 +817,8 @@ static void __init early_cmdline_parse(void) opt += 4; prom_memory_limit = prom_memparse(opt, (const char **)&opt); #ifdef CONFIG_PPC64 - /* Align to 16 MB == size of ppc64 large page */ - prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); + /* Align down to 16 MB which is large page size with hash page translation */ + prom_memory_limit = ALIGN_DOWN(prom_memory_limit, SZ_16M); #endif } @@ -1061,7 +1061,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = { .virt_base = cpu_to_be32(0xffffffff), .virt_size = cpu_to_be32(0xffffffff), .load_base = cpu_to_be32(0xffffffff), - .min_rma = cpu_to_be32(512), /* 512MB min RMA */ + .min_rma = cpu_to_be32(MIN_RMA), .min_load = cpu_to_be32(0xffffffff), /* full client load */ .min_rma_percent = 0, /* min RMA percentage of total RAM */ .max_pft_size = 48, /* max log_2(hash table size) */ @@ -2792,91 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. */ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - -#ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node * Pegasos needs decimal IRQ 14/15, not hexadecimal @@ -2927,12 +2842,8 @@ static void __init fixup_device_tree_chrp(void) } } } -#else -#define fixup_device_tree_chrp() -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_PPC_PMAC) -static void __init fixup_device_tree_pmac(void) +static void __init fixup_device_tree_pmac64(void) { phandle u3, i2c, mpic; u32 u3_rev; @@ -2971,11 +2882,27 @@ static void __init fixup_device_tree_pmac(void) prom_setprop(i2c, "/u3@0,f8000000/i2c@f8001000", "interrupt-parent", &parent, sizeof(parent)); } -#else -#define fixup_device_tree_pmac() -#endif -#ifdef CONFIG_PPC_EFIKA +static void __init fixup_device_tree_pmac(void) +{ + __be32 val = 1; + char type[8]; + phandle node; + + // Some pmacs are missing #size-cells on escc or i2s nodes + for (node = 0; prom_next_node(&node); ) { + type[0] = '\0'; + prom_getprop(node, "device_type", type, sizeof(type)); + if (prom_strcmp(type, "escc") && prom_strcmp(type, "i2s")) + continue; + + if (prom_getproplen(node, "#size-cells") != PROM_ERROR) + continue; + + prom_setprop(node, NULL, "#size-cells", &val, sizeof(val)); + } +} + /* * The MPC5200 FEC driver requires an phy-handle property to tell it how * to talk to the phy. If the phy-handle property is missing, then this @@ -3107,11 +3034,7 @@ static void __init fixup_device_tree_efika(void) /* Make sure ethernet phy-handle property exists */ fixup_device_tree_efika_add_phy(); } -#else -#define fixup_device_tree_efika() -#endif -#ifdef CONFIG_PPC_PASEMI_NEMO /* * CFE supplied on Nemo is broken in several ways, biggest * problem is that it reassigns ISA interrupts to unused mpic ints. @@ -3187,18 +3110,23 @@ static void __init fixup_device_tree_pasemi(void) prom_setprop(iob, name, "device_type", "isa", sizeof("isa")); } -#else /* !CONFIG_PPC_PASEMI_NEMO */ -static inline void fixup_device_tree_pasemi(void) { } -#endif static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); - fixup_device_tree_chrp(); - fixup_device_tree_pmac(); - fixup_device_tree_efika(); - fixup_device_tree_pasemi(); + if (IS_ENABLED(CONFIG_PPC_CHRP)) + fixup_device_tree_chrp(); + + if (IS_ENABLED(CONFIG_PPC_PMAC)) + fixup_device_tree_pmac(); + + if (IS_ENABLED(CONFIG_PPC_PMAC) && IS_ENABLED(CONFIG_PPC64)) + fixup_device_tree_pmac64(); + + if (IS_ENABLED(CONFIG_PPC_EFIKA)) + fixup_device_tree_efika(); + + if (IS_ENABLED(CONFIG_PPC_PASEMI_NEMO)) + fixup_device_tree_pasemi(); } static void __init prom_find_boot_cpu(void) diff --git a/arch/powerpc/kernel/ptrace/ptrace-tm.c b/arch/powerpc/kernel/ptrace/ptrace-tm.c index 210ea834e603..447bff87fd21 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-tm.c +++ b/arch/powerpc/kernel/ptrace/ptrace-tm.c @@ -12,7 +12,7 @@ void flush_tmregs_to_thread(struct task_struct *tsk) { /* * If task is not current, it will have been flushed already to - * it's thread_struct during __switch_to(). + * its thread_struct during __switch_to(). * * A reclaim flushes ALL the state or if not in TM save TM SPRs * in the appropriate thread structures from live. diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 584cf5c3df50..c1819e0a6684 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -469,12 +469,7 @@ static int dexcr_get(struct task_struct *target, const struct user_regset *regse if (!cpu_has_feature(CPU_FTR_ARCH_31)) return -ENODEV; - /* - * The DEXCR is currently static across all CPUs, so we don't - * store the target's value anywhere, but the static value - * will also be correct. - */ - membuf_store(&to, (u64)lower_32_bits(DEXCR_INIT)); + membuf_store(&to, (u64)lower_32_bits(target->thread.dexcr)); /* * Technically the HDEXCR is per-cpu, but a hypervisor can't reasonably diff --git a/arch/powerpc/kernel/ptrace/ptrace.c b/arch/powerpc/kernel/ptrace/ptrace.c index 727ed4a14545..c6997df63287 100644 --- a/arch/powerpc/kernel/ptrace/ptrace.c +++ b/arch/powerpc/kernel/ptrace/ptrace.c @@ -215,7 +215,7 @@ static int do_seccomp(struct pt_regs *regs) * have already loaded -ENOSYS into r3, or seccomp has put * something else in r3 (via SECCOMP_RET_ERRNO/TRACE). */ - if (__secure_computing(NULL)) + if (__secure_computing()) return -1; /* diff --git a/arch/powerpc/kernel/rethook.c b/arch/powerpc/kernel/rethook.c new file mode 100644 index 000000000000..5f5f47ae82cf --- /dev/null +++ b/arch/powerpc/kernel/rethook.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PowerPC implementation of rethook. This depends on kprobes. + */ + +#include <linux/kprobes.h> +#include <linux/rethook.h> + +/* + * Function return trampoline: + * - init_kprobes() establishes a probepoint here + * - When the probed function returns, this probe + * causes the handlers to fire + */ +asm(".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" + "nop\n" + "blr\n" + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n"); + +/* + * Called when the probe at kretprobe trampoline is hit + */ +static int trampoline_rethook_handler(struct kprobe *p, struct pt_regs *regs) +{ + return !rethook_trampoline_handler(regs, regs->gpr[1]); +} +NOKPROBE_SYMBOL(trampoline_rethook_handler); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->link; + rh->frame = regs->gpr[1]; + + /* Replace the return addr with trampoline addr */ + regs->link = (unsigned long)arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, unsigned long orig_ret_address) +{ + /* + * We get here through one of two paths: + * 1. by taking a trap -> kprobe_handler() -> here + * 2. by optprobe branch -> optimized_callback() -> opt_pre_handler() -> here + * + * When going back through (1), we need regs->nip to be setup properly + * as it is used to determine the return address from the trap. + * For (2), since nip is not honoured with optprobes, we instead setup + * the link register properly so that the subsequent 'blr' in + * arch_rethook_trampoline jumps back to the right instruction. + * + * For nip, we should set the address to the previous instruction since + * we end up emulating it in kprobe_handler(), which increments the nip + * again. + */ + regs_set_return_ip(regs, orig_ret_address - 4); + regs->link = orig_ret_address; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +static struct kprobe trampoline_p = { + .addr = (kprobe_opcode_t *) &arch_rethook_trampoline, + .pre_handler = trampoline_rethook_handler +}; + +/* rethook initializer */ +int __init arch_init_kprobes(void) +{ + return register_kprobe(&trampoline_p); +} diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8064d9c3de86..d7a738f1858d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -19,6 +19,7 @@ #include <linux/lockdep.h> #include <linux/memblock.h> #include <linux/mutex.h> +#include <linux/nospec.h> #include <linux/of.h> #include <linux/of_fdt.h> #include <linux/reboot.h> @@ -797,66 +798,6 @@ void __init udbg_init_rtas_panel(void) udbg_putc = call_rtas_display_status_delay; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - -/* If you think you're dying before early_init_dt_scan_rtas() does its - * work, you can hard code the token values for your firmware here and - * hardcode rtas.base/entry etc. - */ -static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE; -static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE; - -static void udbg_rtascon_putc(char c) -{ - int tries; - - if (!rtas.base) - return; - - /* Add CRs before LFs */ - if (c == '\n') - udbg_rtascon_putc('\r'); - - /* if there is more than one character to be displayed, wait a bit */ - for (tries = 0; tries < 16; tries++) { - if (rtas_call(rtas_putchar_token, 1, 1, NULL, c) == 0) - break; - udelay(1000); - } -} - -static int udbg_rtascon_getc_poll(void) -{ - int c; - - if (!rtas.base) - return -1; - - if (rtas_call(rtas_getchar_token, 0, 2, &c)) - return -1; - - return c; -} - -static int udbg_rtascon_getc(void) -{ - int c; - - while ((c = udbg_rtascon_getc_poll()) == -1) - ; - - return c; -} - - -void __init udbg_init_rtas_console(void) -{ - udbg_putc = udbg_rtascon_putc; - udbg_getc = udbg_rtascon_getc; - udbg_getc_poll = udbg_rtascon_getc_poll; -} -#endif /* CONFIG_UDBG_RTAS_CONSOLE */ - void rtas_progress(char *s, unsigned short hex) { struct device_node *root; @@ -1389,21 +1330,14 @@ bool __ref rtas_busy_delay(int status) */ ms = clamp(ms, 1U, 1000U); /* - * The delay hint is an order-of-magnitude suggestion, not - * a minimum. It is fine, possibly even advantageous, for - * us to pause for less time than hinted. For small values, - * use usleep_range() to ensure we don't sleep much longer - * than actually needed. - * - * See Documentation/timers/timers-howto.rst for - * explanation of the threshold used here. In effect we use - * usleep_range() for 9900 and 9901, msleep() for - * 9902-9905. + * The delay hint is an order-of-magnitude suggestion, not a + * minimum. It is fine, possibly even advantageous, for us to + * pause for less time than hinted. To make sure pause time will + * not be way longer than requested independent of HZ + * configuration, use fsleep(). See fsleep() for details of + * used sleeping functions. */ - if (ms <= 20) - usleep_range(ms * 100, ms * 1000); - else - msleep(ms); + fsleep(ms * 1000); break; case RTAS_BUSY: ret = true; @@ -1916,6 +1850,9 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs) || nargs + nret > ARRAY_SIZE(args.args)) return -EINVAL; + nargs = array_index_nospec(nargs, ARRAY_SIZE(args.args)); + nret = array_index_nospec(nret, ARRAY_SIZE(args.args) - nargs); + /* Copy in args. */ if (copy_from_user(args.args, uargs->args, nargs * sizeof(rtas_arg_t)) != 0) @@ -2138,21 +2075,6 @@ int __init early_init_dt_scan_rtas(unsigned long node, rtas.size = *sizep; } -#ifdef CONFIG_UDBG_RTAS_CONSOLE - basep = of_get_flat_dt_prop(node, "put-term-char", NULL); - if (basep) - rtas_putchar_token = *basep; - - basep = of_get_flat_dt_prop(node, "get-term-char", NULL); - if (basep) - rtas_getchar_token = *basep; - - if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE && - rtas_getchar_token != RTAS_UNKNOWN_SERVICE) - udbg_init_rtas_console(); - -#endif - /* break now */ return 1; } diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 359577ec1680..5407024881e5 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -773,4 +773,5 @@ static void __exit rtas_flash_cleanup(void) module_init(rtas_flash_init); module_exit(rtas_flash_cleanup); +MODULE_DESCRIPTION("PPC procfs firmware flash interface"); MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 9e0efb657f39..3a28795b4ed8 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include <linux/types.h> #include <linux/of.h> +#include <linux/string_choices.h> #include <asm/secure_boot.h> static struct device_node *get_ppc_fw_sb_node(void) @@ -38,7 +39,7 @@ bool is_ppc_secureboot_enabled(void) of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -62,7 +63,7 @@ bool is_ppc_trustedboot_enabled(void) of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 4856e1a5161c..fbb7ebd8aa08 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -14,7 +14,7 @@ #include <linux/debugfs.h> #include <asm/asm-prototypes.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/security_features.h> #include <asm/sections.h> #include <asm/setup.h> diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c index eb3c053f323f..afb690a172b4 100644 --- a/arch/powerpc/kernel/secvar-sysfs.c +++ b/arch/powerpc/kernel/secvar-sysfs.c @@ -52,7 +52,7 @@ static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr, } static ssize_t data_read(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { char *data; @@ -85,7 +85,7 @@ data_fail: } static ssize_t update_write(struct file *filep, struct kobject *kobj, - struct bin_attribute *attr, char *buf, loff_t off, + const struct bin_attribute *attr, char *buf, loff_t off, size_t count) { int rc; @@ -104,11 +104,11 @@ static struct kobj_attribute format_attr = __ATTR_RO(format); static struct kobj_attribute size_attr = __ATTR_RO(size); -static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0); +static struct bin_attribute data_attr __ro_after_init = __BIN_ATTR_RO(data, 0); -static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0); +static struct bin_attribute update_attr __ro_after_init = __BIN_ATTR_WO(update, 0); -static struct bin_attribute *secvar_bin_attrs[] = { +static const struct bin_attribute *const secvar_bin_attrs[] = { &data_attr, &update_attr, NULL, @@ -121,16 +121,16 @@ static struct attribute *secvar_attrs[] = { static const struct attribute_group secvar_attr_group = { .attrs = secvar_attrs, - .bin_attrs = secvar_bin_attrs, + .bin_attrs_new = secvar_bin_attrs, }; __ATTRIBUTE_GROUPS(secvar_attr); -static struct kobj_type secvar_ktype = { +static const struct kobj_type secvar_ktype = { .sysfs_ops = &kobj_sysfs_ops, .default_groups = secvar_attr_groups, }; -static int update_kobj_size(void) +static __init int update_kobj_size(void) { u64 varsize; @@ -145,7 +145,7 @@ static int update_kobj_size(void) return 0; } -static int secvar_sysfs_config(struct kobject *kobj) +static __init int secvar_sysfs_config(struct kobject *kobj) { struct attribute_group config_group = { .name = "config", @@ -158,7 +158,7 @@ static int secvar_sysfs_config(struct kobject *kobj) return 0; } -static int add_var(const char *name) +static __init int add_var(const char *name) { struct kobject *kobj; int rc; @@ -181,7 +181,7 @@ static int add_var(const char *name) return 0; } -static int secvar_sysfs_load(void) +static __init int secvar_sysfs_load(void) { u64 namesize = 0; char *name; @@ -209,7 +209,7 @@ static int secvar_sysfs_load(void) return rc; } -static int secvar_sysfs_load_static(void) +static __init int secvar_sysfs_load_static(void) { const char * const *name_ptr = secvar_ops->var_names; int rc; @@ -224,7 +224,7 @@ static int secvar_sysfs_load_static(void) return 0; } -static int secvar_sysfs_init(void) +static __init int secvar_sysfs_init(void) { u64 max_size; int rc; diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 01ed1263e1a9..68d47c53876c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -67,6 +67,7 @@ #include <asm/cpu_has_feature.h> #include <asm/kasan.h> #include <asm/mce.h> +#include <asm/systemcfg.h> #include "setup.h" @@ -405,7 +406,7 @@ static void __init cpu_init_thread_core_maps(int tpc) cpumask_set_cpu(i, &threads_core_mask); printk(KERN_INFO "CPU maps initialized for %d thread%s per core\n", - tpc, tpc > 1 ? "s" : ""); + tpc, str_plural(tpc)); printk(KERN_DEBUG " (thread shift is %d)\n", threads_shift); } @@ -457,11 +458,8 @@ void __init smp_setup_cpu_maps(void) DBG("smp_setup_cpu_maps()\n"); - cpu_to_phys_id = memblock_alloc(nr_cpu_ids * sizeof(u32), + cpu_to_phys_id = memblock_alloc_or_panic(nr_cpu_ids * sizeof(u32), __alignof__(u32)); - if (!cpu_to_phys_id) - panic("%s: Failed to allocate %zu bytes align=0x%zx\n", - __func__, nr_cpu_ids * sizeof(u32), __alignof__(u32)); for_each_node_by_type(dn, "cpu") { const __be32 *intserv; @@ -560,7 +558,9 @@ void __init smp_setup_cpu_maps(void) out: of_node_put(dn); } - vdso_data->processorCount = num_present_cpus(); +#endif +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount = num_present_cpus(); #endif /* CONFIG_PPC64 */ /* Initialize CPU <=> thread mapping/ @@ -831,8 +831,8 @@ static int __init check_cache_coherency(void) if (devtree_coherency != KERNEL_COHERENCY) { printk(KERN_ERR "kernel coherency:%s != device tree_coherency:%s\n", - KERNEL_COHERENCY ? "on" : "off", - devtree_coherency ? "on" : "off"); + str_on_off(KERNEL_COHERENCY), + str_on_off(devtree_coherency)); BUG(); } @@ -957,8 +957,7 @@ void __init setup_arch(char **cmdline_p) /* Parse memory topology */ mem_topology_setup(); - /* Set max_mapnr before paging_init() */ - set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that @@ -996,9 +995,11 @@ void __init setup_arch(char **cmdline_p) initmem_init(); /* - * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must - * be called after initmem_init(), so that pageblock_order is initialised. + * Reserve large chunks of memory for use by CMA for fadump, KVM and + * hugetlb. These must be called after initmem_init(), so that + * pageblock_order is initialised. */ + fadump_cma_init(); kvm_cma_reserve(); gigantic_hugetlb_cma_reserve(); diff --git a/arch/powerpc/kernel/setup.h b/arch/powerpc/kernel/setup.h index 7912bb50a7cb..385a00a2e2ca 100644 --- a/arch/powerpc/kernel/setup.h +++ b/arch/powerpc/kernel/setup.h @@ -29,7 +29,7 @@ void setup_tlb_core_data(void); static inline void setup_tlb_core_data(void) { } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void exc_lvl_early_init(void); #else static inline void exc_lvl_early_init(void) { } diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index b761cc1a403c..5a1bf501fbe1 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -40,7 +40,7 @@ #include <asm/time.h> #include <asm/serial.h> #include <asm/udbg.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/cpu_has_feature.h> #include <asm/asm-prototypes.h> #include <asm/kdump.h> @@ -140,13 +140,7 @@ arch_initcall(ppc_init); static void *__init alloc_stack(void) { - void *ptr = memblock_alloc(THREAD_SIZE, THREAD_ALIGN); - - if (!ptr) - panic("cannot allocate %d bytes for stack at %pS\n", - THREAD_SIZE, (void *)_RET_IP_); - - return ptr; + return memblock_alloc_or_panic(THREAD_SIZE, THREAD_ALIGN); } void __init irqstack_early_init(void) @@ -176,7 +170,7 @@ void __init emergency_stack_init(void) } #endif -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE void __init exc_lvl_early_init(void) { unsigned int i, hw_cpu; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2f19d5e94485..7284c8021eeb 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -60,7 +60,7 @@ #include <asm/xmon.h> #include <asm/udbg.h> #include <asm/kexec.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/opal.h> #include <asm/cputhreads.h> @@ -696,11 +696,7 @@ __init u64 ppc64_bolted_size(void) { #ifdef CONFIG_PPC_BOOK3E_64 /* Freescale BookE bolts the entire linear mapping */ - /* XXX: BookE ppc64_rma_limit setup seems to disagree? */ - if (early_mmu_has_feature(MMU_FTR_TYPE_FSL_E)) - return linear_map_top; - /* Other BookE, we assume the first GB is bolted */ - return 1ul << 30; + return linear_map_top; #else /* BookS radix, does not take faults on linear mapping */ if (early_radix_enabled()) @@ -834,6 +830,7 @@ static __init int pcpu_cpu_to_node(int cpu) unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; EXPORT_SYMBOL(__per_cpu_offset); +DEFINE_STATIC_KEY_FALSE(__percpu_first_chunk_is_paged); void __init setup_per_cpu_areas(void) { @@ -876,6 +873,7 @@ void __init setup_per_cpu_areas(void) if (rc < 0) panic("cannot initialize percpu area (err=%d)", rc); + static_key_enable(&__percpu_first_chunk_is_paged.key); delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; for_each_possible_cpu(cpu) { __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu]; @@ -894,7 +892,7 @@ unsigned long memory_block_size_bytes(void) } #endif -#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO) +#ifdef CONFIG_PPC_INDIRECT_PIO struct ppc_pci_io ppc_pci_io; EXPORT_SYMBOL(ppc_pci_io); #endif @@ -922,6 +920,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 12e53b3d7923..5ac7084eebc0 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -61,6 +61,7 @@ #include <asm/ftrace.h> #include <asm/kup.h> #include <asm/fadump.h> +#include <asm/systemcfg.h> #include <trace/events/ipi.h> @@ -1166,7 +1167,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) cpu_smt_set_num_threads(num_threads, threads_per_core); } -void smp_prepare_boot_cpu(void) +void __init smp_prepare_boot_cpu(void) { BUG_ON(smp_processor_id() != boot_cpuid); #ifdef CONFIG_PPC64 @@ -1186,8 +1187,8 @@ int generic_cpu_disable(void) return -EBUSY; set_cpu_online(cpu, false); -#ifdef CONFIG_PPC64 - vdso_data->processorCount--; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->processorCount--; #endif /* Update affinity of all IRQs previously aimed at this CPU */ irq_migrate_all_off_this_cpu(); @@ -1567,7 +1568,7 @@ static void add_cpu_to_masks(int cpu) /* * This CPU will not be in the online mask yet so we need to manually - * add it to it's own thread sibling mask. + * add it to its own thread sibling mask. */ map_cpu_to_node(cpu, cpu_to_node(cpu)); cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); @@ -1642,10 +1643,12 @@ void start_secondary(void *unused) secondary_cpu_time_init(); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG if (system_state == SYSTEM_RUNNING) - vdso_data->processorCount++; + systemcfg->processorCount++; +#endif +#ifdef CONFIG_PPC64 vdso_getcpu_init(); #endif set_numa_node(numa_cpu_lookup_table[cpu]); diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index e6a958a5da27..90882b5175cd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -21,6 +21,7 @@ #include <asm/processor.h> #include <linux/ftrace.h> #include <asm/kprobes.h> +#include <linux/rethook.h> #include <asm/paca.h> @@ -133,12 +134,13 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum * arch-dependent code, they are generic. */ ip = ftrace_graph_ret_addr(task, &graph_idx, ip, stack); -#ifdef CONFIG_KPROBES + /* * Mark stacktraces with kretprobed functions on them * as unreliable. */ - if (ip == (unsigned long)__kretprobe_trampoline) +#ifdef CONFIG_RETHOOK + if (ip == (unsigned long)arch_rethook_trampoline) return -EINVAL; #endif diff --git a/arch/powerpc/kernel/static_call.c b/arch/powerpc/kernel/static_call.c index 863a7aa24650..ec3101f95e53 100644 --- a/arch/powerpc/kernel/static_call.c +++ b/arch/powerpc/kernel/static_call.c @@ -2,32 +2,60 @@ #include <linux/memory.h> #include <linux/static_call.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> void arch_static_call_transform(void *site, void *tramp, void *func, bool tail) { int err; bool is_ret0 = (func == __static_call_return0); - unsigned long target = (unsigned long)(is_ret0 ? tramp + PPC_SCT_RET0 : func); - bool is_short = is_offset_in_branch_range((long)target - (long)tramp); - - if (!tramp) - return; + unsigned long _tramp = (unsigned long)tramp; + unsigned long _func = (unsigned long)func; + unsigned long _ret0 = _tramp + PPC_SCT_RET0; + bool is_short = is_offset_in_branch_range((long)func - (long)(site ? : tramp)); mutex_lock(&text_mutex); - if (func && !is_short) { - err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target)); - if (err) - goto out; + if (site && tail) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(site, _ret0, 0); + else if (is_short) + err = patch_branch(site, _func, 0); + else if (tramp) + err = patch_branch(site, _tramp, 0); + else + err = 0; + } else if (site) { + if (!func) + err = patch_instruction(site, ppc_inst(PPC_RAW_NOP())); + else if (is_ret0) + err = patch_instruction(site, ppc_inst(PPC_RAW_LI(_R3, 0))); + else if (is_short) + err = patch_branch(site, _func, BRANCH_SET_LINK); + else if (tramp) + err = patch_branch(site, _tramp, BRANCH_SET_LINK); + else + err = 0; + } else if (tramp) { + if (func && !is_short) { + err = patch_ulong(tramp + PPC_SCT_DATA, _func); + if (err) + goto out; + } + + if (!func) + err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); + else if (is_ret0) + err = patch_branch(tramp, _ret0, 0); + else if (is_short) + err = patch_branch(tramp, _func, 0); + else + err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); + } else { + err = 0; } - if (!func) - err = patch_instruction(tramp, ppc_inst(PPC_RAW_BLR())); - else if (is_short) - err = patch_branch(tramp, target, 0); - else - err = patch_instruction(tramp, ppc_inst(PPC_RAW_NOP())); out: mutex_unlock(&text_mutex); diff --git a/arch/powerpc/kernel/switch.S b/arch/powerpc/kernel/switch.S index 608c0ce7cec6..59e3ee99db0e 100644 --- a/arch/powerpc/kernel/switch.S +++ b/arch/powerpc/kernel/switch.S @@ -39,7 +39,6 @@ flush_branch_caches: // Flush the link stack .rept 64 - ANNOTATE_INTRA_FUNCTION_CALL bl .+4 .endr b 1f diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index f6f868e817e6..be159ad4b77b 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -27,7 +27,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) trace_hardirqs_off(); /* finish reconciling */ - CT_WARN_ON(ct_state() == CONTEXT_KERNEL); + CT_WARN_ON(ct_state() == CT_STATE_KERNEL); user_exit_irqoff(); BUG_ON(regs_is_unrecoverable(regs)); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 17173b82ca21..9a084bdb8926 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive @@ -548,3 +552,9 @@ 459 common lsm_get_self_attr sys_lsm_get_self_attr 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules +462 common mseal sys_mseal +463 common setxattrat sys_setxattrat +464 common getxattrat sys_getxattrat +465 common listxattrat sys_listxattrat +466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index 0f39a6b84132..6b3dd6decdf9 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include <asm/hvcall.h> #include <asm/machdep.h> #include <asm/smp.h> +#include <asm/time.h> #include <asm/pmc.h> #include <asm/firmware.h> #include <asm/idle.h> @@ -139,7 +140,7 @@ static unsigned long dscr_default; * @val: Returned cpu specific DSCR default value * * This function returns the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void read_dscr(void *val) { @@ -152,7 +153,7 @@ static void read_dscr(void *val) * @val: New cpu specific DSCR default value to update * * This function updates the per cpu DSCR default value - * for any cpu which is contained in it's PACA structure. + * for any cpu which is contained in its PACA structure. */ static void write_dscr(void *val) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index df20cf201f74..8224381c1dba 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -71,11 +71,11 @@ #include <asm/vdso_datapage.h> #include <asm/firmware.h> #include <asm/mce.h> +#include <asm/systemcfg.h> /* powerpc clocksource/clockevent code */ #include <linux/clockchips.h> -#include <linux/timekeeper_internal.h> static u64 timebase_read(struct clocksource *); static struct clocksource clocksource_timebase = { @@ -354,6 +354,28 @@ void vtime_flush(struct task_struct *tsk) acct->hardirq_time = 0; acct->softirq_time = 0; } + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void vtime_task_switch(struct task_struct *prev) +{ + if (is_idle_task(prev)) + vtime_account_idle(prev); + else + vtime_account_kernel(prev); + + vtime_flush(prev); + + if (!IS_ENABLED(CONFIG_PPC64)) { + struct cpu_accounting_data *acct = get_accounting(current); + struct cpu_accounting_data *acct0 = get_accounting(prev); + + acct->starttime = acct0->starttime; + } +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ void __no_kcsan __delay(unsigned long loops) @@ -673,7 +695,7 @@ static int __init get_freq(char *name, int cells, unsigned long *val) static void start_cpu_decrementer(void) { -#ifdef CONFIG_BOOKE_OR_40x +#ifdef CONFIG_BOOKE unsigned int tcr; /* Clear any pending timer interrupts */ @@ -879,6 +901,38 @@ void secondary_cpu_time_init(void) register_decrementer_clockevent(smp_processor_id()); } +/* + * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit + * result. + */ +static __init void div128_by_32(u64 dividend_high, u64 dividend_low, + unsigned int divisor, struct div_result *dr) +{ + unsigned long a, b, c, d; + unsigned long w, x, y, z; + u64 ra, rb, rc; + + a = dividend_high >> 32; + b = dividend_high & 0xffffffff; + c = dividend_low >> 32; + d = dividend_low & 0xffffffff; + + w = a / divisor; + ra = ((u64)(a - (w * divisor)) << 32) + b; + + rb = ((u64)do_div(ra, divisor) << 32) + c; + x = ra; + + rc = ((u64)do_div(rb, divisor) << 32) + d; + y = rb; + + do_div(rc, divisor); + z = rc; + + dr->result_high = ((u64)w << 32) + x; + dr->result_low = ((u64)y << 32) + z; +} + /* This function is only called on the boot processor */ void __init time_init(void) { @@ -928,7 +982,10 @@ void __init time_init(void) sys_tz.tz_dsttime = 0; } - vdso_data->tb_ticks_per_sec = tb_ticks_per_sec; + vdso_k_arch_data->tb_ticks_per_sec = tb_ticks_per_sec; +#ifdef CONFIG_PPC64_PROC_SYSTEMCFG + systemcfg->tb_ticks_per_sec = tb_ticks_per_sec; +#endif /* initialise and enable the large decrementer (if we have one) */ set_decrementer_max(); @@ -949,39 +1006,6 @@ void __init time_init(void) enable_sched_clock_irqtime(); } -/* - * Divide a 128-bit dividend by a 32-bit divisor, leaving a 128 bit - * result. - */ -void div128_by_32(u64 dividend_high, u64 dividend_low, - unsigned divisor, struct div_result *dr) -{ - unsigned long a, b, c, d; - unsigned long w, x, y, z; - u64 ra, rb, rc; - - a = dividend_high >> 32; - b = dividend_high & 0xffffffff; - c = dividend_low >> 32; - d = dividend_low & 0xffffffff; - - w = a / divisor; - ra = ((u64)(a - (w * divisor)) << 32) + b; - - rb = ((u64) do_div(ra, divisor) << 32) + c; - x = ra; - - rc = ((u64) do_div(rb, divisor) << 32) + d; - y = rb; - - do_div(rc, divisor); - z = rc; - - dr->result_high = ((u64)w << 32) + x; - dr->result_low = ((u64)y << 32) + z; - -} - /* We don't need to calibrate delay, we use the CPU timebase for that */ void calibrate_delay(void) { diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 125f4ca588b9..d6c3885453bd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -9,12 +9,15 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o +obj64-y += ftrace.o ftrace_entry.o +endif endif + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14..6dca92d5a6e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -37,8 +37,12 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } return addr; } @@ -82,7 +86,7 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ { int ret = ftrace_validate_inst(ip, old); - if (!ret) + if (!ret && !ppc_inst_equal(old, new)) ret = patch_instruction((u32 *)ip, new); return ret; @@ -106,28 +110,66 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + struct module *mod = NULL; + + scoped_guard(rcu) + mod = __module_text_address(ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); + + return (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); +} +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + return 0; +} +#endif + +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { - unsigned long ip = rec->ip; + unsigned long ip; unsigned long stub; - if (is_offset_in_branch_range(addr - ip)) { + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; + } + + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; -#ifdef CONFIG_MODULES - } else if (rec->arch.mod) { - /* Module code would be going to one of the module stubs */ - stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp : - rec->arch.mod->arch.tramp_regs); -#endif - } else if (core_kernel_text(ip)) { + else if (core_kernel_text(ip)) /* We would be branching to one of our ftrace stubs */ stub = find_ftrace_tramp(ip); - if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); - return -EINVAL; - } - } else { + else + stub = ftrace_lookup_module_stub(ip, addr); + + if (!stub) { + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); return -EINVAL; } @@ -135,6 +177,145 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ return 0; } +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. + */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; + + WARN_ON(rec->arch.ool_stub); + + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + /* + * ftrace records are sorted, so we first use up the stub area within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. + */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { + return -EINVAL; + } + + ool_stub += (*ool_stub_index)++; + + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; + + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; + } + + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; + + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. + */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } + + if (!ops) + ops = &ftrace_list_ops; + + return ops; +} + +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) +{ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); + else + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); +} + +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -147,18 +328,33 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { ppc_inst_t old, new; - int ret; + unsigned long ip = rec->ip; + int ret = 0; /* This can only ever be called during module load */ - if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip))) + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; old = ppc_inst(PPC_RAW_NOP()); - ret = ftrace_get_call_inst(rec, addr, &new); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); + } + + ret |= ftrace_get_call_inst(rec, addr, &new); + + if (!ret) + ret = ftrace_modify_code(ip, old, new); + + ret = ftrace_rec_update_ops(rec); if (ret) return ret; - return ftrace_modify_code(rec->ip, old, new); + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -191,6 +387,13 @@ void ftrace_replace_code(int enable) new_addr = ftrace_get_addr_new(rec); update = ftrace_update_record(rec, enable); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; + } + switch (update) { case FTRACE_UPDATE_IGNORE: default: @@ -198,16 +401,19 @@ void ftrace_replace_code(int enable) case FTRACE_UPDATE_MODIFY_CALL: ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = call_inst; new = new_call_inst; break; case FTRACE_UPDATE_MAKE_NOP: ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); old = call_inst; new = nop_inst; break; case FTRACE_UPDATE_MAKE_CALL: ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = nop_inst; new = call_inst; break; @@ -215,6 +421,24 @@ void ftrace_replace_code(int enable) if (!ret) ret = ftrace_modify_code(ip, old, new); + + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; + + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); + + if (ret) + goto out; + } + if (ret) goto out; } @@ -234,20 +458,27 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Verify instructions surrounding the ftrace location */ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { /* Expect nops */ - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); if (!ret) ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - if (!ret) - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); } } else { return -EINVAL; @@ -256,13 +487,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; - if (!core_kernel_text(ip)) { - if (!mod) { - pr_err("0x%lx: No module provided for non-kernel address\n", ip); - return -EFAULT; - } - rec->arch.mod = mod; - } + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return ftrace_init_ool_stub(mod, rec); /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); @@ -302,6 +529,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); @@ -421,8 +655,7 @@ int __init ftrace_dyn_arch_init(void) void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long sp = fregs->regs.gpr[1]; - int bit; + unsigned long sp = arch_ftrace_regs(fregs)->regs.gpr[1]; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -430,15 +663,10 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; - bit = ftrace_test_recursion_trylock(ip, parent_ip); - if (bit < 0) - goto out; - - if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp)) + if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, fregs)) parent_ip = ppc_function_entry(return_to_handler); - ftrace_test_recursion_unlock(bit); out: - fregs->regs.link = parent_ip; + arch_ftrace_regs(fregs)->regs.link = parent_ip; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf..5c6e545d1708 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -23,7 +23,7 @@ #include <linux/list.h> #include <asm/cacheflush.h> -#include <asm/code-patching.h> +#include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/syscall.h> #include <asm/inst.h> @@ -116,6 +116,18 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) } #ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + scoped_guard(rcu) + mod = __module_text_address(rec->ip); + if (!mod) + pr_err("No module loaded at addr=%lx\n", rec->ip); + + return mod; +} + static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -124,6 +136,12 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; ppc_inst_t op, pop; + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); @@ -366,27 +384,6 @@ int ftrace_make_nop(struct module *mod, return -EINVAL; } - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - return __ftrace_make_nop(mod, rec, addr); } @@ -411,7 +408,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ppc_inst_t op[2]; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* read where this goes */ if (copy_inst_from_kernel_nofault(op, ip)) @@ -533,16 +533,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_make_call(rec, addr); } @@ -555,7 +545,10 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* If we never set up ftrace trampolines, then bail */ if (!mod->arch.tramp || !mod->arch.tramp_regs) { @@ -668,14 +661,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EINVAL; } - /* - * Out of range jumps are called from modules. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_modify_call(rec, old_addr, addr); } #endif @@ -800,10 +785,10 @@ int ftrace_disable_ftrace_graph_caller(void) * in current thread info. Return the address we want to divert to. */ static unsigned long -__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) +__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp, + struct ftrace_regs *fregs) { unsigned long return_hooker; - int bit; if (unlikely(ftrace_graph_is_dead())) goto out; @@ -811,16 +796,11 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; - bit = ftrace_test_recursion_trylock(ip, parent); - if (bit < 0) - goto out; - return_hooker = ppc_function_entry(return_to_handler); - if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) + if (!function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, fregs)) parent = return_hooker; - ftrace_test_recursion_unlock(bit); out: return parent; } @@ -829,13 +809,14 @@ out: void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct ftrace_regs *fregs) { - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); + arch_ftrace_regs(fregs)->regs.link = __prepare_ftrace_return(parent_ip, ip, + arch_ftrace_regs(fregs)->regs.gpr[1], fregs); } #else unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) { - return __prepare_ftrace_return(parent, ip, sp); + return __prepare_ftrace_return(parent, ip, sp, NULL); } #endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 76dbe9fd2c0f..2c1b24100eca 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -39,13 +39,37 @@ /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -54,9 +78,9 @@ .if \allregs == 1 SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) + SAVE_GPRS(13, 31, r1) .else -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) #endif .endif @@ -67,80 +91,143 @@ .if \allregs == 1 /* Load special regs for save below */ + mfcr r7 mfmsr r8 mfctr r9 mfxer r10 - mfcr r11 .else /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ li r8, 0 .endif - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 LOAD_REG_ADDR(r3, function_trace_op) ld r5,0(r3) #else lis r3,function_trace_op@ha lwz r5,function_trace_op@l(r3) #endif - -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ #endif - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 + PPC_STL r7, _CCR(r1) PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r11 + /* + * We want the ftrace location in the function, but our lr (in r11) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r11) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r11, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. + */ + addi r11, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r11, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r11, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r11 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs - /* Load ctr with the possibly modified NIP */ - PPC_LL r3, _NIP(r1) +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 mtctr r3 +#endif + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Load ctr with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? */ + bne- 1f + + mr r3, r15 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +1: mtlr r3 +#endif + /* Restore gprs */ .if \allregs == 1 REST_GPRS(2, 31, r1) .else REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) REST_GPR(14, r1) #endif .endif - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) - mtlr r0 - #ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, STK_GOT(r1) @@ -153,23 +240,46 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - bctr /* jump after _mcount site */ + + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. + */ + blr +#else + bctr +#endif .endm -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 .globl ftrace_regs_call ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 ftrace_regs_exit 1 _GLOBAL(ftrace_caller) ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub + ftrace_regs_func 0 ftrace_regs_exit 0 _GLOBAL(ftrace_stub) @@ -177,6 +287,11 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_PPC64 ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else mflr r3 mtctr r3 REST_GPR(3, r1) @@ -184,6 +299,22 @@ ftrace_no_trace: mtlr r0 bctr #endif +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif #ifdef CONFIG_LIVEPATCH_64 /* @@ -194,11 +325,17 @@ ftrace_no_trace: * We get here when a function A, calls another function B, but B has * been live patched with a new function C. * - * On entry: - * - we have no stack frame and can not allocate one + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: * - LR points back to the original caller (in A) * - CTR holds the new NIP in C * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free */ livepatch_handler: ld r12, PACA_THREAD_INFO(r13) @@ -208,18 +345,23 @@ livepatch_handler: addi r11, r11, 24 std r11, TI_livepatch_sp(r12) - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l std r12, -8(r11) - /* Put ctr in r12 for global entry and branch there */ + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif bctrl /* @@ -308,6 +450,14 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f23430adb68a..cb8e9357383e 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -121,7 +121,7 @@ static void pmac_backlight_unblank(void) props = &pmac_backlight->props; props->brightness = props->max_brightness; - props->power = FB_BLANK_UNBLANK; + props->power = BACKLIGHT_POWER_ON; backlight_update_status(pmac_backlight); } mutex_unlock(&pmac_backlight_mutex); @@ -263,10 +263,9 @@ static int __die(const char *str, struct pt_regs *regs, long err) { printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); - printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n", + printk("%s PAGE_SIZE=%luK%s %s%s%s%s %s\n", IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE", PAGE_SIZE / 1024, get_mmu_str(), - IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", IS_ENABLED(CONFIG_SMP) ? " SMP" : "", IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "", debug_pagealloc_enabled() ? " DEBUG_PAGEALLOC" : "", @@ -2244,7 +2243,7 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs) ; } -#if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) +#ifdef CONFIG_BOOKE_WDT DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 92b3fc258d11..862b22b2b616 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -36,12 +36,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_PANEL) /* RTAS panel debug */ udbg_init_rtas_panel(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) - /* RTAS console debug */ - udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) @@ -49,9 +43,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_44x) /* PPC44x debug */ udbg_init_44x_as1(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_40x) - /* PPC40x debug */ - udbg_init_40x_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_CPM) udbg_init_cpm(); #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index a0467e528b70..dfe8ed2192e8 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -205,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) @@ -274,29 +251,6 @@ void __init udbg_init_44x_as1(void) #endif /* CONFIG_PPC_EARLY_DEBUG_44x */ -#ifdef CONFIG_PPC_EARLY_DEBUG_40x - -static u8 udbg_uart_in_40x(unsigned int reg) -{ - return real_readb((void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); -} - -static void udbg_uart_out_40x(unsigned int reg, u8 val) -{ - real_writeb(val, (void __iomem *)CONFIG_PPC_EARLY_DEBUG_40x_PHYSADDR - + reg); -} - -void __init udbg_init_40x_realmode(void) -{ - udbg_uart_in = udbg_uart_in_40x; - udbg_uart_out = udbg_uart_out_40x; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_EARLY_DEBUG_40x */ - #ifdef CONFIG_PPC_EARLY_DEBUG_16550 static void __iomem *udbg_uart_early_addr; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 7a2ff9010f17..219d67bcf747 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -16,9 +16,8 @@ #include <linux/user.h> #include <linux/elf.h> #include <linux/security.h> -#include <linux/memblock.h> #include <linux/syscalls.h> -#include <linux/time_namespace.h> +#include <linux/vdso_datastore.h> #include <vdso/datapage.h> #include <asm/syscall.h> @@ -33,6 +32,8 @@ #include <asm/vdso_datapage.h> #include <asm/setup.h> +static_assert(__VDSO_PAGES == VDSO_NR_PAGES); + /* The alignment of the vDSO */ #define VDSO_ALIGNMENT (1 << 16) @@ -41,23 +42,6 @@ extern char vdso64_start, vdso64_end; long sys_ni_syscall(void); -/* - * The vdso data page (aka. systemcfg for old ppc64 fans) is here. - * Once the early boot kernel code no longer needs to muck around - * with it, it will become dynamically allocated - */ -static union { - struct vdso_arch_data data; - u8 page[PAGE_SIZE]; -} vdso_data_store __page_aligned_data; -struct vdso_arch_data *vdso_data = &vdso_data_store.data; - -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma, unsigned long text_size) { @@ -81,88 +65,33 @@ static int vdso64_mremap(const struct vm_special_mapping *sm, struct vm_area_str return vdso_mremap(sm, new_vma, &vdso64_end - &vdso64_start); } -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf); +static void vdso_close(const struct vm_special_mapping *sm, struct vm_area_struct *vma) +{ + struct mm_struct *mm = vma->vm_mm; + + /* + * close() is called for munmap() but also for mremap(). In the mremap() + * case the vdso pointer has already been updated by the mremap() hook + * above, so it must not be set to NULL here. + */ + if (vma->vm_start != (unsigned long)mm->context.vdso) + return; -static struct vm_special_mapping vvar_spec __ro_after_init = { - .name = "[vvar]", - .fault = vvar_fault, -}; + mm->context.vdso = NULL; +} static struct vm_special_mapping vdso32_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso32_mremap, + .close = vdso_close, }; static struct vm_special_mapping vdso64_spec __ro_after_init = { .name = "[vdso]", .mremap = vdso64_mremap, + .close = vdso_close, }; -#ifdef CONFIG_TIME_NS -struct vdso_data *arch_get_vdso_data(void *vvar_page) -{ - return ((struct vdso_arch_data *)vvar_page)->data; -} - -/* - * The vvar mapping contains data for a specific time namespace, so when a task - * changes namespace we must unmap its vvar data for the old namespace. - * Subsequent faults will map in data for the new namespace. - * - * For more details see timens_setup_vdso_data(). - */ -int vdso_join_timens(struct task_struct *task, struct time_namespace *ns) -{ - struct mm_struct *mm = task->mm; - VMA_ITERATOR(vmi, mm, 0); - struct vm_area_struct *vma; - - mmap_read_lock(mm); - for_each_vma(vmi, vma) { - if (vma_is_special_mapping(vma, &vvar_spec)) - zap_vma_pages(vma); - } - mmap_read_unlock(mm); - - return 0; -} -#endif - -static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, - struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct page *timens_page = find_timens_vvar_page(vma); - unsigned long pfn; - - switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: - if (timens_page) - pfn = page_to_pfn(timens_page); - else - pfn = virt_to_pfn(vdso_data); - break; -#ifdef CONFIG_TIME_NS - case VVAR_TIMENS_PAGE_OFFSET: - /* - * If a task belongs to a time namespace then a namespace - * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and - * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET - * offset. - * See also the comment near timens_setup_vdso_data(). - */ - if (!timens_page) - return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); - break; -#endif /* CONFIG_TIME_NS */ - default: - return VM_FAULT_SIGBUS; - } - - return vmf_insert_pfn(vma, vmf->address, pfn); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -171,7 +100,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int { unsigned long vdso_size, vdso_base, mappings_size; struct vm_special_mapping *vdso_spec; - unsigned long vvar_size = VVAR_NR_PAGES * PAGE_SIZE; + unsigned long vvar_size = VDSO_NR_PAGES * PAGE_SIZE; struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -197,16 +126,7 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int /* Add required alignment. */ vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT); - /* - * Put vDSO base into mm struct. We need to do this before calling - * install_special_mapping or the perf counter mmap tracking code - * will fail to recognise it as a vDSO. - */ - mm->context.vdso = (void __user *)vdso_base + vvar_size; - - vma = _install_special_mapping(mm, vdso_base, vvar_size, - VM_READ | VM_MAYREAD | VM_IO | - VM_DONTDUMP | VM_PFNMAP, &vvar_spec); + vma = vdso_install_vvar_mapping(mm, vdso_base); if (IS_ERR(vma)) return PTR_ERR(vma); @@ -223,10 +143,15 @@ static int __arch_setup_additional_pages(struct linux_binprm *bprm, int uses_int vma = _install_special_mapping(mm, vdso_base + vvar_size, vdso_size, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, vdso_spec); - if (IS_ERR(vma)) + if (IS_ERR(vma)) { do_munmap(mm, vdso_base, vvar_size, NULL); + return PTR_ERR(vma); + } + + // Now that the mappings are in place, set the mm VDSO pointer + mm->context.vdso = (void __user *)vdso_base + vvar_size; - return PTR_ERR_OR_ZERO(vma); + return 0; } int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) @@ -240,8 +165,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) return -EINTR; rc = __arch_setup_additional_pages(bprm, uses_interp); - if (rc) - mm->context.vdso = NULL; mmap_write_unlock(mm); return rc; @@ -283,10 +206,10 @@ static void __init vdso_setup_syscall_map(void) for (i = 0; i < NR_syscalls; i++) { if (sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); if (IS_ENABLED(CONFIG_COMPAT) && compat_sys_call_table[i] != (void *)&sys_ni_syscall) - vdso_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); + vdso_k_arch_data->compat_syscall_map[i >> 5] |= 0x80000000UL >> (i & 0x1f); } } @@ -336,29 +259,10 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) static int __init vdso_init(void) { #ifdef CONFIG_PPC64 - /* - * Fill up the "systemcfg" stuff for backward compatibility - */ - strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64"); - vdso_data->version.major = SYSTEMCFG_MAJOR; - vdso_data->version.minor = SYSTEMCFG_MINOR; - vdso_data->processor = mfspr(SPRN_PVR); - /* - * Fake the old platform number for pSeries and add - * in LPAR bit if necessary - */ - vdso_data->platform = 0x100; - if (firmware_has_feature(FW_FEATURE_LPAR)) - vdso_data->platform |= 1; - vdso_data->physicalMemorySize = memblock_phys_mem_size(); - vdso_data->dcache_size = ppc64_caches.l1d.size; - vdso_data->dcache_line_size = ppc64_caches.l1d.line_size; - vdso_data->icache_size = ppc64_caches.l1i.size; - vdso_data->icache_line_size = ppc64_caches.l1i.line_size; - vdso_data->dcache_block_size = ppc64_caches.l1d.block_size; - vdso_data->icache_block_size = ppc64_caches.l1i.block_size; - vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; - vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; + vdso_k_arch_data->dcache_block_size = ppc64_caches.l1d.block_size; + vdso_k_arch_data->icache_block_size = ppc64_caches.l1i.block_size; + vdso_k_arch_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size; + vdso_k_arch_data->icache_log_block_size = ppc64_caches.l1i.log_block_size; #endif /* CONFIG_PPC64 */ vdso_setup_syscall_map(); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 1b93655c2857..e8824f933326 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -3,35 +3,26 @@ # List of files in the vdso, has to be asm only for now # Include the generic Makefile to check the built vdso. -include $(srctree)/lib/vdso/Makefile +include $(srctree)/lib/vdso/Makefile.include obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o +obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o + ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) - CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - # This flag is supported by clang for 64-bit but not 32-bit so it will cause - # an unused command line flag warning for this file. - ifdef CONFIG_CC_IS_CLANG - CFLAGS_REMOVE_vgettimeofday-32.o += -fno-stack-clash-protection - endif - CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) # Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true # by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is # compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code # generation is minimal, it will just use r29 instead. - CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) $(call cc-option, -ffixed-r30) +endif + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y) + CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) endif # Build rules @@ -42,26 +33,36 @@ else VDSOCC := $(CC) endif -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o +targets += crtsavres-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -UBSAN_SANITIZE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n - -ccflags-y := -fno-common -fno-builtin +ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO +ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN) +ccflags-y += $(call cc-option, -fno-stack-protector) +ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -ffreestanding -fasynchronous-unwind-tables +ccflags-remove-y := $(CC_FLAGS_FTRACE) ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS) ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) # Filter flags that clang will warn are unused for linking -ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS)) +ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc +ifdef CONFIG_CC_IS_CLANG +# This flag is supported by clang for 64-bit but not 32-bit so it will cause +# an unused command line flag warning for this file. +CC32FLAGSREMOVE += -fno-stack-clash-protection +# -mstack-protector-guard values from the 64-bit build are not valid for the +# 32-bit one. clang validates the values passed to these arguments during +# parsing, even when -fno-stack-protector is passed afterwards. +CC32FLAGSREMOVE += -mstack-protector-guard% +endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__ @@ -74,26 +75,32 @@ targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(src)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(src)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/crtsavres-32.o: %-32.o: $(srctree)/arch/powerpc/lib/crtsavres.S FORCE + $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj)/vgetrandom-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) $(obj-vdso64): %-64.o: %.S FORCE $(call if_changed_dep,vdso64as) $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script -gen-vdso32sym := $(srctree)/$(src)/gen_vdso32_offsets.sh +gen-vdso32sym := $(src)/gen_vdso32_offsets.sh quiet_cmd_vdso32sym = VDSO32SYM $@ cmd_vdso32sym = $(NM) $< | $(gen-vdso32sym) | LC_ALL=C sort > $@ -gen-vdso64sym := $(srctree)/$(src)/gen_vdso64_offsets.sh +gen-vdso64sym := $(src)/gen_vdso64_offsets.sh quiet_cmd_vdso64sym = VDSO64SYM $@ cmd_vdso64sym = $(NM) $< | $(gen-vdso64sym) | LC_ALL=C sort > $@ @@ -108,11 +115,9 @@ quiet_cmd_vdso32ld_and_check = VDSO32L $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32cc = $(VDSOCC) $(filter-out $(CC32FLAGSREMOVE), $(c_flags)) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) quiet_cmd_vdso64as = VDSO64A $@ cmd_vdso64as = $(VDSOCC) $(a_flags) $(AS64FLAGS) -c -o $@ $< - -OBJECT_FILES_NON_STANDARD := y diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index 0085ae464dac..488d3ade11e6 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10 + get_datapage r10 vdso_u_arch_data mtlr r12 .cfi_restore lr #endif diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index db8e167f0166..d23b2e8e2a34 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3 + get_datapage r3 vdso_u_arch_data mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3 + get_datapage r3 vdso_u_arch_data #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S new file mode 100644 index 000000000000..a80d9fb436f7 --- /dev/null +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Userland implementation of getrandom() for processes + * for use in the vDSO + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/vdso.h> +#include <asm/vdso_datapage.h> +#include <asm/asm-offsets.h> +#include <asm/unistd.h> + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + mflr r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT +#endif + bl CFUNC(DOTSYM(\funct)) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 +#endif + cmpwi r3, 0 + mtlr r0 + addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 + crclr so + bgelr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + + .text +V_FUNCTION_BEGIN(__kernel_getrandom) + cvdso_call __c_kernel_getrandom +V_FUNCTION_END(__kernel_getrandom) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 48fc6658053a..79c967212444 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -32,17 +32,12 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_datapage r5 .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET + get_datapage r5 vdso_u_time_data .else - addi r4, r5, VDSO_DATA_OFFSET + get_datapage r4 vdso_u_time_data .endif -#ifdef __powerpc64__ bl CFUNC(DOTSYM(\funct)) -#else - bl \funct -#endif PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) @@ -118,16 +113,3 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) - -/* Routines for restoring integer registers, called by the compiler. */ -/* Called with r11 pointing to the stack header word of the caller of the */ -/* function, just beyond the end of the integer restore area. */ -#ifndef __powerpc64__ -_GLOBAL(_restgpr_31_x) -_GLOBAL(_rest32gpr_31_x) - lwz r0,4(r11) - lwz r31,-4(r11) - mtlr r0 - mr r1,r11 - blr -#endif diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 426e1ccc6971..72a1012b8a20 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf32-powerpcle", "elf32-powerpcle", "elf32-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -74,6 +76,8 @@ SECTIONS .got : { *(.got) } :text .plt : { *(.plt) } + .rela.dyn : { *(.rela .rela*) } + _end = .; __end = .; PROVIDE(end = .); @@ -87,7 +91,7 @@ SECTIONS *(.branch_lt) *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) - *(.got1 .glink .iplt .rela*) + *(.got1 .glink .iplt) } } @@ -128,6 +132,7 @@ VERSION #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) __kernel_getcpu; #endif + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index bda6c8cdd459..32102a05eaa7 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -6,6 +6,7 @@ #include <asm/vdso.h> #include <asm/page.h> #include <asm-generic/vmlinux.lds.h> +#include <vdso/datapage.h> #ifdef __LITTLE_ENDIAN__ OUTPUT_FORMAT("elf64-powerpcle", "elf64-powerpcle", "elf64-powerpcle") @@ -16,7 +17,8 @@ OUTPUT_ARCH(powerpc:common64) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + VDSO_VVAR_SYMS + . = SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -69,7 +71,7 @@ SECTIONS .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr .eh_frame : { KEEP (*(.eh_frame)) } :text .gcc_except_table : { *(.gcc_except_table) } - .rela.dyn ALIGN(8) : { *(.rela.dyn) } + .rela.dyn ALIGN(8) : { *(.rela .rela*) } .got ALIGN(8) : { *(.got .toc) } @@ -86,7 +88,7 @@ SECTIONS *(.data .data.* .gnu.linkonce.d.* .sdata*) *(.bss .sbss .dynbss .dynsbss) *(.opd) - *(.glink .iplt .plt .rela*) + *(.glink .iplt .plt) } } @@ -123,6 +125,7 @@ VERSION __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..7f9061a9e8b4 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ + +#include <linux/linkage.h> + +#include <asm/ppc_asm.h> + +#define dst_bytes r3 +#define key r4 +#define counter r5 +#define nblocks r6 + +#define idx_r0 r0 +#define val4 r4 + +#define const0 0x61707865 +#define const1 0x3320646e +#define const2 0x79622d32 +#define const3 0x6b206574 + +#define key0 r5 +#define key1 r6 +#define key2 r7 +#define key3 r8 +#define key4 r9 +#define key5 r10 +#define key6 r11 +#define key7 r12 + +#define counter0 r14 +#define counter1 r15 + +#define state0 r16 +#define state1 r17 +#define state2 r18 +#define state3 r19 +#define state4 r20 +#define state5 r21 +#define state6 r22 +#define state7 r23 +#define state8 r24 +#define state9 r25 +#define state10 r26 +#define state11 r27 +#define state12 r28 +#define state13 r29 +#define state14 r30 +#define state15 r31 + +.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 16 + rotlwi \d2, \d2, 16 + rotlwi \d3, \d3, 16 + rotlwi \d4, \d4, 16 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 12 + rotlwi \b2, \b2, 12 + rotlwi \b3, \b3, 12 + rotlwi \b4, \b4, 12 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 8 + rotlwi \d2, \d2, 8 + rotlwi \d3, \d3, 8 + rotlwi \d4, \d4, 8 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 7 + rotlwi \b2, \b2, 7 + rotlwi \b3, \b3, 7 + rotlwi \b4, \b4, 7 +.endm + +#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \ + quarterround4 state##a1 state##b1 state##c1 state##d1 \ + state##a2 state##b2 state##c2 state##d2 \ + state##a3 state##b3 state##c3 state##d3 \ + state##a4 state##b4 state##c4 state##d4 + +/* + * Very basic 32 bits implementation of ChaCha20. Produces a given positive number + * of blocks of output with a nonce of 0, taking an input key and 8-byte + * counter. Importantly does not spill to the stack. Its arguments are: + * + * r3: output bytes + * r4: 32-byte key input + * r5: 8-byte counter input/output (saved on stack) + * r6: number of 64-byte blocks to write to output + * + * r0: counter of blocks (initialised with r6) + * r4: Value '4' after key has been read. + * r5-r12: key + * r14-r15: counter + * r16-r31: state + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) +#ifdef __powerpc64__ + std counter, -216(r1) + + std r14, -144(r1) + std r15, -136(r1) + std r16, -128(r1) + std r17, -120(r1) + std r18, -112(r1) + std r19, -104(r1) + std r20, -96(r1) + std r21, -88(r1) + std r22, -80(r1) + std r23, -72(r1) + std r24, -64(r1) + std r25, -56(r1) + std r26, -48(r1) + std r27, -40(r1) + std r28, -32(r1) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) +#else + stwu r1, -96(r1) + stw counter, 20(r1) +#ifdef __BIG_ENDIAN__ + stmw r14, 24(r1) +#else + stw r14, 24(r1) + stw r15, 28(r1) + stw r16, 32(r1) + stw r17, 36(r1) + stw r18, 40(r1) + stw r19, 44(r1) + stw r20, 48(r1) + stw r21, 52(r1) + stw r22, 56(r1) + stw r23, 60(r1) + stw r24, 64(r1) + stw r25, 68(r1) + stw r26, 72(r1) + stw r27, 76(r1) + stw r28, 80(r1) + stw r29, 84(r1) + stw r30, 88(r1) + stw r31, 92(r1) +#endif +#endif /* __powerpc64__ */ + + lwz counter0, 0(counter) + lwz counter1, 4(counter) +#ifdef __powerpc64__ + rldimi counter0, counter1, 32, 0 +#endif + mr idx_r0, nblocks + subi dst_bytes, dst_bytes, 4 + + lwz key0, 0(key) + lwz key1, 4(key) + lwz key2, 8(key) + lwz key3, 12(key) + lwz key4, 16(key) + lwz key5, 20(key) + lwz key6, 24(key) + lwz key7, 28(key) + + li val4, 4 +.Lblock: + li r31, 10 + + lis state0, const0@ha + lis state1, const1@ha + lis state2, const2@ha + lis state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + mtctr r31 + + mr state4, key0 + mr state5, key1 + mr state6, key2 + mr state7, key3 + mr state8, key4 + mr state9, key5 + mr state10, key6 + mr state11, key7 + + mr state12, counter0 + mr state13, counter1 + + li state14, 0 + li state15, 0 + +.Lpermute: + QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15) + QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14) + + bdnz .Lpermute + + addis state0, state0, const0@ha + addis state1, state1, const1@ha + addis state2, state2, const2@ha + addis state3, state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + add state4, state4, key0 + add state5, state5, key1 + add state6, state6, key2 + add state7, state7, key3 + add state8, state8, key4 + add state9, state9, key5 + add state10, state10, key6 + add state11, state11, key7 + + add state12, state12, counter0 + add state13, state13, counter1 + +#ifdef __BIG_ENDIAN__ + stwbrx state0, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state1, 0, dst_bytes + stwbrx state2, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state3, 0, dst_bytes + stwbrx state4, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state5, 0, dst_bytes + stwbrx state6, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state7, 0, dst_bytes + stwbrx state8, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state9, 0, dst_bytes + stwbrx state10, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state11, 0, dst_bytes + stwbrx state12, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state13, 0, dst_bytes + stwbrx state14, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state15, 0, dst_bytes +#else + stw state0, 4(dst_bytes) + stw state1, 8(dst_bytes) + stw state2, 12(dst_bytes) + stw state3, 16(dst_bytes) + stw state4, 20(dst_bytes) + stw state5, 24(dst_bytes) + stw state6, 28(dst_bytes) + stw state7, 32(dst_bytes) + stw state8, 36(dst_bytes) + stw state9, 40(dst_bytes) + stw state10, 44(dst_bytes) + stw state11, 48(dst_bytes) + stw state12, 52(dst_bytes) + stw state13, 56(dst_bytes) + stw state14, 60(dst_bytes) + stwu state15, 64(dst_bytes) +#endif + + subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ + +#ifdef __powerpc64__ + addi counter0, counter0, 1 + srdi counter1, counter0, 32 +#else + addic counter0, counter0, 1 + addze counter1, counter1 +#endif + + bne .Lblock + +#ifdef __powerpc64__ + ld counter, -216(r1) +#else + lwz counter, 20(r1) +#endif + stw counter0, 0(counter) + stw counter1, 4(counter) + + li r6, 0 + li r7, 0 + li r8, 0 + li r9, 0 + li r10, 0 + li r11, 0 + li r12, 0 + +#ifdef __powerpc64__ + ld r14, -144(r1) + ld r15, -136(r1) + ld r16, -128(r1) + ld r17, -120(r1) + ld r18, -112(r1) + ld r19, -104(r1) + ld r20, -96(r1) + ld r21, -88(r1) + ld r22, -80(r1) + ld r23, -72(r1) + ld r24, -64(r1) + ld r25, -56(r1) + ld r26, -48(r1) + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) +#else +#ifdef __BIG_ENDIAN__ + lmw r14, 24(r1) +#else + lwz r14, 24(r1) + lwz r15, 28(r1) + lwz r16, 32(r1) + lwz r17, 36(r1) + lwz r18, 40(r1) + lwz r19, 44(r1) + lwz r20, 48(r1) + lwz r21, 52(r1) + lwz r22, 56(r1) + lwz r23, 60(r1) + lwz r24, 64(r1) + lwz r25, 68(r1) + lwz r26, 72(r1) + lwz r27, 76(r1) + lwz r28, 80(r1) + lwz r29, 84(r1) + lwz r30, 88(r1) + lwz r31, 92(r1) +#endif + addi r1, r1, 96 +#endif /* __powerpc64__ */ + blr +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..cc79b960a541 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementation of getrandom() + * + * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France + */ +#include <linux/time.h> +#include <linux/types.h> + +ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, + size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/powerpc/kernel/vdso/vgettimeofday.c b/arch/powerpc/kernel/vdso/vgettimeofday.c index 55a287c9a736..6f5167d81af5 100644 --- a/arch/powerpc/kernel/vdso/vgettimeofday.c +++ b/arch/powerpc/kernel/vdso/vgettimeofday.c @@ -7,43 +7,43 @@ #ifdef __powerpc64__ int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_data(vd, clock_id, res); } #else int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime32_data(vd, clock, ts); } int __c_kernel_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_gettime_data(vd, clock, ts); } int __c_kernel_clock_getres(clockid_t clock_id, struct old_timespec32 *res, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_clock_getres_time32_data(vd, clock_id, res); } #endif int __c_kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz, - const struct vdso_data *vd) + const struct vdso_time_data *vd) { return __cvdso_gettimeofday_data(vd, tv, tz); } -__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_data *vd) +__kernel_old_time_t __c_kernel_time(__kernel_old_time_t *time, const struct vdso_time_data *vd) { return __cvdso_time_data(vd, time); } diff --git a/arch/powerpc/kernel/vdso32_wrapper.S b/arch/powerpc/kernel/vdso32_wrapper.S index 10f92f265d51..20bca3548b44 100644 --- a/arch/powerpc/kernel/vdso32_wrapper.S +++ b/arch/powerpc/kernel/vdso32_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso32_start, vdso32_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vdso64_wrapper.S b/arch/powerpc/kernel/vdso64_wrapper.S index 839d1a61411d..1912936fa227 100644 --- a/arch/powerpc/kernel/vdso64_wrapper.S +++ b/arch/powerpc/kernel/vdso64_wrapper.S @@ -2,7 +2,7 @@ #include <linux/linkage.h> #include <asm/page.h> - __PAGE_ALIGNED_DATA + .section ".data..ro_after_init", "aw" .globl vdso64_start, vdso64_end .balign PAGE_SIZE diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f420df7888a7..de6ee7d35cff 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -1,10 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef CONFIG_PPC64 -#define PROVIDE32(x) PROVIDE(__unused__##x) -#else -#define PROVIDE32(x) PROVIDE(x) -#endif - #define BSS_FIRST_SECTIONS *(.bss.prominit) #define EMITS_PT_NOTE #define RO_EXCEPTION_TABLE_ALIGN 0 @@ -123,13 +117,10 @@ SECTIONS */ *(.sfpr); *(.text.asan.* .text.tsan.*) - MEM_KEEP(init.text) - MEM_KEEP(exit.text) } :text . = ALIGN(PAGE_SIZE); _etext = .; - PROVIDE32 (etext = .); /* Read-only data */ RO_DATA(PAGE_SIZE) @@ -267,14 +258,13 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . = ALIGN(PAGE_SIZE); _einittext = .; - *(.tramp.ftrace.init); } :text /* .exit.text is discarded at runtime, not link time, @@ -397,7 +387,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); _edata = .; - PROVIDE32 (edata = .); /* * And finally the bss @@ -407,7 +396,6 @@ SECTIONS . = ALIGN(PAGE_SIZE); _end = . ; - PROVIDE32 (end = .); DWARF_DEBUG ELF_DETAILS diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 8c464a5d8246..2429cb1c7baa 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -495,8 +495,7 @@ static void start_watchdog(void *arg) *this_cpu_ptr(&wd_timer_tb) = get_tb(); - hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hrtimer->function = watchdog_timer_fn; + hrtimer_setup(hrtimer, watchdog_timer_fn, CLOCK_MONOTONIC, HRTIMER_MODE_REL); hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms), HRTIMER_MODE_REL_PINNED); } |