author	Paul Mackerras <paulus@ozlabs.org>	2018-01-19 12:09:57 +1100
committer	Paul Mackerras <paulus@ozlabs.org>	2018-01-19 12:09:57 +1100
commit	d27998185da8fbdc35911307ae13518d168778d7 (patch)
tree	4c5a99fd7cff7c9da1f858fdfccf8dc1cc6c597b /arch
parent	00608e1f007e4cf6031485c5630e0e504bceef9b (diff)
parent	d075745d893c78730e4a3b7a60fca23c2f764081 (diff)
Merge remote-tracking branch 'remotes/powerpc/topic/ppc-kvm' into kvm-ppc-next
This merges in the ppc-kvm topic branch of the powerpc tree to get
two patches which are prerequisites for the following patch series,
plus another patch which touches both powerpc and KVM code.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Diffstat (limited to 'arch')
348 files changed, 5090 insertions, 2289 deletions
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 6bf730063e3f..2dbdf59258d9 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -12,6 +12,7 @@
 #define MAP_SHARED	0x01		/* Share changes */
 #define MAP_PRIVATE	0x02		/* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03	/* share + validate extension flags */
 #define MAP_TYPE	0x0f		/* Mask for type of mapping (OSF/1 is _wrong_) */
 #define MAP_FIXED	0x100		/* Interpret addr exactly */
 #define MAP_ANONYMOUS	0x10		/* don't use a file */
diff --git a/arch/alpha/kernel/srmcons.c b/arch/alpha/kernel/srmcons.c
index 5da0aec8ce90..438b10c44d73 100644
--- a/arch/alpha/kernel/srmcons.c
+++ b/arch/alpha/kernel/srmcons.c
@@ -65,9 +65,9 @@ srmcons_do_receive_chars(struct tty_port *port)
 }
 
 static void
-srmcons_receive_chars(unsigned long data)
+srmcons_receive_chars(struct timer_list *t)
 {
-	struct srmcons_private *srmconsp = (struct srmcons_private *)data;
+	struct srmcons_private *srmconsp = from_timer(srmconsp, t, timer);
 	struct tty_port *port = &srmconsp->port;
 	unsigned long flags;
 	int incr = 10;
@@ -206,8 +206,7 @@ static const struct tty_operations srmcons_ops = {
 static int __init
 srmcons_init(void)
 {
-	setup_timer(&srmcons_singleton.timer, srmcons_receive_chars,
-		    (unsigned long)&srmcons_singleton);
+	timer_setup(&srmcons_singleton.timer, srmcons_receive_chars, 0);
 	if (srm_is_registered_console) {
 		struct tty_driver *driver;
 		int err;
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 5c7adf100a58..9d5fd00d9e91 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -39,7 +39,7 @@ config ARC
 	select OF
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
-	select PERF_USE_VMALLOC
+	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_KERNEL_GZIP
diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index e114000a84f5..74d070cd3c13 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -16,6 +16,12 @@
 		ranges = <0x00000000 0x0 0xe0000000 0x10000000>;
 		interrupt-parent = <&mb_intc>;
 
+		creg_rst: reset-controller@11220 {
+			compatible = "snps,axs10x-reset";
+			#reset-cells = <1>;
+			reg = <0x11220 0x4>;
+		};
+
 		i2sclk: i2sclk@100a0 {
 			compatible = "snps,axs10x-i2s-pll-clock";
 			reg = <0x100a0 0x10>;
@@ -73,6 +79,8 @@
 			clocks = <&apbclk>;
 			clock-names = "stmmaceth";
 			max-speed = <100>;
+			resets = <&creg_rst 5>;
+			reset-names = "stmmaceth";
 		};
 
 		ehci@0x40000 {
diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h
index b1c56d35f2a9..49bfbd879caa 100644
--- a/arch/arc/include/asm/arcregs.h
+++ b/arch/arc/include/asm/arcregs.h
@@ -11,12 +11,14 @@
 
 /* Build Configuration Registers */
 #define ARC_REG_AUX_DCCM	0x18	/* DCCM Base Addr ARCv2 */
+#define ARC_REG_ERP_CTRL	0x3F	/* ARCv2 Error protection control */
 #define ARC_REG_DCCM_BASE_BUILD	0x61	/* DCCM Base Addr ARCompact */
 #define ARC_REG_CRC_BCR		0x62
 #define ARC_REG_VECBASE_BCR	0x68
 #define ARC_REG_PERIBASE_BCR	0x69
 #define ARC_REG_FP_BCR		0x6B	/* ARCompact: Single-Precision FPU */
 #define ARC_REG_DPFP_BCR	0x6C	/* ARCompact: Dbl Precision FPU */
+#define ARC_REG_ERP_BUILD	0xc7	/* ARCv2 Error protection Build: ECC/Parity */
 #define ARC_REG_FP_V2_BCR	0xc8	/* ARCv2 FPU */
 #define ARC_REG_SLC_BCR		0xce
 #define ARC_REG_DCCM_BUILD	0x74	/* DCCM size (common) */
@@ -32,11 +34,14 @@
 #define ARC_REG_D_UNCACH_BCR	0x6A
 #define ARC_REG_BPU_BCR		0xc0
 #define ARC_REG_ISA_CFG_BCR	0xc1
+#define ARC_REG_LPB_BUILD	0xE9	/* ARCv2 Loop Buffer Build */
 #define ARC_REG_RTT_BCR		0xF2
 #define ARC_REG_IRQ_BCR		0xF3
+#define ARC_REG_MICRO_ARCH_BCR	0xF9	/* ARCv2 Product revision */
 #define ARC_REG_SMART_BCR	0xFF
 #define ARC_REG_CLUSTER_BCR	0xcf
 #define ARC_REG_AUX_ICCM	0x208	/* ICCM Base Addr (ARCv2) */
+#define ARC_REG_LPB_CTRL	0x488	/* ARCv2 Loop Buffer control */
 
 /* Common for ARCompact and ARCv2 status register */
 #define ARC_REG_STATUS32	0x0A
@@ -229,6 +234,32 @@ struct bcr_bpu_arcv2 {
 #endif
 };
 
+/* Error Protection Build: ECC/Parity */
+struct bcr_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad3:5, mmu:3, pad2:4, ic:3, dc:3, pad1:6, ver:8;
+#else
+	unsigned int ver:8, pad1:6, dc:3, ic:3, pad2:4, mmu:3, pad3:5;
+#endif
+};
+
+/* Error Protection Control */
+struct ctl_erp {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad2:27, mpd:1, pad1:2, dpd:1, dpi:1;
+#else
+	unsigned int dpi:1, dpd:1, pad1:2, mpd:1, pad2:27;
+#endif
+};
+
+struct bcr_lpb {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+	unsigned int pad:16, entries:8, ver:8;
+#else
+	unsigned int ver:8, entries:8, pad:16;
+#endif
+};
+
 struct bcr_generic {
 #ifdef CONFIG_CPU_BIG_ENDIAN
 	unsigned int info:24, ver:8;
@@ -270,7 +301,7 @@ struct cpuinfo_arc {
 	struct cpuinfo_arc_ccm iccm, dccm;
 	struct {
 		unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2,
-			     fpu_sp:1, fpu_dp:1, dual_iss_enb:1, dual_iss_exist:1, pad2:4,
+			     fpu_sp:1, fpu_dp:1, dual:1, dual_enb:1, pad2:4,
 			     debug:1, ap:1, smart:1, rtt:1, pad3:4,
 			     timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4;
 	} extn;
diff --git a/arch/arc/kernel/perf_event.c b/arch/arc/kernel/perf_event.c
index 2ce24e74f879..8aec462d90fb 100644
--- a/arch/arc/kernel/perf_event.c
+++ b/arch/arc/kernel/perf_event.c
@@ -336,15 +336,12 @@ static int arc_pmu_add(struct perf_event *event, int flags)
 	struct hw_perf_event *hwc = &event->hw;
 	int idx = hwc->idx;
 
-	if (__test_and_set_bit(idx, pmu_cpu->used_mask)) {
-		idx = find_first_zero_bit(pmu_cpu->used_mask,
-					  arc_pmu->n_counters);
-		if (idx == arc_pmu->n_counters)
-			return -EAGAIN;
-
-		__set_bit(idx, pmu_cpu->used_mask);
-		hwc->idx = idx;
-	}
+	idx = ffz(pmu_cpu->used_mask[0]);
+	if (idx == arc_pmu->n_counters)
+		return -EAGAIN;
+
+	__set_bit(idx, pmu_cpu->used_mask);
+	hwc->idx = idx;
 
 	write_aux_reg(ARC_REG_PCT_INDEX, idx);
 
@@ -377,21 +374,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 	struct perf_sample_data data;
 	struct arc_pmu_cpu *pmu_cpu = this_cpu_ptr(&arc_pmu_cpu);
 	struct pt_regs *regs;
-	int active_ints;
+	unsigned int active_ints;
 	int idx;
 
 	arc_pmu_disable(&arc_pmu->pmu);
 
 	active_ints = read_aux_reg(ARC_REG_PCT_INT_ACT);
+	if (!active_ints)
+		goto done;
 
 	regs = get_irq_regs();
 
-	for (idx = 0; idx < arc_pmu->n_counters; idx++) {
-		struct perf_event *event = pmu_cpu->act_counter[idx];
+	do {
+		struct perf_event *event;
 		struct hw_perf_event *hwc;
 
-		if (!(active_ints & (1 << idx)))
-			continue;
+		idx = __ffs(active_ints);
 
 		/* Reset interrupt flag by writing of 1 */
 		write_aux_reg(ARC_REG_PCT_INT_ACT, 1 << idx);
@@ -404,19 +402,22 @@ static irqreturn_t arc_pmu_intr(int irq, void *dev)
 		write_aux_reg(ARC_REG_PCT_INT_CTRL,
 			      read_aux_reg(ARC_REG_PCT_INT_CTRL) | (1 << idx));
 
+		event = pmu_cpu->act_counter[idx];
 		hwc = &event->hw;
 
 		WARN_ON_ONCE(hwc->idx != idx);
 
 		arc_perf_event_update(event, &event->hw, event->hw.idx);
 		perf_sample_data_init(&data, 0, hwc->last_period);
-		if (!arc_pmu_event_set_period(event))
-			continue;
+		if (arc_pmu_event_set_period(event)) {
+			if (perf_event_overflow(event, &data, regs))
+				arc_pmu_stop(event, 0);
+		}
 
-		if (perf_event_overflow(event, &data, regs))
-			arc_pmu_stop(event, 0);
-	}
+		active_ints &= ~(1U << idx);
+	} while (active_ints);
 
+done:
 	arc_pmu_enable(&arc_pmu->pmu);
 
 	return IRQ_HANDLED;
@@ -461,6 +462,7 @@ static int arc_pmu_device_probe(struct platform_device *pdev)
 		pr_err("This core does not have performance counters!\n");
 		return -ENODEV;
 	}
+	BUILD_BUG_ON(ARC_PERF_MAX_COUNTERS > 32);
 	BUG_ON(pct_bcr.c > ARC_PERF_MAX_COUNTERS);
 
 	READ_BCR(ARC_REG_CC_BUILD, cc_bcr);
diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c
index fb83844daeea..7ef7d9a8ff89 100644
--- a/arch/arc/kernel/setup.c
+++ b/arch/arc/kernel/setup.c
@@ -199,8 +199,10 @@ static void read_arc_build_cfg_regs(void)
 			unsigned int exec_ctrl;
 
 			READ_BCR(AUX_EXEC_CTRL, exec_ctrl);
-			cpu->extn.dual_iss_exist = 1;
-			cpu->extn.dual_iss_enb = exec_ctrl & 1;
+			cpu->extn.dual_enb = exec_ctrl & 1;
+
+			/* dual issue always present for this core */
+			cpu->extn.dual = 1;
 		}
 	}
 
@@ -253,7 +255,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 		       cpu_id, cpu->name, cpu->details,
 		       is_isa_arcompact() ? "ARCompact" : "ARCv2",
 		       IS_AVAIL1(cpu->isa.be, "[Big-Endian]"),
-		       IS_AVAIL3(cpu->extn.dual_iss_exist, cpu->extn.dual_iss_enb, " Dual-Issue"));
+		       IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue "));
 
 	n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s%s%s\nISA Extn\t: ",
 		       IS_AVAIL1(cpu->extn.timer0, "Timer0 "),
@@ -293,11 +295,26 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len)
 
 	if (cpu->bpu.ver)
 		n += scnprintf(buf + n, len - n,
-			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d\n",
+			      "BPU\t\t: %s%s match, cache:%d, Predict Table:%d",
 			      IS_AVAIL1(cpu->bpu.full, "full"),
 			      IS_AVAIL1(!cpu->bpu.full, "partial"),
 			      cpu->bpu.num_cache, cpu->bpu.num_pred);
 
+	if (is_isa_arcv2()) {
+		struct bcr_lpb lpb;
+
+		READ_BCR(ARC_REG_LPB_BUILD, lpb);
+		if (lpb.ver) {
+			unsigned int ctl;
+			ctl = read_aux_reg(ARC_REG_LPB_CTRL);
+
+			n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s",
+				       lpb.entries,
+				       IS_DISABLED_RUN(!ctl));
+		}
+	}
+
+	n += scnprintf(buf + n, len - n, "\n");
+
 	return buf;
 }
 
@@ -326,6 +343,24 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len)
 			       cpu->dccm.base_addr, TO_KB(cpu->dccm.sz),
 			       cpu->iccm.base_addr, TO_KB(cpu->iccm.sz));
 
+	if (is_isa_arcv2()) {
+
+		/* Error Protection: ECC/Parity */
+		struct bcr_erp erp;
+		READ_BCR(ARC_REG_ERP_BUILD, erp);
+
+		if (erp.ver) {
+			struct ctl_erp ctl;
+			READ_BCR(ARC_REG_ERP_CTRL, ctl);
+
+			/* inverted bits: 0 means enabled */
+			n += scnprintf(buf + n, len - n, "Extn [ECC]\t: %s%s%s%s%s%s\n",
+				       IS_AVAIL3(erp.ic, !ctl.dpi, "IC "),
+				       IS_AVAIL3(erp.dc, !ctl.dpd, "DC "),
+				       IS_AVAIL3(erp.mmu, !ctl.mpd, "MMU "));
+		}
+	}
+
 	n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n",
 			EF_ARC_OSABI_CURRENT >> 8,
			EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ?
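The arc_pmu_intr() rework above stops scanning every counter index and instead peels pending bits off ARC_REG_PCT_INT_ACT one at a time with __ffs(), clearing each bit after servicing it. A minimal userspace sketch of that bit-peeling loop, using __builtin_ctz() as a stand-in for the kernel's __ffs() (the mask value here is made up for illustration):

/*
 * Userspace analogue of the reworked arc_pmu_intr() loop:
 * service only the set bits, lowest first, instead of
 * iterating over every possible counter index.
 * Build with: gcc -o ffs_demo ffs_demo.c
 */
#include <stdio.h>

static void handle_counter(unsigned int idx)
{
	printf("servicing counter %u\n", idx);
}

int main(void)
{
	unsigned int active_ints = 0x29;	/* bits 0, 3, 5 pending (made up) */

	while (active_ints) {
		/* index of lowest set bit; __ffs() in the kernel */
		unsigned int idx = __builtin_ctz(active_ints);

		handle_counter(idx);
		active_ints &= ~(1U << idx);	/* clear the serviced bit */
	}
	return 0;
}

With three bits set, the loop body runs exactly three times, which is why the kernel version also gains the early "goto done" when no interrupt is pending.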
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index 8ceefbf72fb0..4097764fea23 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -762,21 +762,23 @@ void read_decode_mmu_bcr(void) tmp = read_aux_reg(ARC_REG_MMU_BCR); mmu->ver = (tmp >> 24); - if (mmu->ver <= 2) { - mmu2 = (struct bcr_mmu_1_2 *)&tmp; - mmu->pg_sz_k = TO_KB(0x2000); - mmu->sets = 1 << mmu2->sets; - mmu->ways = 1 << mmu2->ways; - mmu->u_dtlb = mmu2->u_dtlb; - mmu->u_itlb = mmu2->u_itlb; - } else if (mmu->ver == 3) { - mmu3 = (struct bcr_mmu_3 *)&tmp; - mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); - mmu->sets = 1 << mmu3->sets; - mmu->ways = 1 << mmu3->ways; - mmu->u_dtlb = mmu3->u_dtlb; - mmu->u_itlb = mmu3->u_itlb; - mmu->sasid = mmu3->sasid; + if (is_isa_arcompact()) { + if (mmu->ver <= 2) { + mmu2 = (struct bcr_mmu_1_2 *)&tmp; + mmu->pg_sz_k = TO_KB(0x2000); + mmu->sets = 1 << mmu2->sets; + mmu->ways = 1 << mmu2->ways; + mmu->u_dtlb = mmu2->u_dtlb; + mmu->u_itlb = mmu2->u_itlb; + } else { + mmu3 = (struct bcr_mmu_3 *)&tmp; + mmu->pg_sz_k = 1 << (mmu3->pg_sz - 1); + mmu->sets = 1 << mmu3->sets; + mmu->ways = 1 << mmu3->ways; + mmu->u_dtlb = mmu3->u_dtlb; + mmu->u_itlb = mmu3->u_itlb; + mmu->sasid = mmu3->sasid; + } } else { mmu4 = (struct bcr_mmu_4 *)&tmp; mmu->pg_sz_k = 1 << (mmu4->sz0 - 1); @@ -818,8 +820,9 @@ int pae40_exist_but_not_enab(void) void arc_mmu_init(void) { - char str[256]; struct cpuinfo_arc_mmu *mmu = &cpuinfo_arc700[smp_processor_id()].mmu; + char str[256]; + int compat = 0; pr_info("%s", arc_mmu_mumbojumbo(0, str, sizeof(str))); @@ -834,15 +837,21 @@ void arc_mmu_init(void) */ BUILD_BUG_ON(!IS_ALIGNED(STACK_TOP, PMD_SIZE)); - /* For efficiency sake, kernel is compile time built for a MMU ver - * This must match the hardware it is running on. - * Linux built for MMU V2, if run on MMU V1 will break down because V1 - * hardware doesn't understand cmds such as WriteNI, or IVUTLB - * On the other hand, Linux built for V1 if run on MMU V2 will do - * un-needed workarounds to prevent memcpy thrashing. - * Similarly MMU V3 has new features which won't work on older MMU + /* + * Ensure that MMU features assumed by kernel exist in hardware. + * For older ARC700 cpus, it has to be exact match, since the MMU + * revisions were not backwards compatible (MMUv3 TLB layout changed + * so even if kernel for v2 didn't use any new cmds of v3, it would + * still not work. + * For HS cpus, MMUv4 was baseline and v5 is backwards compatible + * (will run older software). */ - if (mmu->ver != CONFIG_ARC_MMU_VER) { + if (is_isa_arcompact() && mmu->ver == CONFIG_ARC_MMU_VER) + compat = 1; + else if (is_isa_arcv2() && mmu->ver >= CONFIG_ARC_MMU_VER) + compat = 1; + + if (!compat) { panic("MMU ver %d doesn't match kernel built for %d...\n", mmu->ver, CONFIG_ARC_MMU_VER); } diff --git a/arch/arc/plat-axs10x/Kconfig b/arch/arc/plat-axs10x/Kconfig index c54d1ae57fe0..4e0df7b7a248 100644 --- a/arch/arc/plat-axs10x/Kconfig +++ b/arch/arc/plat-axs10x/Kconfig @@ -14,6 +14,8 @@ menuconfig ARC_PLAT_AXS10X select MIGHT_HAVE_PCI select GENERIC_IRQ_CHIP select GPIOLIB + select AXS101 if ISA_ARCOMPACT + select AXS103 if ISA_ARCV2 help Support for the ARC AXS10x Software Development Platforms. diff --git a/arch/arc/plat-axs10x/axs10x.c b/arch/arc/plat-axs10x/axs10x.c index cf14ebc36916..f1ac6790da5f 100644 --- a/arch/arc/plat-axs10x/axs10x.c +++ b/arch/arc/plat-axs10x/axs10x.c @@ -111,13 +111,6 @@ static void __init axs10x_early_init(void) axs10x_enable_gpio_intc_wire(); - /* - * Reset ethernet IP core. 
- * TODO: get rid of this quirk after axs10x reset driver (or simple - * reset driver) will be available in upstream. - */ - iowrite32((1 << 5), (void __iomem *) CREG_MB_SW_RESET); - scnprintf(mb, 32, "MainBoard v%d", mb_rev); axs10x_print_board_ver(CREG_MB_VER, mb); } diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 12b8c8f8ec07..17685e19aed8 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1776,9 +1776,9 @@ config DEBUG_UART_8250_FLOW_CONTROL default y if ARCH_EBSA110 || DEBUG_FOOTBRIDGE_COM1 || DEBUG_GEMINI || ARCH_RPC config DEBUG_UNCOMPRESS - bool + bool "Enable decompressor debugging via DEBUG_LL output" depends on ARCH_MULTIPLATFORM || PLAT_SAMSUNG || ARM_SINGLE_ARMV7M - default y if DEBUG_LL && !DEBUG_OMAP2PLUS_UART && \ + depends on DEBUG_LL && !DEBUG_OMAP2PLUS_UART && \ (!DEBUG_TEGRA_UART || !ZBOOT_ROM) && \ !DEBUG_BRCMSTB_UART help diff --git a/arch/arm/Makefile b/arch/arm/Makefile index def8824fc71c..80351e505fd5 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -16,11 +16,11 @@ LDFLAGS := LDFLAGS_vmlinux :=-p --no-undefined -X --pic-veneer ifeq ($(CONFIG_CPU_ENDIAN_BE8),y) LDFLAGS_vmlinux += --be8 -LDFLAGS_MODULE += --be8 +KBUILD_LDFLAGS_MODULE += --be8 endif ifeq ($(CONFIG_ARM_MODULE_PLTS),y) -LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds +KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds endif GZFLAGS :=-9 @@ -122,7 +122,7 @@ CFLAGS_ISA :=-mthumb $(AFLAGS_AUTOIT) $(AFLAGS_NOWARN) AFLAGS_ISA :=$(CFLAGS_ISA) -Wa$(comma)-mthumb # Work around buggy relocation from gas if requested: ifeq ($(CONFIG_THUMB2_AVOID_R_ARM_THM_JUMP11),y) -CFLAGS_MODULE +=-fno-optimize-sibling-calls +KBUILD_CFLAGS_MODULE +=-fno-optimize-sibling-calls endif else CFLAGS_ISA :=$(call cc-option,-marm,) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index ad301f107dd2..bc8d4bbd82e2 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -518,4 +518,22 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro bug, msg, line +#ifdef CONFIG_THUMB2_KERNEL +1: .inst 0xde02 +#else +1: .inst 0xe7f001f2 +#endif +#ifdef CONFIG_DEBUG_BUGVERBOSE + .pushsection .rodata.str, "aMS", %progbits, 1 +2: .asciz "\msg" + .popsection + .pushsection __bug_table, "aw" + .align 2 + .word 1b, 2b + .hword \line + .popsection +#endif + .endm + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 2a029bceaf2f..1a7a17b2a1ba 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -221,7 +221,6 @@ static inline pte_t pte_mkspecial(pte_t pte) } #define __HAVE_ARCH_PTE_SPECIAL -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY)) #define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY)) #define pud_page(pud) pmd_page(__pmd(pud_val(pud))) diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 1c462381c225..150ece66ddf3 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -232,6 +232,18 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pte_valid_user(pte) \ (pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte)) +static inline bool pte_access_permitted(pte_t pte, bool write) +{ + pteval_t mask = L_PTE_PRESENT | L_PTE_USER; + pteval_t needed = mask; + + if (write) + mask |= L_PTE_RDONLY; + + return (pte_val(pte) & mask) == needed; +} +#define pte_access_permitted 
pte_access_permitted + #if __LINUX_ARM_ARCH__ < 6 static inline void __sync_icache_dcache(pte_t pteval) { diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d523cd8439a3..0f07579af472 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -300,6 +300,8 @@ mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc + tst r1, #PSR_I_BIT | 0x0f + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -314,6 +316,7 @@ @ after ldm {}^ add sp, sp, #\offset + PT_REGS_SIZE movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #elif defined(CONFIG_CPU_V7M) @ V7M restore. @ Note that we don't need to do clrex here as clearing the local @@ -329,6 +332,8 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC] @ get pc add sp, sp, #\offset + S_SP + tst r1, #PSR_I_BIT | 0x0f + bne 1f msr spsr_cxsf, r1 @ save in spsr_svc @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -341,6 +346,7 @@ .endif add sp, sp, #PT_REGS_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr +1: bug "Returning to usermode but unexpected PSR bits set?", \@ #endif /* !CONFIG_THUMB2_KERNEL */ .endm diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index f24628db5409..e2bd35b6780c 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -4,6 +4,7 @@ # source "virt/kvm/Kconfig" +source "virt/lib/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -23,6 +24,8 @@ config KVM select PREEMPT_NOTIFIERS select ANON_INODES select ARM_GIC + select ARM_GIC_V3 + select ARM_GIC_V3_ITS select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO @@ -36,6 +39,8 @@ config KVM select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING select HAVE_KVM_MSI + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. 
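The pte_access_permitted() helper added to arch/arm/include/asm/pgtable.h above folds the present/user/write checks into a single mask compare: for a write, L_PTE_RDONLY is added to the mask but not to the expected value, so the check passes only when that bit is clear. A standalone sketch of the same mask trick, with illustrative flag values (the L_PTE_* numbers below are made up, not the real ARM encodings):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative flag positions only; the real L_PTE_* layout differs. */
#define L_PTE_PRESENT	(1u << 0)
#define L_PTE_USER	(1u << 1)
#define L_PTE_RDONLY	(1u << 2)

static bool pte_access_permitted(uint32_t pte, bool write)
{
	uint32_t mask = L_PTE_PRESENT | L_PTE_USER;
	uint32_t needed = mask;

	if (write)
		mask |= L_PTE_RDONLY;	/* bit must be 0, i.e. writable */

	return (pte & mask) == needed;
}

int main(void)
{
	uint32_t ro = L_PTE_PRESENT | L_PTE_USER | L_PTE_RDONLY;
	uint32_t rw = L_PTE_PRESENT | L_PTE_USER;

	printf("ro read:  %d\n", pte_access_permitted(ro, false));	/* 1 */
	printf("ro write: %d\n", pte_access_permitted(ro, true));	/* 0 */
	printf("rw write: %d\n", pte_access_permitted(rw, true));	/* 1 */
	return 0;
}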
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index f550abd64a25..48de846f2246 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-v3.o +obj-y += $(KVM)/arm/vgic/vgic-v4.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/arch/arm/mach-iop32x/n2100.c b/arch/arm/mach-iop32x/n2100.c index c1cd80ecc219..3b73813c6b04 100644 --- a/arch/arm/mach-iop32x/n2100.c +++ b/arch/arm/mach-iop32x/n2100.c @@ -305,7 +305,7 @@ static void n2100_restart(enum reboot_mode mode, const char *cmd) static struct timer_list power_button_poll_timer; -static void power_button_poll(unsigned long dummy) +static void power_button_poll(struct timer_list *unused) { if (gpio_get_value(N2100_POWER_BUTTON) == 0) { ctrl_alt_del(); @@ -336,8 +336,7 @@ static int __init n2100_request_gpios(void) pr_err("could not set power GPIO as input\n"); } /* Set up power button poll timer */ - init_timer(&power_button_poll_timer); - power_button_poll_timer.function = power_button_poll; + timer_setup(&power_button_poll_timer, power_button_poll, 0); power_button_poll_timer.expires = jiffies + (HZ / 10); add_timer(&power_button_poll_timer); return 0; diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index ac97a4599034..0f5c99941a7d 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -179,10 +179,10 @@ static int power_button_countdown; /* Must hold the button down for at least this many counts to be processed */ #define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */ -static void dsmg600_power_handler(unsigned long data); +static void dsmg600_power_handler(struct timer_list *unused); static DEFINE_TIMER(dsmg600_power_timer, dsmg600_power_handler); -static void dsmg600_power_handler(unsigned long data) +static void dsmg600_power_handler(struct timer_list *unused) { /* This routine is called twice per second to check the * state of the power button. diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index 435602085408..76dfff03cb71 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -202,10 +202,10 @@ static int power_button_countdown; /* Must hold the button down for at least this many counts to be processed */ #define PBUTTON_HOLDDOWN_COUNT 4 /* 2 secs */ -static void nas100d_power_handler(unsigned long data); +static void nas100d_power_handler(struct timer_list *unused); static DEFINE_TIMER(nas100d_power_timer, nas100d_power_handler); -static void nas100d_power_handler(unsigned long data) +static void nas100d_power_handler(struct timer_list *unused) { /* This routine is called twice per second to check the * state of the power button. 
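Many hunks in this merge (srmcons.c, n2100.c, dsmg600-setup.c and nas100d-setup.c above, and several more below) are the same mechanical conversion to the timer API introduced in 4.15: the callback takes a struct timer_list * instead of an unsigned long cookie, and from_timer() — container_of() underneath — recovers the enclosing object. A self-contained userspace model of the pattern; struct my_dev and my_dev_poll are hypothetical names, and the one-line timer_list here only mimics the kernel type:

/* Userspace model of the timer_setup()/from_timer() conversion. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct timer_list {
	void (*function)(struct timer_list *);
};

/* The kernel's from_timer() is container_of() in disguise. */
#define from_timer(var, t, member) \
	container_of(t, typeof(*var), member)

struct my_dev {			/* hypothetical driver state */
	int events;
	struct timer_list timer;
};

static void my_dev_poll(struct timer_list *t)
{
	/* recover the enclosing object; no unsigned long cast needed */
	struct my_dev *dev = from_timer(dev, t, timer);

	dev->events++;
	printf("polled, events=%d\n", dev->events);
}

int main(void)
{
	struct my_dev dev = { .timer = { .function = my_dev_poll } };

	/* the kernel core would invoke this on timer expiry */
	dev.timer.function(&dev.timer);
	return 0;
}

Because the callback can always find its state through container_of(), the cookie argument to timer_setup() drops to 0 in nearly every hunk, and handlers that never used their cookie (like the power-button pollers above) simply rename the parameter to "unused".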
diff --git a/arch/arm/mach-orion5x/db88f5281-setup.c b/arch/arm/mach-orion5x/db88f5281-setup.c index 3f5863de766a..39eae10ac8de 100644 --- a/arch/arm/mach-orion5x/db88f5281-setup.c +++ b/arch/arm/mach-orion5x/db88f5281-setup.c @@ -172,7 +172,7 @@ static struct platform_device db88f5281_nand_flash = { static void __iomem *db88f5281_7seg; static struct timer_list db88f5281_timer; -static void db88f5281_7seg_event(unsigned long data) +static void db88f5281_7seg_event(struct timer_list *unused) { static int count = 0; writel(0, db88f5281_7seg + (count << 4)); @@ -189,7 +189,7 @@ static int __init db88f5281_7seg_init(void) printk(KERN_ERR "Failed to ioremap db88f5281_7seg\n"); return -EIO; } - setup_timer(&db88f5281_timer, db88f5281_7seg_event, 0); + timer_setup(&db88f5281_timer, db88f5281_7seg_event, 0); mod_timer(&db88f5281_timer, jiffies + 2 * HZ); } diff --git a/arch/arm/mach-pxa/cm-x255.c b/arch/arm/mach-pxa/cm-x255.c index b592f79a1742..fa8e7dd4d898 100644 --- a/arch/arm/mach-pxa/cm-x255.c +++ b/arch/arm/mach-pxa/cm-x255.c @@ -14,7 +14,7 @@ #include <linux/mtd/partitions.h> #include <linux/mtd/physmap.h> #include <linux/mtd/nand-gpio.h> - +#include <linux/gpio/machine.h> #include <linux/spi/spi.h> #include <linux/spi/pxa2xx_spi.h> @@ -176,6 +176,17 @@ static inline void cmx255_init_nor(void) {} #endif #if defined(CONFIG_MTD_NAND_GPIO) || defined(CONFIG_MTD_NAND_GPIO_MODULE) + +static struct gpiod_lookup_table cmx255_nand_gpiod_table = { + .dev_id = "gpio-nand", + .table = { + GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CS, "nce", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_NAND_CLE, "cle", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_NAND_ALE, "ale", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-pxa", GPIO_NAND_RB, "rdy", GPIO_ACTIVE_HIGH), + }, +}; + static struct resource cmx255_nand_resource[] = { [0] = { .start = PXA_CS1_PHYS, @@ -198,11 +209,6 @@ static struct mtd_partition cmx255_nand_parts[] = { }; static struct gpio_nand_platdata cmx255_nand_platdata = { - .gpio_nce = GPIO_NAND_CS, - .gpio_cle = GPIO_NAND_CLE, - .gpio_ale = GPIO_NAND_ALE, - .gpio_rdy = GPIO_NAND_RB, - .gpio_nwp = -1, .parts = cmx255_nand_parts, .num_parts = ARRAY_SIZE(cmx255_nand_parts), .chip_delay = 25, @@ -220,6 +226,7 @@ static struct platform_device cmx255_nand = { static void __init cmx255_init_nand(void) { + gpiod_add_lookup_table(&cmx255_nand_gpiod_table); platform_device_register(&cmx255_nand); } #else diff --git a/arch/arm/mach-uniphier/Makefile b/arch/arm/mach-uniphier/Makefile index 6bea3d3a2dd7..e69de29bb2d1 100644 --- a/arch/arm/mach-uniphier/Makefile +++ b/arch/arm/mach-uniphier/Makefile @@ -1 +0,0 @@ -obj- += dummy.o diff --git a/arch/arm/mm/dump.c b/arch/arm/mm/dump.c index 35ff45470dbf..fc3b44028cfb 100644 --- a/arch/arm/mm/dump.c +++ b/arch/arm/mm/dump.c @@ -129,8 +129,8 @@ static const struct prot_bits section_bits[] = { .val = PMD_SECT_USER, .set = "USR", }, { - .mask = L_PMD_SECT_RDONLY, - .val = L_PMD_SECT_RDONLY, + .mask = L_PMD_SECT_RDONLY | PMD_SECT_AP2, + .val = L_PMD_SECT_RDONLY | PMD_SECT_AP2, .set = "ro", .clear = "RW", #elif __LINUX_ARM_ARCH__ >= 6 diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 81d4482b6861..a1f11a7ee81b 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -629,8 +629,8 @@ static struct section_perm ro_perms[] = { .start = (unsigned long)_stext, .end = (unsigned long)__init_begin, #ifdef CONFIG_ARM_LPAE - .mask = ~L_PMD_SECT_RDONLY, - .prot = L_PMD_SECT_RDONLY, + .mask = ~(L_PMD_SECT_RDONLY | PMD_SECT_AP2), + .prot = L_PMD_SECT_RDONLY | PMD_SECT_AP2, 
#else .mask = ~(PMD_SECT_APX | PMD_SECT_AP_WRITE), .prot = PMD_SECT_APX | PMD_SECT_AP_WRITE, diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index b35788c909f1..b481b4a7c011 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -83,9 +83,6 @@ endif ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDFLAGS_MODULE += -T $(srctree)/arch/arm64/kernel/module.lds -ifeq ($(CONFIG_DYNAMIC_FTRACE),y) -KBUILD_LDFLAGS_MODULE += $(objtree)/arch/arm64/kernel/ftrace-mod.o -endif endif # Default value diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index 76d1cc85d5b1..955130762a3c 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -38,7 +38,7 @@ * * See Documentation/cachetlb.txt for more information. Please note that * the implementation assumes non-aliasing VIPT D-cache and (aliasing) - * VIPT or ASID-tagged VIVT I-cache. + * VIPT I-cache. * * flush_cache_mm(mm) * diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index e39d487bf724..a3c7f271ad4c 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -215,7 +215,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL /* * A pointer passed in from user mode. This should not diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 19bd97671bb8..4f766178fa6f 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -32,7 +32,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - void *ftrace_trampoline; + struct plt_entry *ftrace_trampoline; }; #endif @@ -45,4 +45,48 @@ extern u64 module_alloc_base; #define module_alloc_base ((u64)_etext - MODULES_VSIZE) #endif +struct plt_entry { + /* + * A program that conforms to the AArch64 Procedure Call Standard + * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or + * IP1 (x17) may be inserted at any branch instruction that is + * exposed to a relocation that supports long branches. Since that + * is exactly what we are dealing with here, we are free to use x16 + * as a scratch register in the PLT veneers. + */ + __le32 mov0; /* movn x16, #0x.... 
*/ + __le32 mov1; /* movk x16, #0x...., lsl #16 */ + __le32 mov2; /* movk x16, #0x...., lsl #32 */ + __le32 br; /* br x16 */ +}; + +static inline struct plt_entry get_plt_entry(u64 val) +{ + /* + * MOVK/MOVN/MOVZ opcode: + * +--------+------------+--------+-----------+-------------+---------+ + * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | + * +--------+------------+--------+-----------+-------------+---------+ + * + * Rd := 0x10 (x16) + * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) + * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) + * sf := 1 (64-bit variant) + */ + return (struct plt_entry){ + cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), + cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), + cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), + cpu_to_le32(0xd61f0200) + }; +} + +static inline bool plt_entries_equal(const struct plt_entry *a, + const struct plt_entry *b) +{ + return a->mov0 == b->mov0 && + a->mov1 == b->mov1 && + a->mov2 == b->mov2; +} + #endif /* __ASM_MODULE_H */ diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index c9530b5b5ca8..149d05fb9421 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -345,7 +345,6 @@ static inline int pmd_protnone(pmd_t pmd) #define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd)) -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT)) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 8265dd790895..067baace74a0 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds ifeq ($(CONFIG_DEBUG_EFI),y) AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\"" endif - -# will be included by each individual module but not by the core kernel itself -extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d16978213c5b..ea001241bdd4 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops; const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; -static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = { &smp_spin_table_ops, &cpu_psci_ops, NULL, }; -static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { +static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = { #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL &acpi_parking_protocol_ops, #endif @@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = { static const struct cpu_operations * __init cpu_get_ops(const char *name) { - const struct cpu_operations **ops; + const struct cpu_operations *const *ops; ops = acpi_disabled ? 
dt_supported_cpu_ops : acpi_supported_cpu_ops; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 143b3e72c25e..5084e699447a 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state) local_bh_disable(); - if (system_supports_sve() && test_thread_flag(TIF_SVE)) { - current->thread.fpsimd_state = *state; + current->thread.fpsimd_state = *state; + if (system_supports_sve() && test_thread_flag(TIF_SVE)) fpsimd_to_sve(current); - } + task_fpsimd_load(); if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { diff --git a/arch/arm64/kernel/ftrace-mod.S b/arch/arm64/kernel/ftrace-mod.S deleted file mode 100644 index 00c4025be4ff..000000000000 --- a/arch/arm64/kernel/ftrace-mod.S +++ /dev/null @@ -1,18 +0,0 @@ -/* - * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/assembler.h> - - .section ".text.ftrace_trampoline", "ax" - .align 3 -0: .quad 0 -__ftrace_trampoline: - ldr x16, 0b - br x16 -ENDPROC(__ftrace_trampoline) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index c13b1fca0e5b..50986e388d2b 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - unsigned long *trampoline; + struct plt_entry trampoline; struct module *mod; /* @@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * is added in the future, but for now, the pr_err() below * deals with a theoretical issue only. */ - trampoline = (unsigned long *)mod->arch.ftrace_trampoline; - if (trampoline[0] != addr) { - if (trampoline[0] != 0) { + trampoline = get_plt_entry(addr); + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &trampoline)) { + if (!plt_entries_equal(mod->arch.ftrace_trampoline, + &(struct plt_entry){})) { pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); return -EINVAL; } /* point the trampoline to our ftrace entry point */ module_disable_ro(mod); - trampoline[0] = addr; + *mod->arch.ftrace_trampoline = trampoline; module_enable_ro(mod, true); /* update trampoline before patching in the branch */ smp_wmb(); } - addr = (unsigned long)&trampoline[1]; + addr = (unsigned long)(void *)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index d05dbe658409..ea640f92fe5a 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -11,21 +11,6 @@ #include <linux/module.h> #include <linux/sort.h> -struct plt_entry { - /* - * A program that conforms to the AArch64 Procedure Call Standard - * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or - * IP1 (x17) may be inserted at any branch instruction that is - * exposed to a relocation that supports long branches. Since that - * is exactly what we are dealing with here, we are free to use x16 - * as a scratch register in the PLT veneers. - */ - __le32 mov0; /* movn x16, #0x.... 
*/ - __le32 mov1; /* movk x16, #0x...., lsl #16 */ - __le32 mov2; /* movk x16, #0x...., lsl #32 */ - __le32 br; /* br x16 */ -}; - static bool in_init(const struct module *mod, void *loc) { return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size; @@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela, int i = pltsec->plt_num_entries; u64 val = sym->st_value + rela->r_addend; - /* - * MOVK/MOVN/MOVZ opcode: - * +--------+------------+--------+-----------+-------------+---------+ - * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] | - * +--------+------------+--------+-----------+-------------+---------+ - * - * Rd := 0x10 (x16) - * hw := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32) - * opc := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ) - * sf := 1 (64-bit variant) - */ - plt[i] = (struct plt_entry){ - cpu_to_le32(0x92800010 | (((~val ) & 0xffff)) << 5), - cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5), - cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5), - cpu_to_le32(0xd61f0200) - }; + plt[i] = get_plt_entry(val); /* * Check if the entry we just created is a duplicate. Given that the * relocations are sorted, this will be the last entry we allocated. * (if one exists). */ - if (i > 0 && - plt[i].mov0 == plt[i - 1].mov0 && - plt[i].mov1 == plt[i - 1].mov1 && - plt[i].mov2 == plt[i - 1].mov2) + if (i > 0 && plt_entries_equal(plt + i, plt + i - 1)) return (u64)&plt[i - 1]; pltsec->plt_num_entries++; @@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, unsigned long core_plts = 0; unsigned long init_plts = 0; Elf64_Sym *syms = NULL; + Elf_Shdr *tramp = NULL; int i; /* @@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.core.plt = sechdrs + i; else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt")) mod->arch.init.plt = sechdrs + i; + else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && + !strcmp(secstrings + sechdrs[i].sh_name, + ".text.ftrace_trampoline")) + tramp = sechdrs + i; else if (sechdrs[i].sh_type == SHT_SYMTAB) syms = (Elf64_Sym *)sechdrs[i].sh_addr; } @@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, mod->arch.init.plt_num_entries = 0; mod->arch.init.plt_max_entries = init_plts; + if (tramp) { + tramp->sh_type = SHT_NOBITS; + tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + tramp->sh_addralign = __alignof__(struct plt_entry); + tramp->sh_size = sizeof(struct plt_entry); + } + return 0; } diff --git a/arch/arm64/kernel/module.lds b/arch/arm64/kernel/module.lds index f7c9781a9d48..22e36a21c113 100644 --- a/arch/arm64/kernel/module.lds +++ b/arch/arm64/kernel/module.lds @@ -1,4 +1,5 @@ SECTIONS { .plt (NOLOAD) : { BYTE(0) } .init.plt (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..3affca3dd96a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] [C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD, [C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - [C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, - - [C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD, - 
[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR, }; static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 13f81f971390..2257dfcc44cc 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -4,6 +4,7 @@ # source "virt/kvm/Kconfig" +source "virt/lib/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -36,6 +37,8 @@ config KVM select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING + select IRQ_BYPASS_MANAGER + select HAVE_KVM_IRQ_BYPASS ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 861acbbac385..87c4f7ae24de 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index ab9f5f0fb2c7..6f4017046323 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu) set_reserved_asid_bits(); - /* - * Ensure the generation bump is observed before we xchg the - * active_asids. - */ - smp_wmb(); - for_each_possible_cpu(i) { asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0); /* @@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu) per_cpu(reserved_asids, i) = asid; } - /* Queue a TLB invalidate and flush the I-cache if necessary. */ + /* + * Queue a TLB invalidation for each CPU to perform on next + * context-switch + */ cpumask_setall(&tlb_flush_pending); } @@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) asid = atomic64_read(&mm->context.id); /* - * The memory ordering here is subtle. We rely on the control - * dependency between the generation read and the update of - * active_asids to ensure that we are synchronised with a - * parallel rollover (i.e. this pairs with the smp_wmb() in - * flush_context). + * The memory ordering here is subtle. + * If our ASID matches the current generation, then we update + * our active_asids entry with a relaxed xchg. Racing with a + * concurrent rollover means that either: + * + * - We get a zero back from the xchg and end up waiting on the + * lock. Taking the lock synchronises with the rollover and so + * we are forced to see the updated generation. + * + * - We get a valid ASID back from the xchg, which means the + * relaxed xchg in flush_context will treat us as reserved + * because atomic RmWs are totally ordered for a given location. 
*/ if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid)) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 371c5f03a170..051e71ec3335 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -26,7 +26,7 @@ #include <asm/page.h> #include <asm/tlbflush.h> -static struct kmem_cache *pgd_cache; +static struct kmem_cache *pgd_cache __ro_after_init; pgd_t *pgd_alloc(struct mm_struct *mm) { diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 1e714329fe8a..8a211d95821f 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -166,7 +166,7 @@ int check_nmi_wdt_touched(void) return 1; } -static void nmi_wdt_timer(unsigned long data) +static void nmi_wdt_timer(struct timer_list *unused) { if (check_nmi_wdt_touched()) nmi_wdt_keepalive(); @@ -180,8 +180,7 @@ static int __init init_nmi_wdt(void) nmi_wdt_start(); nmi_active = true; - init_timer(&ntimer); - ntimer.function = nmi_wdt_timer; + timer_setup(&ntimer, nmi_wdt_timer, 0); ntimer.expires = jiffies + NMI_CHECK_TIMEOUT; add_timer(&ntimer); diff --git a/arch/c6x/Makefile b/arch/c6x/Makefile index 6b0be670ddfa..6f6096ff05a4 100644 --- a/arch/c6x/Makefile +++ b/arch/c6x/Makefile @@ -12,7 +12,7 @@ cflags-y += -mno-dsbt -msdata=none -D__linux__ cflags-$(CONFIG_C6X_BIG_KERNEL) += -mlong-calls -CFLAGS_MODULE += -mlong-calls -mno-dsbt -msdata=none +KBUILD_CFLAGS_MODULE += -mlong-calls -mno-dsbt -msdata=none CHECKFLAGS += diff --git a/arch/frv/kernel/.gitignore b/arch/frv/kernel/.gitignore new file mode 100644 index 000000000000..c5f676c3c224 --- /dev/null +++ b/arch/frv/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/hexagon/Makefile b/arch/hexagon/Makefile index 48fe08230a80..2efaa18e995a 100644 --- a/arch/hexagon/Makefile +++ b/arch/hexagon/Makefile @@ -12,9 +12,9 @@ KBUILD_CFLAGS += -fno-short-enums # Modules must use either long-calls, or use pic/plt. # Use long-calls for now, it's easier. And faster. -# CFLAGS_MODULE += -fPIC -# LDFLAGS_MODULE += -shared -CFLAGS_MODULE += -mlong-calls +# KBUILD_CFLAGS_MODULE += -fPIC +# KBUILD_LDFLAGS_MODULE += -shared +KBUILD_CFLAGS_MODULE += -mlong-calls cflags-y += $(call cc-option,-mv${CONFIG_HEXAGON_ARCH_VERSION}) aflags-y += $(call cc-option,-mv${CONFIG_HEXAGON_ARCH_VERSION}) diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index ecd75e2e8eb3..fa76493c1745 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -18,8 +18,6 @@ * 02110-1301, USA. */ -#include <generated/compile.h> - #include <linux/kernel.h> #include <linux/sched.h> #include <linux/sched/task_stack.h> @@ -180,7 +178,7 @@ static const struct user_regset hexagon_regsets[] = { }; static const struct user_regset_view hexagon_user_view = { - .name = UTS_MACHINE, + .name = "hexagon", .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, .regsets = hexagon_regsets, diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h index 3ad8f6988363..82f9bf702804 100644 --- a/arch/ia64/include/asm/topology.h +++ b/arch/ia64/include/asm/topology.h @@ -34,13 +34,6 @@ &node_to_cpu_mask[node]) /* - * Returns the number of the node containing Node 'nid'. - * Not implemented here. Multi-level hierarchies detected with - * the help of node_distance(). 
- */ -#define parent_node(nid) (nid) - -/* * Determines the node for a given pci bus */ #define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index f7693f49c573..f4db2168d1b8 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -31,8 +31,8 @@ void foo(void) DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); - BUILD_BUG_ON(sizeof(struct upid) != 32); - DEFINE(IA64_UPID_SHIFT, 5); + BUILD_BUG_ON(sizeof(struct upid) != 16); + DEFINE(IA64_UPID_SHIFT, 4); BLANK(); diff --git a/arch/m68k/amiga/amisound.c b/arch/m68k/amiga/amisound.c index a23f48181fd6..442bdeee6bd7 100644 --- a/arch/m68k/amiga/amisound.c +++ b/arch/m68k/amiga/amisound.c @@ -65,7 +65,7 @@ void __init amiga_init_sound(void) #endif } -static void nosound( unsigned long ignored ); +static void nosound(struct timer_list *unused); static DEFINE_TIMER(sound_timer, nosound); void amiga_mksound( unsigned int hz, unsigned int ticks ) @@ -107,7 +107,7 @@ void amiga_mksound( unsigned int hz, unsigned int ticks ) } -static void nosound( unsigned long ignored ) +static void nosound(struct timer_list *unused) { /* turn off DMA for audio channel 2 */ custom.dmacon = DMAF_AUD2; diff --git a/arch/m68k/mac/macboing.c b/arch/m68k/mac/macboing.c index d17668649641..388780797f7d 100644 --- a/arch/m68k/mac/macboing.c +++ b/arch/m68k/mac/macboing.c @@ -48,9 +48,9 @@ static unsigned long mac_bell_phasepersample; * some function protos */ static void mac_init_asc( void ); -static void mac_nosound( unsigned long ); +static void mac_nosound(struct timer_list *); static void mac_quadra_start_bell( unsigned int, unsigned int, unsigned int ); -static void mac_quadra_ring_bell( unsigned long ); +static void mac_quadra_ring_bell(struct timer_list *); static void mac_av_start_bell( unsigned int, unsigned int, unsigned int ); static void ( *mac_special_bell )( unsigned int, unsigned int, unsigned int ); @@ -216,7 +216,7 @@ void mac_mksound( unsigned int freq, unsigned int length ) /* * regular ASC: stop whining .. */ -static void mac_nosound( unsigned long ignored ) +static void mac_nosound(struct timer_list *unused) { mac_asc_regs[ ASC_ENABLE ] = 0; } @@ -270,7 +270,7 @@ static void mac_quadra_start_bell( unsigned int freq, unsigned int length, unsig * already load the wave table, or at least call this one... * This piece keeps reloading the wave table until done. 
*/ -static void mac_quadra_ring_bell( unsigned long ignored ) +static void mac_quadra_ring_bell(struct timer_list *unused) { int i, count = mac_asc_samplespersec / HZ; unsigned long flags; diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index 99472d2ca340..97559fe0b953 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -13,6 +13,7 @@ #include <linux/atomic.h> #include <linux/mm_types.h> +#include <linux/sched.h> #include <asm/bitops.h> #include <asm/mmu.h> diff --git a/arch/mips/boot/dts/brcm/Makefile b/arch/mips/boot/dts/brcm/Makefile index 09ba7e894bad..d8787c9a499e 100644 --- a/arch/mips/boot/dts/brcm/Makefile +++ b/arch/mips/boot/dts/brcm/Makefile @@ -35,6 +35,3 @@ dtb-$(CONFIG_DT_NONE) += \ bcm97435svmb.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/cavium-octeon/Makefile b/arch/mips/boot/dts/cavium-octeon/Makefile index f5d01b31df50..24a8efcd7b03 100644 --- a/arch/mips/boot/dts/cavium-octeon/Makefile +++ b/arch/mips/boot/dts/cavium-octeon/Makefile @@ -2,6 +2,3 @@ dtb-$(CONFIG_CAVIUM_OCTEON_SOC) += octeon_3xxx.dtb octeon_68xx.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/img/Makefile b/arch/mips/boot/dts/img/Makefile index 3eb2597a4d6c..441a3c16efb0 100644 --- a/arch/mips/boot/dts/img/Makefile +++ b/arch/mips/boot/dts/img/Makefile @@ -3,6 +3,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += boston.dtb dtb-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb obj-$(CONFIG_MACH_PISTACHIO) += pistachio_marduk.dtb.o - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/ingenic/Makefile b/arch/mips/boot/dts/ingenic/Makefile index 035769269cbc..6a31759839b4 100644 --- a/arch/mips/boot/dts/ingenic/Makefile +++ b/arch/mips/boot/dts/ingenic/Makefile @@ -3,6 +3,3 @@ dtb-$(CONFIG_JZ4740_QI_LB60) += qi_lb60.dtb dtb-$(CONFIG_JZ4780_CI20) += ci20.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/lantiq/Makefile b/arch/mips/boot/dts/lantiq/Makefile index 00e2e540ed3f..51ab9c1dff42 100644 --- a/arch/mips/boot/dts/lantiq/Makefile +++ b/arch/mips/boot/dts/lantiq/Makefile @@ -2,6 +2,3 @@ dtb-$(CONFIG_DT_EASY50712) += easy50712.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/mti/Makefile b/arch/mips/boot/dts/mti/Makefile index 480af498a9dd..3508720cb6d9 100644 --- a/arch/mips/boot/dts/mti/Makefile +++ b/arch/mips/boot/dts/mti/Makefile @@ -3,6 +3,3 @@ dtb-$(CONFIG_MIPS_MALTA) += malta.dtb dtb-$(CONFIG_LEGACY_BOARD_SEAD3) += sead3.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/netlogic/Makefile b/arch/mips/boot/dts/netlogic/Makefile index 2b99450d7433..d630b27950f0 100644 --- a/arch/mips/boot/dts/netlogic/Makefile +++ b/arch/mips/boot/dts/netlogic/Makefile @@ -6,6 +6,3 @@ dtb-$(CONFIG_DT_XLP_GVP) += xlp_gvp.dtb dtb-$(CONFIG_DT_XLP_RVP) += xlp_rvp.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/ni/Makefile 
b/arch/mips/boot/dts/ni/Makefile index 6cd9c606f025..9e2c9faede47 100644 --- a/arch/mips/boot/dts/ni/Makefile +++ b/arch/mips/boot/dts/ni/Makefile @@ -1,4 +1 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_NI169445) += 169445.dtb - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/pic32/Makefile b/arch/mips/boot/dts/pic32/Makefile index a139a0fbd7b7..ba9bcef8fde9 100644 --- a/arch/mips/boot/dts/pic32/Makefile +++ b/arch/mips/boot/dts/pic32/Makefile @@ -5,6 +5,3 @@ dtb-$(CONFIG_DTB_PIC32_NONE) += \ pic32mzda_sk.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/qca/Makefile b/arch/mips/boot/dts/qca/Makefile index 639adeac90af..4451cf45b0ad 100644 --- a/arch/mips/boot/dts/qca/Makefile +++ b/arch/mips/boot/dts/qca/Makefile @@ -5,6 +5,3 @@ dtb-$(CONFIG_ATH79) += ar9331_dpt_module.dtb dtb-$(CONFIG_ATH79) += ar9331_dragino_ms14.dtb dtb-$(CONFIG_ATH79) += ar9331_omega.dtb dtb-$(CONFIG_ATH79) += ar9331_tl_mr3020.dtb - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/ralink/Makefile b/arch/mips/boot/dts/ralink/Makefile index 323c8bcfb602..94bee5b38b53 100644 --- a/arch/mips/boot/dts/ralink/Makefile +++ b/arch/mips/boot/dts/ralink/Makefile @@ -7,6 +7,3 @@ dtb-$(CONFIG_DTB_OMEGA2P) += omega2p.dtb dtb-$(CONFIG_DTB_VOCORE2) += vocore2.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile index 616322405ade..9987e0e378c5 100644 --- a/arch/mips/boot/dts/xilfpga/Makefile +++ b/arch/mips/boot/dts/xilfpga/Makefile @@ -2,6 +2,3 @@ dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) - -# Force kbuild to make empty built-in.o if necessary -obj- += dummy.o diff --git a/arch/mips/include/asm/compat-signal.h b/arch/mips/include/asm/compat-signal.h index e87cd243b0f4..c3b7a2550d1d 100644 --- a/arch/mips/include/asm/compat-signal.h +++ b/arch/mips/include/asm/compat-signal.h @@ -14,45 +14,16 @@ static inline int __copy_conv_sigset_to_user(compat_sigset_t __user *d, const sigset_t *s) { - int err; + BUILD_BUG_ON(sizeof(*d) != sizeof(*s)); + BUILD_BUG_ON(_NSIG_WORDS != 2); - BUG_ON(sizeof(*d) != sizeof(*s)); - BUG_ON(_NSIG_WORDS != 2); - - err = __put_user(s->sig[0], &d->sig[0]); - err |= __put_user(s->sig[0] >> 32, &d->sig[1]); - err |= __put_user(s->sig[1], &d->sig[2]); - err |= __put_user(s->sig[1] >> 32, &d->sig[3]); - - return err; + return put_compat_sigset(d, s, sizeof(*d)); } static inline int __copy_conv_sigset_from_user(sigset_t *d, const compat_sigset_t __user *s) { - int err; - union sigset_u { - sigset_t s; - compat_sigset_t c; - } *u = (union sigset_u *) d; - - BUG_ON(sizeof(*d) != sizeof(*s)); - BUG_ON(_NSIG_WORDS != 2); - -#ifdef CONFIG_CPU_BIG_ENDIAN - err = __get_user(u->c.sig[1], &s->sig[0]); - err |= __get_user(u->c.sig[0], &s->sig[1]); - err |= __get_user(u->c.sig[3], &s->sig[2]); - err |= __get_user(u->c.sig[2], &s->sig[3]); -#endif -#ifdef CONFIG_CPU_LITTLE_ENDIAN - err = __get_user(u->c.sig[0], &s->sig[0]); - err |= __get_user(u->c.sig[1], &s->sig[1]); - err |= __get_user(u->c.sig[2], &s->sig[2]); - err |= __get_user(u->c.sig[3], &s->sig[3]); -#endif - - return err; + return get_compat_sigset(d, s); } #endif /* __ASM_COMPAT_SIGNAL_H */ diff --git a/arch/mips/include/asm/compat.h 
b/arch/mips/include/asm/compat.h index 8e2b5b556488..49691331ada4 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -200,7 +200,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL /* * A pointer passed in from user mode. This should not diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index 9e9e94415d08..1a508a74d48d 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -552,7 +552,7 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_WRITE); diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 20c3df7a8fdd..606e02ca4b6c 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -29,6 +29,7 @@ */ #define MAP_SHARED 0x001 /* Share changes */ #define MAP_PRIVATE 0x002 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x003 /* share + validate extension flags */ #define MAP_TYPE 0x00f /* Mask for type of mapping */ #define MAP_FIXED 0x010 /* Interpret addr exactly */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d535edc01434..75fdeaa8c62f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -445,10 +445,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r = -EINTR; - sigset_t sigsaved; - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (vcpu->mmio_needed) { if (!vcpu->mmio_is_write) @@ -480,8 +478,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) local_irq_enable(); out: - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c index a8103f6972cd..5d89e1ec5fcc 100644 --- a/arch/mips/lasat/picvue_proc.c +++ b/arch/mips/lasat/picvue_proc.c @@ -156,7 +156,7 @@ static const struct file_operations pvc_scroll_proc_fops = { .write = pvc_scroll_proc_write, }; -void pvc_proc_timerfunc(unsigned long data) +void pvc_proc_timerfunc(struct timer_list *unused) { if (scroll_dir < 0) pvc_move(DISPLAY|RIGHT); @@ -197,7 +197,7 @@ static int __init pvc_proc_init(void) if (proc_entry == NULL) goto error; - setup_timer(&timer, pvc_proc_timerfunc, 0UL); + timer_setup(&timer, pvc_proc_timerfunc, 0); return 0; error: diff --git a/arch/mips/mti-malta/malta-display.c b/arch/mips/mti-malta/malta-display.c index 063de44675ce..ee0bd50f754b 100644 --- a/arch/mips/mti-malta/malta-display.c +++ b/arch/mips/mti-malta/malta-display.c @@ -36,10 +36,10 @@ void mips_display_message(const char *str) } } -static void scroll_display_message(unsigned long unused); +static void scroll_display_message(struct timer_list *unused); static DEFINE_TIMER(mips_scroll_timer, scroll_display_message); -static void scroll_display_message(unsigned long unused) +static void scroll_display_message(struct timer_list *unused) { mips_display_message(&display_string[display_count++]); if (display_count == max_display_count) diff --git a/arch/mn10300/mm/fault.c b/arch/mn10300/mm/fault.c index f23781d6bbb3..f0bfa1448744 100644 --- 
a/arch/mn10300/mm/fault.c +++ b/arch/mn10300/mm/fault.c @@ -60,7 +60,7 @@ void bust_spinlocks(int yes) void do_BUG(const char *file, int line) { bust_spinlocks(1); - printk(KERN_EMERG "------------[ cut here ]------------\n"); + printk(KERN_EMERG CUT_HERE); printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 1fd3eb5b66c6..9792d8cf4f56 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -32,6 +32,7 @@ config PARISC select GENERIC_PCI_IOMAP select ARCH_HAVE_NMI_SAFE_CMPXCHG select GENERIC_SMP_IDLE_THREAD + select GENERIC_CPU_DEVICES select GENERIC_STRNCPY_FROM_USER select SYSCTL_ARCH_UNALIGN_ALLOW select SYSCTL_EXCEPTION_TRACE @@ -60,9 +61,6 @@ config PARISC config CPU_BIG_ENDIAN def_bool y -config CPU_BIG_ENDIAN - def_bool y - config MMU def_bool y @@ -288,6 +286,21 @@ config SMP If you don't know what to do here, say N. +config PARISC_CPU_TOPOLOGY + bool "Support cpu topology definition" + depends on SMP + default y + help + Support PARISC cpu topology definition. + +config SCHED_MC + bool "Multi-core scheduler support" + depends on PARISC_CPU_TOPOLOGY && PA8X00 + help + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. + config IRQSTACKS bool "Use separate kernel stacks when processing interrupts" default y diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 01946ebaff72..e2364ff59180 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -22,7 +22,7 @@ KBUILD_IMAGE := vmlinuz KBUILD_DEFCONFIG := default_defconfig NM = sh $(srctree)/arch/parisc/nm -CHECKFLAGS += -D__hppa__=1 +CHECKFLAGS += -D__hppa__=1 -mbig-endian LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) export LIBGCC diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 07f48827afda..acf8aa07cbe0 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -195,7 +195,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL struct compat_ipc64_perm { compat_key_t key; diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h index efee44a5e063..339e83ddb39e 100644 --- a/arch/parisc/include/asm/pdc.h +++ b/arch/parisc/include/asm/pdc.h @@ -18,261 +18,6 @@ extern unsigned long parisc_cell_loc; /* cell location of CPU (PAT) */ #define PDC_TYPE_SYSTEM_MAP 1 /* 32-bit, but supports PDC_SYSTEM_MAP */ #define PDC_TYPE_SNAKE 2 /* Doesn't support SYSTEM_MAP */ -struct pdc_chassis_info { /* for PDC_CHASSIS_INFO */ - unsigned long actcnt; /* actual number of bytes returned */ - unsigned long maxcnt; /* maximum number of bytes that could be returned */ -}; - -struct pdc_coproc_cfg { /* for PDC_COPROC_CFG */ - unsigned long ccr_functional; - unsigned long ccr_present; - unsigned long revision; - unsigned long model; -}; - -struct pdc_model { /* for PDC_MODEL */ - unsigned long hversion; - unsigned long sversion; - unsigned long hw_id; - unsigned long boot_id; - unsigned long sw_id; - unsigned long sw_cap; - unsigned long arch_rev; - unsigned long pot_key; - unsigned long curr_key; -}; - -struct pdc_cache_cf { /* for PDC_CACHE (I/D-caches) */ - unsigned long -#ifdef CONFIG_64BIT - cc_padW:32, -#endif - cc_alias: 4, /* alias boundaries for virtual addresses */ - cc_block: 4, /* to determine most efficient stride */ - cc_line : 3, 
/* maximum amount written back as a result of store (multiple of 16 bytes) */ - cc_shift: 2, /* how much to shift cc_block left */ - cc_wt : 1, /* 0 = WT-Dcache, 1 = WB-Dcache */ - cc_sh : 2, /* 0 = separate I/D-cache, else shared I/D-cache */ - cc_cst : 3, /* 0 = incoherent D-cache, 1=coherent D-cache */ - cc_pad1 : 10, /* reserved */ - cc_hv : 3; /* hversion dependent */ -}; - -struct pdc_tlb_cf { /* for PDC_CACHE (I/D-TLB's) */ - unsigned long tc_pad0:12, /* reserved */ -#ifdef CONFIG_64BIT - tc_padW:32, -#endif - tc_sh : 2, /* 0 = separate I/D-TLB, else shared I/D-TLB */ - tc_hv : 1, /* HV */ - tc_page : 1, /* 0 = 2K page-size-machine, 1 = 4k page size */ - tc_cst : 3, /* 0 = incoherent operations, else coherent operations */ - tc_aid : 5, /* ITLB: width of access ids of processor (encoded!) */ - tc_sr : 8; /* ITLB: width of space-registers (encoded) */ -}; - -struct pdc_cache_info { /* main-PDC_CACHE-structure (caches & TLB's) */ - /* I-cache */ - unsigned long ic_size; /* size in bytes */ - struct pdc_cache_cf ic_conf; /* configuration */ - unsigned long ic_base; /* base-addr */ - unsigned long ic_stride; - unsigned long ic_count; - unsigned long ic_loop; - /* D-cache */ - unsigned long dc_size; /* size in bytes */ - struct pdc_cache_cf dc_conf; /* configuration */ - unsigned long dc_base; /* base-addr */ - unsigned long dc_stride; - unsigned long dc_count; - unsigned long dc_loop; - /* Instruction-TLB */ - unsigned long it_size; /* number of entries in I-TLB */ - struct pdc_tlb_cf it_conf; /* I-TLB-configuration */ - unsigned long it_sp_base; - unsigned long it_sp_stride; - unsigned long it_sp_count; - unsigned long it_off_base; - unsigned long it_off_stride; - unsigned long it_off_count; - unsigned long it_loop; - /* data-TLB */ - unsigned long dt_size; /* number of entries in D-TLB */ - struct pdc_tlb_cf dt_conf; /* D-TLB-configuration */ - unsigned long dt_sp_base; - unsigned long dt_sp_stride; - unsigned long dt_sp_count; - unsigned long dt_off_base; - unsigned long dt_off_stride; - unsigned long dt_off_count; - unsigned long dt_loop; -}; - -#if 0 -/* If you start using the next struct, you'll have to adjust it to - * work with 64-bit firmware I think -PB - */ -struct pdc_iodc { /* PDC_IODC */ - unsigned char hversion_model; - unsigned char hversion; - unsigned char spa; - unsigned char type; - unsigned int sversion_rev:4; - unsigned int sversion_model:19; - unsigned int sversion_opt:8; - unsigned char rev; - unsigned char dep; - unsigned char features; - unsigned char pad1; - unsigned int checksum:16; - unsigned int length:16; - unsigned int pad[15]; -} __attribute__((aligned(8))) ; -#endif - -#ifndef CONFIG_PA20 -/* no BLTBs in pa2.0 processors */ -struct pdc_btlb_info_range { - __u8 res00; - __u8 num_i; - __u8 num_d; - __u8 num_comb; -}; - -struct pdc_btlb_info { /* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */ - unsigned int min_size; /* minimum size of BTLB in pages */ - unsigned int max_size; /* maximum size of BTLB in pages */ - struct pdc_btlb_info_range fixed_range_info; - struct pdc_btlb_info_range variable_range_info; -}; - -#endif /* !CONFIG_PA20 */ - -struct pdc_mem_retinfo { /* PDC_MEM/PDC_MEM_MEMINFO (return info) */ - unsigned long pdt_size; - unsigned long pdt_entries; - unsigned long pdt_status; - unsigned long first_dbe_loc; - unsigned long good_mem; -}; - -struct pdc_mem_read_pdt { /* PDC_MEM/PDC_MEM_READ_PDT (return info) */ - unsigned long pdt_entries; -}; - -#ifdef CONFIG_64BIT -struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */ - 
unsigned long entries_returned; - unsigned long entries_total; -}; - -struct pdc_memory_table { /* PDC_MEM/PDC_MEM_TABLE (arguments) */ - unsigned long paddr; - unsigned int pages; - unsigned int reserved; -}; -#endif /* CONFIG_64BIT */ - -struct pdc_system_map_mod_info { /* PDC_SYSTEM_MAP/FIND_MODULE */ - unsigned long mod_addr; - unsigned long mod_pgs; - unsigned long add_addrs; -}; - -struct pdc_system_map_addr_info { /* PDC_SYSTEM_MAP/FIND_ADDRESS */ - unsigned long mod_addr; - unsigned long mod_pgs; -}; - -struct pdc_initiator { /* PDC_INITIATOR */ - int host_id; - int factor; - int width; - int mode; -}; - -struct hardware_path { - char flags; /* see bit definitions below */ - char bc[6]; /* Bus Converter routing info to a specific */ - /* I/O adaptor (< 0 means none, > 63 resvd) */ - char mod; /* fixed field of specified module */ -}; - -/* - * Device path specifications used by PDC. - */ -struct pdc_module_path { - struct hardware_path path; - unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */ -}; - -#ifndef CONFIG_PA20 -/* Only used on some pre-PA2.0 boxes */ -struct pdc_memory_map { /* PDC_MEMORY_MAP */ - unsigned long hpa; /* mod's register set address */ - unsigned long more_pgs; /* number of additional I/O pgs */ -}; -#endif - -struct pdc_tod { - unsigned long tod_sec; - unsigned long tod_usec; -}; - -/* architected results from PDC_PIM/transfer hpmc on a PA1.1 machine */ - -struct pdc_hpmc_pim_11 { /* PDC_PIM */ - __u32 gr[32]; - __u32 cr[32]; - __u32 sr[8]; - __u32 iasq_back; - __u32 iaoq_back; - __u32 check_type; - __u32 cpu_state; - __u32 rsvd1; - __u32 cache_check; - __u32 tlb_check; - __u32 bus_check; - __u32 assists_check; - __u32 rsvd2; - __u32 assist_state; - __u32 responder_addr; - __u32 requestor_addr; - __u32 path_info; - __u64 fr[32]; -}; - -/* - * architected results from PDC_PIM/transfer hpmc on a PA2.0 machine - * - * Note that PDC_PIM doesn't care whether or not wide mode was enabled - * so the results are different on PA1.1 vs. PA2.0 when in narrow mode. - * - * Note also that there are unarchitected results available, which - * are hversion dependent. Do a "ser pim 0 hpmc" after rebooting, since - * the firmware is probably the best way of printing hversion dependent - * data. 
- */ - -struct pdc_hpmc_pim_20 { /* PDC_PIM */ - __u64 gr[32]; - __u64 cr[32]; - __u64 sr[8]; - __u64 iasq_back; - __u64 iaoq_back; - __u32 check_type; - __u32 cpu_state; - __u32 cache_check; - __u32 tlb_check; - __u32 bus_check; - __u32 assists_check; - __u32 assist_state; - __u32 path_info; - __u64 responder_addr; - __u64 requestor_addr; - __u64 fr[32]; -}; - void pdc_console_init(void); /* in pdc_console.c */ void pdc_console_restart(void); diff --git a/arch/parisc/include/asm/topology.h b/arch/parisc/include/asm/topology.h new file mode 100644 index 000000000000..6f0750c74e47 --- /dev/null +++ b/arch/parisc/include/asm/topology.h @@ -0,0 +1,36 @@ +#ifndef _ASM_PARISC_TOPOLOGY_H +#define _ASM_PARISC_TOPOLOGY_H + +#ifdef CONFIG_PARISC_CPU_TOPOLOGY + +#include <linux/cpumask.h> + +struct cputopo_parisc { + int thread_id; + int core_id; + int socket_id; + cpumask_t thread_sibling; + cpumask_t core_sibling; +}; + +extern struct cputopo_parisc cpu_topology[NR_CPUS]; + +#define topology_physical_package_id(cpu) (cpu_topology[cpu].socket_id) +#define topology_core_id(cpu) (cpu_topology[cpu].core_id) +#define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) +#define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) + +void init_cpu_topology(void); +void store_cpu_topology(unsigned int cpuid); +const struct cpumask *cpu_coregroup_mask(int cpu); + +#else + +static inline void init_cpu_topology(void) { } +static inline void store_cpu_topology(unsigned int cpuid) { } + +#endif + +#include <asm-generic/topology.h> + +#endif /* _ASM_ARM_TOPOLOGY_H */ diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index d1af0d74a188..80510ba44c08 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -12,6 +12,7 @@ #define MAP_SHARED 0x01 /* Share changes */ #define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ #define MAP_TYPE 0x03 /* Mask for type of mapping */ #define MAP_FIXED 0x04 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x10 /* don't use a file */ diff --git a/arch/parisc/include/uapi/asm/pdc.h b/arch/parisc/include/uapi/asm/pdc.h index 0ad117617f1a..593eeb573138 100644 --- a/arch/parisc/include/uapi/asm/pdc.h +++ b/arch/parisc/include/uapi/asm/pdc.h @@ -16,6 +16,7 @@ #define PDC_ERROR -3 /* Call could not complete without an error */ #define PDC_NE_MOD -5 /* Module not found */ #define PDC_NE_CELL_MOD -7 /* Cell module not found */ +#define PDC_NE_BOOTDEV -9 /* Cannot locate a console device or boot device */ #define PDC_INVALID_ARG -10 /* Called with an invalid argument */ #define PDC_BUS_POW_WARN -12 /* Call could not complete in allowed power budget */ #define PDC_NOT_NARROW -17 /* Narrow mode not supported */ @@ -340,9 +341,6 @@ #if !defined(__ASSEMBLY__) -#include <linux/types.h> - - /* flags of the device_path */ #define PF_AUTOBOOT 0x80 #define PF_AUTOSEARCH 0x40 @@ -418,9 +416,255 @@ struct zeropage { int pad430[116]; /* [0x600] processor dependent */ - __u32 pad600[1]; - __u32 proc_sti; /* pointer to STI ROM */ - __u32 pad608[126]; + unsigned int pad600[1]; + unsigned int proc_sti; /* pointer to STI ROM */ + unsigned int pad608[126]; +}; + +struct pdc_chassis_info { /* for PDC_CHASSIS_INFO */ + unsigned long actcnt; /* actual number of bytes returned */ + unsigned long maxcnt; /* maximum number of bytes that could be returned */ +}; + +struct pdc_coproc_cfg { /* for PDC_COPROC_CFG */ + unsigned long ccr_functional; + 
unsigned long ccr_present; + unsigned long revision; + unsigned long model; +}; + +struct pdc_model { /* for PDC_MODEL */ + unsigned long hversion; + unsigned long sversion; + unsigned long hw_id; + unsigned long boot_id; + unsigned long sw_id; + unsigned long sw_cap; + unsigned long arch_rev; + unsigned long pot_key; + unsigned long curr_key; +}; + +struct pdc_cache_cf { /* for PDC_CACHE (I/D-caches) */ + unsigned long +#ifdef __LP64__ + cc_padW:32, +#endif + cc_alias: 4, /* alias boundaries for virtual addresses */ + cc_block: 4, /* to determine most efficient stride */ + cc_line : 3, /* maximum amount written back as a result of store (multiple of 16 bytes) */ + cc_shift: 2, /* how much to shift cc_block left */ + cc_wt : 1, /* 0 = WT-Dcache, 1 = WB-Dcache */ + cc_sh : 2, /* 0 = separate I/D-cache, else shared I/D-cache */ + cc_cst : 3, /* 0 = incoherent D-cache, 1=coherent D-cache */ + cc_pad1 : 10, /* reserved */ + cc_hv : 3; /* hversion dependent */ +}; + +struct pdc_tlb_cf { /* for PDC_CACHE (I/D-TLB's) */ + unsigned long tc_pad0:12, /* reserved */ +#ifdef __LP64__ + tc_padW:32, +#endif + tc_sh : 2, /* 0 = separate I/D-TLB, else shared I/D-TLB */ + tc_hv : 1, /* HV */ + tc_page : 1, /* 0 = 2K page-size-machine, 1 = 4k page size */ + tc_cst : 3, /* 0 = incoherent operations, else coherent operations */ + tc_aid : 5, /* ITLB: width of access ids of processor (encoded!) */ + tc_sr : 8; /* ITLB: width of space-registers (encoded) */ +}; + +struct pdc_cache_info { /* main-PDC_CACHE-structure (caches & TLB's) */ + /* I-cache */ + unsigned long ic_size; /* size in bytes */ + struct pdc_cache_cf ic_conf; /* configuration */ + unsigned long ic_base; /* base-addr */ + unsigned long ic_stride; + unsigned long ic_count; + unsigned long ic_loop; + /* D-cache */ + unsigned long dc_size; /* size in bytes */ + struct pdc_cache_cf dc_conf; /* configuration */ + unsigned long dc_base; /* base-addr */ + unsigned long dc_stride; + unsigned long dc_count; + unsigned long dc_loop; + /* Instruction-TLB */ + unsigned long it_size; /* number of entries in I-TLB */ + struct pdc_tlb_cf it_conf; /* I-TLB-configuration */ + unsigned long it_sp_base; + unsigned long it_sp_stride; + unsigned long it_sp_count; + unsigned long it_off_base; + unsigned long it_off_stride; + unsigned long it_off_count; + unsigned long it_loop; + /* data-TLB */ + unsigned long dt_size; /* number of entries in D-TLB */ + struct pdc_tlb_cf dt_conf; /* D-TLB-configuration */ + unsigned long dt_sp_base; + unsigned long dt_sp_stride; + unsigned long dt_sp_count; + unsigned long dt_off_base; + unsigned long dt_off_stride; + unsigned long dt_off_count; + unsigned long dt_loop; +}; + +/* Might need adjustment to work with 64-bit firmware */ +struct pdc_iodc { /* PDC_IODC */ + unsigned char hversion_model; + unsigned char hversion; + unsigned char spa; + unsigned char type; + unsigned int sversion_rev:4; + unsigned int sversion_model:19; + unsigned int sversion_opt:8; + unsigned char rev; + unsigned char dep; + unsigned char features; + unsigned char pad1; + unsigned int checksum:16; + unsigned int length:16; + unsigned int pad[15]; +} __attribute__((aligned(8))) ; + +/* no BTLBs in pa2.0 processors */ +struct pdc_btlb_info_range { + unsigned char res00; + unsigned char num_i; + unsigned char num_d; + unsigned char num_comb; +}; + +struct pdc_btlb_info { /* PDC_BLOCK_TLB, return of PDC_BTLB_INFO */ + unsigned int min_size; /* minimum size of BTLB in pages */ + unsigned int max_size; /* maximum size of BTLB in pages */ + struct
pdc_btlb_info_range fixed_range_info; + struct pdc_btlb_info_range variable_range_info; +}; + +struct pdc_mem_retinfo { /* PDC_MEM/PDC_MEM_MEMINFO (return info) */ + unsigned long pdt_size; + unsigned long pdt_entries; + unsigned long pdt_status; + unsigned long first_dbe_loc; + unsigned long good_mem; +}; + +struct pdc_mem_read_pdt { /* PDC_MEM/PDC_MEM_READ_PDT (return info) */ + unsigned long pdt_entries; +}; + +#ifdef __LP64__ +struct pdc_memory_table_raddr { /* PDC_MEM/PDC_MEM_TABLE (return info) */ + unsigned long entries_returned; + unsigned long entries_total; +}; + +struct pdc_memory_table { /* PDC_MEM/PDC_MEM_TABLE (arguments) */ + unsigned long paddr; + unsigned int pages; + unsigned int reserved; +}; +#endif /* __LP64__ */ + +struct pdc_system_map_mod_info { /* PDC_SYSTEM_MAP/FIND_MODULE */ + unsigned long mod_addr; + unsigned long mod_pgs; + unsigned long add_addrs; +}; + +struct pdc_system_map_addr_info { /* PDC_SYSTEM_MAP/FIND_ADDRESS */ + unsigned long mod_addr; + unsigned long mod_pgs; +}; + +struct pdc_initiator { /* PDC_INITIATOR */ + int host_id; + int factor; + int width; + int mode; +}; + +struct hardware_path { + char flags; /* see bit definitions below */ + char bc[6]; /* Bus Converter routing info to a specific */ + /* I/O adaptor (< 0 means none, > 63 resvd) */ + char mod; /* fixed field of specified module */ +}; + +/* + * Device path specifications used by PDC. + */ +struct pdc_module_path { + struct hardware_path path; + unsigned int layers[6]; /* device-specific info (ctlr #, unit # ...) */ +}; + +/* Only used on some pre-PA2.0 boxes */ +struct pdc_memory_map { /* PDC_MEMORY_MAP */ + unsigned long hpa; /* mod's register set address */ + unsigned long more_pgs; /* number of additional I/O pgs */ +}; + +struct pdc_tod { + unsigned long tod_sec; + unsigned long tod_usec; +}; + +/* architected results from PDC_PIM/transfer hpmc on a PA1.1 machine */ + +struct pdc_hpmc_pim_11 { /* PDC_PIM */ + unsigned int gr[32]; + unsigned int cr[32]; + unsigned int sr[8]; + unsigned int iasq_back; + unsigned int iaoq_back; + unsigned int check_type; + unsigned int cpu_state; + unsigned int rsvd1; + unsigned int cache_check; + unsigned int tlb_check; + unsigned int bus_check; + unsigned int assists_check; + unsigned int rsvd2; + unsigned int assist_state; + unsigned int responder_addr; + unsigned int requestor_addr; + unsigned int path_info; + unsigned long long fr[32]; +}; + +/* + * architected results from PDC_PIM/transfer hpmc on a PA2.0 machine + * + * Note that PDC_PIM doesn't care whether or not wide mode was enabled + * so the results are different on PA1.1 vs. PA2.0 when in narrow mode. + * + * Note also that there are unarchitected results available, which + * are hversion dependent. Do a "ser pim 0 hpmc" after rebooting, since + * the firmware is probably the best way of printing hversion dependent + * data. 
+ */ + +struct pdc_hpmc_pim_20 { /* PDC_PIM */ + unsigned long long gr[32]; + unsigned long long cr[32]; + unsigned long long sr[8]; + unsigned long long iasq_back; + unsigned long long iaoq_back; + unsigned int check_type; + unsigned int cpu_state; + unsigned int cache_check; + unsigned int tlb_check; + unsigned int bus_check; + unsigned int assists_check; + unsigned int assist_state; + unsigned int path_info; + unsigned long long responder_addr; + unsigned long long requestor_addr; + unsigned long long fr[32]; }; #endif /* !defined(__ASSEMBLY__) */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 649dc3eda448..eafd06ab59ef 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -9,8 +9,7 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ ptrace.o hardware.o inventory.o drivers.o \ signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \ - process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \ - topology.o + process.o processor.o pdc_cons.o pdc_chassis.o unwind.o ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities @@ -30,5 +29,6 @@ obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o # only supported for PCX-W/U in 64-bit mode at the moment obj-$(CONFIG_64BIT) += perf.o perf_asm.o $(obj64-y) +obj-$(CONFIG_PARISC_CPU_TOPOLOGY) += topology.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 27a2dd616a7d..c46bf29ae412 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -91,7 +91,7 @@ static int pdc_console_setup(struct console *co, char *options) #define PDC_CONS_POLL_DELAY (30 * HZ / 1000) -static void pdc_console_poll(unsigned long unused); +static void pdc_console_poll(struct timer_list *unused); static DEFINE_TIMER(pdc_console_timer, pdc_console_poll); static struct tty_port tty_port; @@ -135,7 +135,7 @@ static const struct tty_operations pdc_console_tty_ops = { .chars_in_buffer = pdc_console_tty_chars_in_buffer, }; -static void pdc_console_poll(unsigned long unused) +static void pdc_console_poll(struct timer_list *unused) { int data, count = 0; diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index e120d63c1b28..45cc65902fce 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -184,6 +184,9 @@ static int __init processor_probe(struct parisc_device *dev) p->txn_addr = txn_addr; /* save CPU IRQ address */ p->cpu_num = cpu_info.cpu_num; p->cpu_loc = cpu_info.cpu_loc; + + store_cpu_topology(cpuid); + #ifdef CONFIG_SMP /* ** FIXME: review if any other initialization is clobbered @@ -325,6 +328,8 @@ int __init init_per_cpu(int cpunum) set_firmware_width(); ret = pdc_coproc_cfg(&coproc_cfg); + store_cpu_topology(cpunum); + if(ret >= 0 && coproc_cfg.ccr_functional) { mtctl(coproc_cfg.ccr_functional, 10); /* 10 == Coprocessor Control Reg */ @@ -388,6 +393,14 @@ show_cpuinfo (struct seq_file *m, void *v) boot_cpu_data.cpu_hz / 1000000, boot_cpu_data.cpu_hz % 1000000 ); +#ifdef CONFIG_PARISC_CPU_TOPOLOGY + seq_printf(m, "physical id\t: %d\n", + topology_physical_package_id(cpu)); + seq_printf(m, "siblings\t: %d\n", + cpumask_weight(topology_core_cpumask(cpu))); + seq_printf(m, "core id\t\t: %d\n", topology_core_id(cpu)); +#endif + seq_printf(m, "capabilities\t:"); if (boot_cpu_data.pdc.capabilities & PDC_MODEL_OS32) seq_puts(m, " 
os32"); diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index f7d0c3b33d70..0e9675f857a5 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -408,6 +408,8 @@ void __init start_parisc(void) cpunum = smp_processor_id(); + init_cpu_topology(); + set_firmware_width_unlocked(); ret = pdc_coproc_cfg_unlocked(&coproc_cfg); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index f2a4038e275b..342073f44d3f 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -93,7 +93,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) unsigned long usp = (regs->gr[30] & ~(0x01UL)); unsigned long sigframe_size = PARISC_RT_SIGFRAME_SIZE; #ifdef CONFIG_64BIT - compat_sigset_t compat_set; struct compat_rt_sigframe __user * compat_frame; if (is_compat_task()) @@ -114,9 +113,8 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) if (is_compat_task()) { DBG(2,"sys_rt_sigreturn: ELF32 process.\n"); - if (__copy_from_user(&compat_set, &compat_frame->uc.uc_sigmask, sizeof(compat_set))) + if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask)) goto give_sigsegv; - sigset_32to64(&set,&compat_set); } else #endif { @@ -238,7 +236,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, int err = 0; #ifdef CONFIG_64BIT struct compat_rt_sigframe __user * compat_frame; - compat_sigset_t compat_set; #endif usp = (regs->gr[30] & ~(0x01UL)); @@ -261,8 +258,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext); err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext, &compat_frame->regs, regs, in_syscall); - sigset_64to32(&compat_set,set); - err |= __copy_to_user(&compat_frame->uc.uc_sigmask, &compat_set, sizeof(compat_set)); + err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set, + sizeof(compat_sigset_t)); } else #endif { diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c index 9e0cb6a577d6..41afa9cd1f55 100644 --- a/arch/parisc/kernel/signal32.c +++ b/arch/parisc/kernel/signal32.c @@ -46,19 +46,6 @@ #define DBG(LEVEL, ...) 
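The parisc signal paths above now use the generic get_compat_sigset()/put_compat_sigset() helpers; conceptually they perform the same word splitting as the open-coded parisc helpers removed just below. A minimal sketch of that mapping, for illustration only (the sketch_* names are hypothetical):

	/* each 64-bit sigset word is stored as two 32-bit compat words, low half first */
	static inline void sketch_sigset_64to32(compat_sigset_t *s32, const sigset_t *s64)
	{
		s32->sig[0] = s64->sig[0] & 0xffffffffUL;		/* low 32 bits */
		s32->sig[1] = (s64->sig[0] >> 32) & 0xffffffffUL;	/* high 32 bits */
	}

	static inline void sketch_sigset_32to64(sigset_t *s64, const compat_sigset_t *s32)
	{
		s64->sig[0] = s32->sig[0] | ((unsigned long)s32->sig[1] << 32);
	}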
#endif -inline void -sigset_32to64(sigset_t *s64, compat_sigset_t *s32) -{ - s64->sig[0] = s32->sig[0] | ((unsigned long)s32->sig[1] << 32); -} - -inline void -sigset_64to32(compat_sigset_t *s32, sigset_t *s64) -{ - s32->sig[0] = s64->sig[0] & 0xffffffffUL; - s32->sig[1] = (s64->sig[0] >> 32) & 0xffffffffUL; -} - long restore_sigcontext32(struct compat_sigcontext __user *sc, struct compat_regfile __user * rf, struct pt_regs *regs) diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index af51d4ccee42..719e7417732c 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -79,8 +79,6 @@ struct compat_rt_sigframe { #define FUNCTIONCALLFRAME32 48 #define PARISC_RT_SIGFRAME_SIZE32 (((sizeof(struct compat_rt_sigframe) + FUNCTIONCALLFRAME32) + SIGFRAME32) & -SIGFRAME32) -void sigset_32to64(sigset_t *s64, compat_sigset_t *s32); -void sigset_64to32(compat_sigset_t *s32, sigset_t *s64); long restore_sigcontext32(struct compat_sigcontext __user *sc, struct compat_regfile __user *rf, struct pt_regs *regs); diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 41e60a9c7db2..e775f80ae28c 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -690,15 +690,15 @@ cas_action: /* ELF32 Process entry path */ lws_compare_and_swap_2: #ifdef CONFIG_64BIT - /* Clip the input registers */ + /* Clip the input registers. We don't need to clip %r23 as we + only use it for word operations */ depdi 0, 31, 32, %r26 depdi 0, 31, 32, %r25 depdi 0, 31, 32, %r24 - depdi 0, 31, 32, %r23 #endif /* Check the validity of the size pointer */ - subi,>>= 4, %r23, %r0 + subi,>>= 3, %r23, %r0 b,n lws_exit_nosys /* Jump to the functions which will load the old and new values into diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index f5159381fdd6..0a10e4ddc528 100644 --- a/arch/parisc/kernel/topology.c +++ b/arch/parisc/kernel/topology.c @@ -1,37 +1,142 @@ /* - * arch/parisc/kernel/topology.c - Populate sysfs with topology information + * arch/parisc/kernel/topology.c * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * Copyright (C) 2017 Helge Deller <deller@gmx.de> * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. + * based on arch/arm/kernel/topology.c * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
*/ -#include <linux/init.h> -#include <linux/smp.h> -#include <linux/cpu.h> -#include <linux/cache.h> +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/sched/topology.h> -static DEFINE_PER_CPU(struct cpu, cpu_devices); +#include <asm/topology.h> -static int __init topology_init(void) + /* + * cpu topology table + */ +struct cputopo_parisc cpu_topology[NR_CPUS] __read_mostly; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) { - int num; + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_parisc *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +static int dualcores_found __initdata; + +/* + * store_cpu_topology is called at boot, when only one cpu is running, + * and later with the mutex cpu_hotplug.lock held once several cpus have + * booted, which prevents simultaneous write access to the cpu_topology array + */ +void __init store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_parisc *cpuid_topo = &cpu_topology[cpuid]; + struct cpuinfo_parisc *p; + int max_socket = -1; + unsigned long cpu; + + /* If the cpu topology has already been set, just return */ + if (cpuid_topo->core_id != -1) + return; - for_each_present_cpu(num) { - register_cpu(&per_cpu(cpu_devices, num), num); + /* create cpu topology mapping */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = 0; + + p = &per_cpu(cpu_data, cpuid); + for_each_online_cpu(cpu) { + const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu); + + if (cpu == cpuid) /* ignore current cpu */ + continue; + + if (cpuinfo->cpu_loc == p->cpu_loc) { + cpuid_topo->core_id = cpu_topology[cpu].core_id; + if (p->cpu_loc) { + cpuid_topo->core_id++; + cpuid_topo->socket_id = cpu_topology[cpu].socket_id; + dualcores_found = 1; + continue; + } + } + + if (cpuid_topo->socket_id == -1) + max_socket = max(max_socket, cpu_topology[cpu].socket_id); } - return 0; + + if (cpuid_topo->socket_id == -1) + cpuid_topo->socket_id = max_socket + 1; + + update_siblings_masks(cpuid); + + pr_info("CPU%u: thread %d, cpu %d, socket %d\n", + cpuid, cpu_topology[cpuid].thread_id, + cpu_topology[cpuid].core_id, + cpu_topology[cpuid].socket_id); } -subsys_initcall(topology_init); +static struct sched_domain_topology_level parisc_mc_topology[] = { +#ifdef CONFIG_SCHED_MC + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +/* + * init_cpu_topology is called at boot when only one cpu is running, + * which prevents simultaneous write access to the cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask and capacity */ + for_each_possible_cpu(cpu) { + struct cputopo_parisc *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + } + smp_wmb(); + + /* Set scheduler topology descriptor
*/ + if (dualcores_found) + set_sched_topology(parisc_mc_topology); +} diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 9a677cd5997f..44697817ccc6 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1005,7 +1005,6 @@ static inline int pmd_protnone(pmd_t pmd) } #endif /* CONFIG_NUMA_BALANCING */ -#define __HAVE_ARCH_PMD_WRITE #define pmd_write(pmd) pte_write(pmd_pte(pmd)) #define __pmd_write(pmd) __pte_write(pmd_pte(pmd)) #define pmd_savedwrite(pmd) pte_savedwrite(pmd_pte(pmd)) diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a035b1e5dfa7..8a2aecfe9b02 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -185,7 +185,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL /* * A pointer passed in from user mode. This should not diff --git a/arch/powerpc/include/asm/hmi.h b/arch/powerpc/include/asm/hmi.h index 85b7a1a21e22..9c14f7b5c46c 100644 --- a/arch/powerpc/include/asm/hmi.h +++ b/arch/powerpc/include/asm/hmi.h @@ -42,4 +42,8 @@ extern void wait_for_tb_resync(void); static inline void wait_for_subcore_guest_exit(void) { } static inline void wait_for_tb_resync(void) { } #endif + +struct pt_regs; +extern long hmi_handle_debugtrig(struct pt_regs *regs); + #endif /* __ASM_PPC64_HMI_H__ */ diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index a409177be8bd..f0461618bf7b 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -241,6 +241,7 @@ #define H_GET_HCA_INFO 0x1B8 #define H_GET_PERF_COUNT 0x1BC #define H_MANAGE_TRACE 0x1C0 +#define H_GET_CPU_CHARACTERISTICS 0x1C8 #define H_FREE_LOGICAL_LAN_BUFFER 0x1D4 #define H_QUERY_INT_STATE 0x1E4 #define H_POLL_PENDING 0x1D8 @@ -330,6 +331,17 @@ #define H_SIGNAL_SYS_RESET_ALL_OTHERS -2 /* >= 0 values are CPU number */ +/* H_GET_CPU_CHARACTERISTICS return values */ +#define H_CPU_CHAR_SPEC_BAR_ORI31 (1ull << 63) // IBM bit 0 +#define H_CPU_CHAR_BCCTRL_SERIALISED (1ull << 62) // IBM bit 1 +#define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 +#define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 +#define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 + +#define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 +#define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 +#define H_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ull << 61) // IBM bit 2 + /* Flag values used in H_REGISTER_PROC_TBL hcall */ #define PROC_TABLE_OP_MASK 0x18 #define PROC_TABLE_DEREG 0x10 @@ -436,6 +448,11 @@ static inline unsigned int get_longbusy_msecs(int longbusy_rc) } } +struct h_cpu_char_result { + u64 character; + u64 behaviour; +}; + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HVCALL_H */ diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h index 7f74c282710f..fad0e6ff460f 100644 --- a/arch/powerpc/include/asm/imc-pmu.h +++ b/arch/powerpc/include/asm/imc-pmu.h @@ -21,11 +21,6 @@ #include <asm/opal.h> /* - * For static allocation of some of the structures. 
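Stepping back to the H_GET_CPU_CHARACTERISTICS additions above: the hcall fills the hypervisor return buffer with two bitmasks, which a pseries guest can test against the H_CPU_CHAR_* and H_CPU_BEHAV_* flags. A minimal sketch of a caller, assuming only the definitions added in that hunk (the sketch_* helper name is hypothetical; the plpar_wrappers.h hunk below adds the real wrapper):

	static bool sketch_fw_has_trig2_flush(void)
	{
		unsigned long retbuf[PLPAR_HCALL_BUFSIZE];

		if (plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf) != H_SUCCESS)
			return false;	/* e.g. not implemented on older firmware */

		/* retbuf[0] = character bits, retbuf[1] = behaviour bits */
		return retbuf[0] & H_CPU_CHAR_L1D_FLUSH_TRIG2;
	}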
- */ -#define IMC_MAX_PMUS 32 - -/* * Compatibility macros for IMC devices */ #define IMC_DTB_COMPAT "ibm,opal-in-memory-counters" @@ -125,4 +120,5 @@ enum { extern int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id); extern void thread_imc_disable(void); +extern int get_max_nest_dev(void); #endif /* __ASM_POWERPC_IMC_PMU_H */ diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 7f01b22fa6cb..55eddf50d149 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -326,4 +326,18 @@ static inline long plapr_signal_sys_reset(long cpu) return plpar_hcall_norets(H_SIGNAL_SYS_RESET, cpu); } +static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) +{ + unsigned long retbuf[PLPAR_HCALL_BUFSIZE]; + long rc; + + rc = plpar_hcall(H_GET_CPU_CHARACTERISTICS, retbuf); + if (rc == H_SUCCESS) { + p->character = retbuf[0]; + p->behaviour = retbuf[1]; + } + + return rc; +} + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index b779f3ccd412..14e41b843952 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -432,8 +432,9 @@ #define SPRN_LPID 0x13F /* Logical Partition Identifier */ #endif #define LPID_RSVD 0x3ff /* Reserved LPID for partn switching */ -#define SPRN_HMER 0x150 /* Hardware m? error recovery */ -#define SPRN_HMEER 0x151 /* Hardware m? enable error recovery */ +#define SPRN_HMER 0x150 /* Hypervisor maintenance exception reg */ +#define HMER_DEBUG_TRIG (1ul << (63 - 17)) /* Debug trigger */ +#define SPRN_HMEER 0x151 /* Hyp maintenance exception enable reg */ #define SPRN_PCR 0x152 /* Processor compatibility register */ #define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */ #define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */ diff --git a/arch/powerpc/include/asm/xive-regs.h b/arch/powerpc/include/asm/xive-regs.h index 1d3f2be5ae39..fa4288822b68 100644 --- a/arch/powerpc/include/asm/xive-regs.h +++ b/arch/powerpc/include/asm/xive-regs.h @@ -10,6 +10,41 @@ #define _ASM_POWERPC_XIVE_REGS_H /* + * "magic" Event State Buffer (ESB) MMIO offsets. + * + * Each interrupt source has a 2-bit state machine called ESB + * which can be controlled by MMIO. It's made of 2 bits, P and + * Q. P indicates that an interrupt is pending (has been sent + * to a queue and is waiting for an EOI). Q indicates that the + * interrupt has been triggered while pending. + * + * This acts as a coalescing mechanism in order to guarantee + * that a given interrupt only occurs at most once in a queue. + * + * When doing an EOI, the Q bit will indicate if the interrupt + * needs to be re-triggered. + * + * The following offsets into the ESB MMIO allow to read or + * manipulate the PQ bits. They must be used with an 8-bytes + * load instruction. They all return the previous state of the + * interrupt (atomically). + * + * Additionally, some ESB pages support doing an EOI via a + * store at 0 and some ESBs support doing a trigger via a + * separate trigger page. 
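To make the PQ mechanics described above concrete, a hypothetical sketch (not part of this patch) of peeking at the PQ bits and issuing a store-EOI, assuming an ioremapped ESB page esb_mmio and the offset/value macros defined just below:

	u64 prev = in_be64(esb_mmio + XIVE_ESB_GET);	/* read PQ without changing it */
	bool pending = prev & XIVE_ESB_VAL_P;
	bool queued  = prev & XIVE_ESB_VAL_Q;

	out_be64(esb_mmio + XIVE_ESB_STORE_EOI, 0);	/* EOI, on pages that support store-EOI */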
+ */ +#define XIVE_ESB_STORE_EOI 0x400 /* Store */ +#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ +#define XIVE_ESB_GET 0x800 /* Load */ +#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ +#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ +#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ +#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ + +#define XIVE_ESB_VAL_P 0x2 +#define XIVE_ESB_VAL_Q 0x1 + +/* * Thread Management (aka "TM") registers */ diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h index 371fbebf1ec9..b619a5585cd6 100644 --- a/arch/powerpc/include/asm/xive.h +++ b/arch/powerpc/include/asm/xive.h @@ -58,6 +58,9 @@ struct xive_irq_data { #define XIVE_IRQ_FLAG_EOI_FW 0x10 #define XIVE_IRQ_FLAG_H_INT_ESB 0x20 +/* Special flag set by KVM for escalation interrupts */ +#define XIVE_IRQ_NO_EOI 0x80 + #define XIVE_INVALID_CHIP_ID -1 /* A queue tracking structure in a CPU */ @@ -72,41 +75,6 @@ struct xive_q { atomic_t pending_count; }; -/* - * "magic" Event State Buffer (ESB) MMIO offsets. - * - * Each interrupt source has a 2-bit state machine called ESB - * which can be controlled by MMIO. It's made of 2 bits, P and - * Q. P indicates that an interrupt is pending (has been sent - * to a queue and is waiting for an EOI). Q indicates that the - * interrupt has been triggered while pending. - * - * This acts as a coalescing mechanism in order to guarantee - * that a given interrupt only occurs at most once in a queue. - * - * When doing an EOI, the Q bit will indicate if the interrupt - * needs to be re-triggered. - * - * The following offsets into the ESB MMIO allow to read or - * manipulate the PQ bits. They must be used with an 8-bytes - * load instruction. They all return the previous state of the - * interrupt (atomically). - * - * Additionally, some ESB pages support doing an EOI via a - * store at 0 and some ESBs support doing a trigger via a - * separate trigger page.
- */ -#define XIVE_ESB_STORE_EOI 0x400 /* Store */ -#define XIVE_ESB_LOAD_EOI 0x000 /* Load */ -#define XIVE_ESB_GET 0x800 /* Load */ -#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */ -#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */ -#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */ -#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */ - -#define XIVE_ESB_VAL_P 0x2 -#define XIVE_ESB_VAL_Q 0x1 - /* Global enable flags for the XIVE support */ extern bool __xive_enabled; diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 602e0fde19b4..8bdc2f96c5d6 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -735,8 +735,8 @@ static __init void cpufeatures_cpu_quirks(void) */ if ((version & 0xffffff00) == 0x004e0100) cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD1; - else if ((version & 0xffffefff) == 0x004e0200) - cur_cpu_spec->cpu_features &= ~CPU_FTR_POWER9_DD2_1; + else if ((version & 0xffffefff) == 0x004e0201) + cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1; } static void __init cpufeatures_setup_finished(void) diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 742e4658c5dc..d2fecaec4fec 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -495,37 +495,123 @@ long machine_check_early(struct pt_regs *regs) return handled; } -long hmi_exception_realmode(struct pt_regs *regs) +/* Possible meanings for HMER_DEBUG_TRIG bit being set on POWER9 */ +static enum { + DTRIG_UNKNOWN, + DTRIG_VECTOR_CI, /* need to emulate vector CI load instr */ + DTRIG_SUSPEND_ESCAPE, /* need to escape from TM suspend mode */ +} hmer_debug_trig_function; + +static int init_debug_trig_function(void) { - __this_cpu_inc(irq_stat.hmi_exceptions); - -#ifdef CONFIG_PPC_BOOK3S_64 - /* Workaround for P9 vector CI loads (see p9_hmi_special_emu) */ - if (pvr_version_is(PVR_POWER9)) { - unsigned long hmer = mfspr(SPRN_HMER); - - /* Do we have the debug bit set */ - if (hmer & PPC_BIT(17)) { - hmer &= ~PPC_BIT(17); - mtspr(SPRN_HMER, hmer); - - /* - * Now to avoid problems with soft-disable we - * only do the emulation if we are coming from - * user space - */ - if (user_mode(regs)) - local_paca->hmi_p9_special_emu = 1; - - /* - * Don't bother going to OPAL if that's the - * only relevant bit. 
- */ - if (!(hmer & mfspr(SPRN_HMEER))) - return local_paca->hmi_p9_special_emu; + int pvr; + struct device_node *cpun; + struct property *prop = NULL; + const char *str; + + /* First look in the device tree */ + preempt_disable(); + cpun = of_get_cpu_node(smp_processor_id(), NULL); + if (cpun) { + of_property_for_each_string(cpun, "ibm,hmi-special-triggers", + prop, str) { + if (strcmp(str, "bit17-vector-ci-load") == 0) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + else if (strcmp(str, "bit17-tm-suspend-escape") == 0) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; } + of_node_put(cpun); + } + preempt_enable(); + + /* If we found the property, don't look at PVR */ + if (prop) + goto out; + + pvr = mfspr(SPRN_PVR); + /* Check for POWER9 Nimbus (scale-out) */ + if ((PVR_VER(pvr) == PVR_POWER9) && (pvr & 0xe000) == 0) { + /* DD2.2 and later */ + if ((pvr & 0xfff) >= 0x202) + hmer_debug_trig_function = DTRIG_SUSPEND_ESCAPE; + /* DD2.0 and DD2.1 - used for vector CI load emulation */ + else if ((pvr & 0xfff) >= 0x200) + hmer_debug_trig_function = DTRIG_VECTOR_CI; + } + + out: + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + pr_debug("HMI debug trigger used for vector CI load\n"); + break; + case DTRIG_SUSPEND_ESCAPE: + pr_debug("HMI debug trigger used for TM suspend escape\n"); + break; + default: + break; } -#endif /* CONFIG_PPC_BOOK3S_64 */ + return 0; +} +__initcall(init_debug_trig_function); + +/* + * Handle HMIs that occur as a result of a debug trigger. + * Return values: + * -1 means this is not a HMI cause that we know about + * 0 means no further handling is required + * 1 means further handling is required + */ +long hmi_handle_debugtrig(struct pt_regs *regs) +{ + unsigned long hmer = mfspr(SPRN_HMER); + long ret = 0; + + /* HMER_DEBUG_TRIG bit is used for various workarounds on P9 */ + if (!((hmer & HMER_DEBUG_TRIG) + && hmer_debug_trig_function != DTRIG_UNKNOWN)) + return -1; + + hmer &= ~HMER_DEBUG_TRIG; + /* HMER is a write-AND register */ + mtspr(SPRN_HMER, ~HMER_DEBUG_TRIG); + + switch (hmer_debug_trig_function) { + case DTRIG_VECTOR_CI: + /* + * Now to avoid problems with soft-disable we + * only do the emulation if we are coming from + * host user space + */ + if (regs && user_mode(regs)) + ret = local_paca->hmi_p9_special_emu = 1; + + break; + + default: + break; + } + + /* + * See if any other HMI causes remain to be handled + */ + if (hmer & mfspr(SPRN_HMEER)) + return -1; + + return ret; +} + +/* + * Return values: + */ +long hmi_exception_realmode(struct pt_regs *regs) +{ + int ret; + + __this_cpu_inc(irq_stat.hmi_exceptions); + + ret = hmi_handle_debugtrig(regs); + if (ret >= 0) + return ret; wait_for_subcore_guest_exit(); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 8ac0bd2bddb0..3280953a82cf 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -623,7 +623,9 @@ BEGIN_FTR_SECTION * NOTE, we rely on r0 being 0 from above. */ mtspr SPRN_IAMR,r0 +BEGIN_FTR_SECTION_NESTED(42) mtspr SPRN_AMOR,r0 +END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) /* save regs for local vars on new stack. 
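Returning to hmi_handle_debugtrig() in mce.c above: the three-way return value (-1, 0, 1) is what lets both hmi_exception_realmode() and the KVM real-mode handler share the helper. A minimal sketch of the intended calling pattern, with a hypothetical fallback function standing in for the rest of the HMI path:

	long sketch_hmi_entry(struct pt_regs *regs)
	{
		long ret = hmi_handle_debugtrig(regs);

		if (ret >= 0)	/* 0: fully handled, 1: caller must follow up */
			return ret;

		/* ret < 0: not a debug-trigger HMI; continue with full handling */
		return sketch_handle_other_hmi_causes(regs);
	}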
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index bfdd783e3916..5acb5a176dbe 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1569,16 +1569,22 @@ void arch_release_task_struct(struct task_struct *t) */ int set_thread_tidr(struct task_struct *t) { + int rc; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) return -EINVAL; if (t != current) return -EINVAL; - t->thread.tidr = assign_thread_tidr(); - if (t->thread.tidr < 0) - return t->thread.tidr; + if (t->thread.tidr) + return 0; + + rc = assign_thread_tidr(); + if (rc < 0) + return rc; + t->thread.tidr = rc; mtspr(SPRN_TIDR, t->thread.tidr); return 0; diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 16d16583cf11..9ffd73296f64 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -94,40 +94,13 @@ */ static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { - compat_sigset_t cset; - - switch (_NSIG_WORDS) { - case 4: cset.sig[6] = set->sig[3] & 0xffffffffull; - cset.sig[7] = set->sig[3] >> 32; - case 3: cset.sig[4] = set->sig[2] & 0xffffffffull; - cset.sig[5] = set->sig[2] >> 32; - case 2: cset.sig[2] = set->sig[1] & 0xffffffffull; - cset.sig[3] = set->sig[1] >> 32; - case 1: cset.sig[0] = set->sig[0] & 0xffffffffull; - cset.sig[1] = set->sig[0] >> 32; - } - return copy_to_user(uset, &cset, sizeof(*uset)); + return put_compat_sigset(uset, set, sizeof(*uset)); } static inline int get_sigset_t(sigset_t *set, const compat_sigset_t __user *uset) { - compat_sigset_t s32; - - if (copy_from_user(&s32, uset, sizeof(*uset))) - return -EFAULT; - - /* - * Swap the 2 words of the 64-bit sigset_t (they are stored - * in the "wrong" endian in 32-bit user storage). - */ - switch (_NSIG_WORDS) { - case 4: set->sig[3] = s32.sig[6] | (((long)s32.sig[7]) << 32); - case 3: set->sig[2] = s32.sig[4] | (((long)s32.sig[5]) << 32); - case 2: set->sig[1] = s32.sig[2] | (((long)s32.sig[3]) << 32); - case 1: set->sig[0] = s32.sig[0] | (((long)s32.sig[1]) << 32); - } - return 0; + return get_compat_sigset(set, uset); } #define to_user_ptr(p) ptr_to_compat(p) diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index e3c5f75d137c..8cdd852aedd1 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -188,7 +188,7 @@ static void tau_timeout(void * info) local_irq_restore(flags); } -static void tau_timeout_smp(unsigned long unused) +static void tau_timeout_smp(struct timer_list *unused) { /* schedule ourselves to be run again */ @@ -230,7 +230,7 @@ int __init TAU_init(void) /* first, set up the window shrinking timer */ - setup_timer(&tau_timer, tau_timeout_smp, 0UL); + timer_setup(&tau_timer, tau_timeout_smp, 0); tau_timer.expires = jiffies + shrink_timer; add_timer(&tau_timer); diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index c356f9a40b24..c296343d0dcc 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -268,17 +268,19 @@ static void kvmppc_tb_resync_done(void) * secondary threads to proceed. * - All secondary threads will eventually call opal hmi handler on * their exit path. + * + * Returns 1 if the timebase offset should be applied, 0 if not. */ long kvmppc_realmode_hmi_handler(void) { - int ptid = local_paca->kvm_hstate.ptid; bool resync_req; - /* This is only called on primary thread. 
*/ - BUG_ON(ptid != 0); __this_cpu_inc(irq_stat.hmi_exceptions); + if (hmi_handle_debugtrig(NULL) >= 0) + return 1; + /* * By now primary thread has already completed guest->host * partition switch but haven't signaled secondaries yet. diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 30ece4cebaf5..7daf21be33d0 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,16 +1909,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) bne 27f bl kvmppc_realmode_hmi_handler nop + cmpdi r3, 0 li r12, BOOK3S_INTERRUPT_HMI /* - * At this point kvmppc_realmode_hmi_handler would have resync-ed - * the TB. Hence it is not required to subtract guest timebase - * offset from timebase. So, skip it. + * At this point kvmppc_realmode_hmi_handler may have resync-ed + * the TB, and if it has, we must not subtract the guest timebase + * offset from the timebase. So, skip it. * * Also, do not call kvmppc_subcore_exit_guest() because it has * been invoked as part of kvmppc_realmode_hmi_handler(). */ - b 30f + beq 30f 27: /* Subtract timebase offset from timebase */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 071b87ee682f..83b485810aea 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -599,9 +599,9 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu) spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); } -void kvmppc_watchdog_func(unsigned long data) +void kvmppc_watchdog_func(struct timer_list *t) { - struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + struct kvm_vcpu *vcpu = from_timer(vcpu, t, arch.wdt_timer); u32 tsr, new_tsr; int final; @@ -1412,8 +1412,7 @@ int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) { /* setup watchdog timer once */ spin_lock_init(&vcpu->arch.wdt_lock); - setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, - (unsigned long)vcpu); + timer_setup(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, 0); /* * Clear DBSR.MRR to avoid guest debug interrupt as diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7c9e45f54186..77eb25abc601 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1403,7 +1403,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { int r; - sigset_t sigsaved; if (vcpu->mmio_needed) { vcpu->mmio_needed = 0; @@ -1444,16 +1443,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (run->immediate_exit) r = -EINTR; else r = kvmppc_vcpu_run(run, vcpu); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); return r; } diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index c9de03e0c1f1..d469224c4ada 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -21,6 +21,7 @@ #include <asm/tlbflush.h> #include <asm/page.h> #include <asm/code-patching.h> +#include <asm/setup.h> static int __patch_instruction(unsigned int *addr, unsigned int instr) { @@ -146,11 +147,8 @@ int patch_instruction(unsigned int *addr, unsigned int instr) * During early early boot patch_instruction is called * when text_poke_area is not ready, but we still need * to allow patching. 
We just do the plain old patching - * We use slab_is_available and per cpu read * via this_cpu_read - * of text_poke_area. Per-CPU areas might not be up early - * this can create problems with just using this_cpu_read() */ - if (!slab_is_available() || !this_cpu_read(text_poke_area)) + if (!this_cpu_read(*PTRRELOC(&text_poke_area))) return __patch_instruction(addr, instr); local_irq_save(flags); diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 3848af167df9..640cf566e986 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -47,7 +47,8 @@ DEFINE_RAW_SPINLOCK(native_tlbie_lock); -static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +static inline unsigned long ___tlbie(unsigned long vpn, int psize, + int apsize, int ssize) { unsigned long va; unsigned int penc; @@ -100,7 +101,15 @@ static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) : "memory"); break; } - trace_tlbie(0, 0, va, 0, 0, 0, 0); + return va; +} + +static inline void __tlbie(unsigned long vpn, int psize, int apsize, int ssize) +{ + unsigned long rb; + + rb = ___tlbie(vpn, psize, apsize, ssize); + trace_tlbie(0, 0, rb, 0, 0, 0, 0); } static inline void __tlbiel(unsigned long vpn, int psize, int apsize, int ssize) @@ -652,7 +661,7 @@ static void native_hpte_clear(void) if (hpte_v & HPTE_V_VALID) { hpte_decode(hptep, slot, &psize, &apsize, &ssize, &vpn); hptep->v = 0; - __tlbie(vpn, psize, apsize, ssize); + ___tlbie(vpn, psize, apsize, ssize); } } diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index 564fff06f5c1..23ec2c5e3b78 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -122,7 +122,8 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice) return !slice_area_is_free(mm, start, end - start); } -static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret) +static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret, + unsigned long high_limit) { unsigned long i; @@ -133,15 +134,16 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret) if (!slice_low_has_vma(mm, i)) ret->low_slices |= 1u << i; - if (mm->context.slb_addr_limit <= SLICE_LOW_TOP) + if (high_limit <= SLICE_LOW_TOP) return; - for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) + for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) if (!slice_high_has_vma(mm, i)) __set_bit(i, ret->high_slices); } -static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret) +static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_mask *ret, + unsigned long high_limit) { unsigned char *hpsizes; int index, mask_index; @@ -156,8 +158,11 @@ static void slice_mask_for_size(struct mm_struct *mm, int psize, struct slice_ma if (((lpsizes >> (i * 4)) & 0xf) == psize) ret->low_slices |= 1u << i; + if (high_limit <= SLICE_LOW_TOP) + return; + hpsizes = mm->context.high_slices_psize; - for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) { + for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++) { mask_index = i & 0x1; index = i >> 1; if (((hpsizes[index] >> (mask_index * 4)) & 0xf) == psize) @@ -169,6 +174,10 @@ static int slice_check_fit(struct mm_struct *mm, struct slice_mask mask, struct slice_mask available) { DECLARE_BITMAP(result, SLICE_NUM_HIGH); + /* + * Make sure we just do bit compare only to the max + * addr limit and not the full bit map size. 
+ */ unsigned long slice_count = GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); bitmap_and(result, mask.high_slices, @@ -472,7 +481,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, /* First make up a "good" mask of slices that have the right size * already */ - slice_mask_for_size(mm, psize, &good_mask); + slice_mask_for_size(mm, psize, &good_mask, high_limit); slice_print_mask(" good_mask", good_mask); /* @@ -497,7 +506,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, #ifdef CONFIG_PPC_64K_PAGES /* If we support combo pages, we can allow 64k pages in 4k slices */ if (psize == MMU_PAGE_64K) { - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask); + slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); if (fixed) slice_or_mask(&good_mask, &compat_mask); } @@ -530,11 +539,11 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len, return newaddr; } } - - /* We don't fit in the good mask, check what other slices are + /* + * We don't fit in the good mask, check what other slices are * empty and thus can be converted */ - slice_mask_for_free(mm, &potential_mask); + slice_mask_for_free(mm, &potential_mask, high_limit); slice_or_mask(&potential_mask, &good_mask); slice_print_mask(" potential", potential_mask); @@ -744,17 +753,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, { struct slice_mask mask, available; unsigned int psize = mm->context.user_psize; + unsigned long high_limit = mm->context.slb_addr_limit; if (radix_enabled()) return 0; slice_range_to_mask(addr, len, &mask); - slice_mask_for_size(mm, psize, &available); + slice_mask_for_size(mm, psize, &available, high_limit); #ifdef CONFIG_PPC_64K_PAGES /* We need to account for 4k slices too */ if (psize == MMU_PAGE_64K) { struct slice_mask compat_mask; - slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask); + slice_mask_for_size(mm, MMU_PAGE_4K, &compat_mask, high_limit); slice_or_mask(&available, &compat_mask); } #endif diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c index 264b6ab11978..b90a21bc2f3f 100644 --- a/arch/powerpc/oprofile/op_model_cell.c +++ b/arch/powerpc/oprofile/op_model_cell.c @@ -451,7 +451,7 @@ static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl) * This routine will alternate loading the virtual counters for * virtual CPUs */ -static void cell_virtual_cntr(unsigned long data) +static void cell_virtual_cntr(struct timer_list *unused) { int i, prev_hdw_thread, next_hdw_thread; u32 cpu; @@ -555,7 +555,7 @@ static void cell_virtual_cntr(unsigned long data) static void start_virt_cntrs(void) { - setup_timer(&timer_virt_cntr, cell_virtual_cntr, 0UL); + timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0); timer_virt_cntr.expires = jiffies + HZ / 10; add_timer(&timer_virt_cntr); } @@ -587,7 +587,7 @@ static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr, * periodically based on kernel timer to switch which SPU is * being monitored in a round robbin fashion. 
 */ -static void spu_evnt_swap(unsigned long data) +static void spu_evnt_swap(struct timer_list *unused) { int node; int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx; @@ -677,7 +677,7 @@ static void spu_evnt_swap(unsigned long data) static void start_spu_event_swap(void) { - setup_timer(&timer_spu_event_swap, spu_evnt_swap, 0UL); + timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0); timer_spu_event_swap.expires = jiffies + HZ / 25; add_timer(&timer_spu_event_swap); } diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index 36344117c680..0ead3cd73caa 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -26,7 +26,7 @@ */ static DEFINE_MUTEX(nest_init_lock); static DEFINE_PER_CPU(struct imc_pmu_ref *, local_nest_imc_refc); -static struct imc_pmu *per_nest_pmu_arr[IMC_MAX_PMUS]; +static struct imc_pmu **per_nest_pmu_arr; static cpumask_t nest_imc_cpumask; struct imc_pmu_ref *nest_imc_refc; static int nest_pmus; @@ -286,13 +286,14 @@ static struct imc_pmu_ref *get_nest_pmu_ref(int cpu) static void nest_change_cpu_context(int old_cpu, int new_cpu) { struct imc_pmu **pn = per_nest_pmu_arr; - int i; if (old_cpu < 0 || new_cpu < 0) return; - for (i = 0; *pn && i < IMC_MAX_PMUS; i++, pn++) + while (*pn) { perf_pmu_migrate_context(&(*pn)->pmu, old_cpu, new_cpu); + pn++; + } } static int ppc_nest_imc_cpu_offline(unsigned int cpu) @@ -467,7 +468,7 @@ static int nest_imc_event_init(struct perf_event *event) * Nest HW counter memory resides in a per-chip reserve-memory (HOMER). * Get the base memory address for this cpu. */ - chip_id = topology_physical_package_id(event->cpu); + chip_id = cpu_to_chip_id(event->cpu); pcni = pmu->mem_info; do { if (pcni->id == chip_id) { @@ -524,19 +525,19 @@ static int nest_imc_event_init(struct perf_event *event) */ static int core_imc_mem_init(int cpu, int size) { - int phys_id, rc = 0, core_id = (cpu / threads_per_core); + int nid, rc = 0, core_id = (cpu / threads_per_core); struct imc_mem_info *mem_info; /* * alloc_pages_node() will allocate memory for core in the * local node only. */ - phys_id = topology_physical_package_id(cpu); + nid = cpu_to_node(cpu); mem_info = &core_imc_pmu->mem_info[core_id]; mem_info->id = core_id; /* We need only vbase for core counters */ - mem_info->vbase = page_address(alloc_pages_node(phys_id, + mem_info->vbase = page_address(alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | __GFP_NOWARN, get_order(size))); if (!mem_info->vbase) @@ -797,14 +798,14 @@ static int core_imc_event_init(struct perf_event *event) static int thread_imc_mem_alloc(int cpu_id, int size) { u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id); - int phys_id = topology_physical_package_id(cpu_id); + int nid = cpu_to_node(cpu_id); if (!local_mem) { /* * This case could happen only once at start, since we don't * free the memory in cpu offline path.
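 *
 * (Editor's note, not part of the patch: alloc_pages_node() takes a
 * NUMA node id, which is why this series swaps
 * topology_physical_package_id() for cpu_to_node() in the allocation
 * paths -- the two numberings need not coincide. Roughly:
 *
 *	int nid  = cpu_to_node(cpu_id);     // NUMA node, for allocators
 *	int chip = cpu_to_chip_id(cpu_id);  // physical chip id
 *
 * while the chip id remains the right key for the per-chip reserved
 * memory (HOMER) looked up in nest_imc_event_init().)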
*/ - local_mem = page_address(alloc_pages_node(phys_id, + local_mem = page_address(alloc_pages_node(nid, GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE | __GFP_NOWARN, get_order(size))); if (!local_mem) @@ -1194,6 +1195,7 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr) kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs); kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]); kfree(pmu_ptr); + kfree(per_nest_pmu_arr); return; } @@ -1218,6 +1220,13 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent, return -ENOMEM; /* Needed for hotplug/migration */ + if (!per_nest_pmu_arr) { + per_nest_pmu_arr = kcalloc(get_max_nest_dev() + 1, + sizeof(struct imc_pmu *), + GFP_KERNEL); + if (!per_nest_pmu_arr) + return -ENOMEM; + } per_nest_pmu_arr[pmu_index] = pmu_ptr; break; case IMC_DOMAIN_CORE: diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 1fbb5da17dd2..9033c8194eda 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -992,13 +992,13 @@ static void spu_calc_load(void) CALC_LOAD(spu_avenrun[2], EXP_15, active_tasks); } -static void spusched_wake(unsigned long data) +static void spusched_wake(struct timer_list *unused) { mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); wake_up_process(spusched_task); } -static void spuloadavg_wake(unsigned long data) +static void spuloadavg_wake(struct timer_list *unused) { mod_timer(&spuloadavg_timer, jiffies + LOAD_FREQ); spu_calc_load(); @@ -1093,7 +1093,7 @@ static int show_spu_loadavg(struct seq_file *s, void *private) LOAD_INT(c), LOAD_FRAC(c), count_active_contexts(), atomic_read(&nr_spu_contexts), - task_active_pid_ns(current)->last_pid); + idr_get_cursor(&task_active_pid_ns(current)->idr)); return 0; } @@ -1124,8 +1124,8 @@ int __init spu_sched_init(void) } spin_lock_init(&spu_prio->runq_lock); - setup_timer(&spusched_timer, spusched_wake, 0); - setup_timer(&spuloadavg_timer, spuloadavg_wake, 0); + timer_setup(&spusched_timer, spusched_wake, 0); + timer_setup(&spuloadavg_timer, spuloadavg_wake, 0); spusched_task = kthread_run(spusched_thread, NULL, "spusched"); if (IS_ERR(spusched_task)) { diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c index 39a1d4225e0f..3408f315ef48 100644 --- a/arch/powerpc/platforms/powermac/low_i2c.c +++ b/arch/powerpc/platforms/powermac/low_i2c.c @@ -361,9 +361,9 @@ static irqreturn_t kw_i2c_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static void kw_i2c_timeout(unsigned long data) +static void kw_i2c_timeout(struct timer_list *t) { - struct pmac_i2c_host_kw *host = (struct pmac_i2c_host_kw *)data; + struct pmac_i2c_host_kw *host = from_timer(host, t, timeout_timer); unsigned long flags; spin_lock_irqsave(&host->lock, flags); @@ -513,7 +513,7 @@ static struct pmac_i2c_host_kw *__init kw_i2c_host_init(struct device_node *np) mutex_init(&host->mutex); init_completion(&host->complete); spin_lock_init(&host->lock); - setup_timer(&host->timeout_timer, kw_i2c_timeout, (unsigned long)host); + timer_setup(&host->timeout_timer, kw_i2c_timeout, 0); psteps = of_get_property(np, "AAPL,address-step", NULL); steps = psteps ? 
(*psteps) : 0x10; diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 21f6531fae20..465ea105b771 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -153,6 +153,22 @@ static void disable_core_pmu_counters(void) put_online_cpus(); } +int get_max_nest_dev(void) +{ + struct device_node *node; + u32 pmu_units = 0, type; + + for_each_compatible_node(node, NULL, IMC_DTB_UNIT_COMPAT) { + if (of_property_read_u32(node, "type", &type)) + continue; + + if (type == IMC_TYPE_CHIP) + pmu_units++; + } + + return pmu_units; +} + static int opal_imc_counters_probe(struct platform_device *pdev) { struct device_node *imc_dev = pdev->dev.of_node; @@ -191,8 +207,10 @@ static int opal_imc_counters_probe(struct platform_device *pdev) break; } - if (!imc_pmu_create(imc_dev, pmu_count, domain)) - pmu_count++; + if (!imc_pmu_create(imc_dev, pmu_count, domain)) { + if (domain == IMC_DOMAIN_NEST) + pmu_count++; + } } return 0; diff --git a/arch/powerpc/platforms/powernv/vas.c b/arch/powerpc/platforms/powernv/vas.c index c488621dbec3..aebbe95c9230 100644 --- a/arch/powerpc/platforms/powernv/vas.c +++ b/arch/powerpc/platforms/powernv/vas.c @@ -135,6 +135,7 @@ int chip_to_vas_id(int chipid) } return -1; } +EXPORT_SYMBOL(chip_to_vas_id); static int vas_probe(struct platform_device *pdev) { diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index a3b8d7d1316e..2547b6021e6a 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d) * EOI the source if it hasn't been disabled and hasn't * been passed-through to a KVM guest */ - if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d)) + if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) && + !(xd->flags & XIVE_IRQ_NO_EOI)) xive_do_source_eoi(irqd_to_hwirq(d), xd); /* diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index 18158be62a2b..970460a0b492 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -40,6 +40,7 @@ generic-y += resource.h generic-y += scatterlist.h generic-y += sections.h generic-y += sembuf.h +generic-y += serial.h generic-y += setup.h generic-y += shmbuf.h generic-y += shmparam.h diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 6cbbb6a68d76..5ad4cb622bed 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -58,17 +58,17 @@ #endif #if (__SIZEOF_INT__ == 4) -#define INT __ASM_STR(.word) -#define SZINT __ASM_STR(4) -#define LGINT __ASM_STR(2) +#define RISCV_INT __ASM_STR(.word) +#define RISCV_SZINT __ASM_STR(4) +#define RISCV_LGINT __ASM_STR(2) #else #error "Unexpected __SIZEOF_INT__" #endif #if (__SIZEOF_SHORT__ == 2) -#define SHORT __ASM_STR(.half) -#define SZSHORT __ASM_STR(2) -#define LGSHORT __ASM_STR(1) +#define RISCV_SHORT __ASM_STR(.half) +#define RISCV_SZSHORT __ASM_STR(2) +#define RISCV_LGSHORT __ASM_STR(1) #else #error "Unexpected __SIZEOF_SHORT__" #endif diff --git a/arch/riscv/include/asm/atomic.h b/arch/riscv/include/asm/atomic.h index e2e37c57cbeb..e65d1cd89e28 100644 --- a/arch/riscv/include/asm/atomic.h +++ b/arch/riscv/include/asm/atomic.h @@ -50,30 +50,30 @@ static __always_inline void atomic64_set(atomic64_t *v, long i) * have the AQ or RL bits set. These don't return anything, so there's only * one version to worry about. 
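 *
 * (Editor's sketch, not part of the patch: once the cleanup below drops
 * the unused c_op parameter, ATOMIC_OP(add, add, i, w, int, ) expands
 * to roughly
 *
 *	static __always_inline void atomic_add(int i, atomic_t *v)
 *	{
 *		__asm__ __volatile__ (
 *			"amoadd.w zero, %1, %0"
 *			: "+A" (v->counter)
 *			: "r" (i)
 *			: "memory");
 *	}
 *
 * i.e. a bare AMO with neither the AQ nor the RL bit set.)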
*/ -#define ATOMIC_OP(op, asm_op, c_op, I, asm_type, c_type, prefix) \ -static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ -{ \ - __asm__ __volatile__ ( \ - "amo" #asm_op "." #asm_type " zero, %1, %0" \ - : "+A" (v->counter) \ - : "r" (I) \ - : "memory"); \ +#define ATOMIC_OP(op, asm_op, I, asm_type, c_type, prefix) \ +static __always_inline void atomic##prefix##_##op(c_type i, atomic##prefix##_t *v) \ +{ \ + __asm__ __volatile__ ( \ + "amo" #asm_op "." #asm_type " zero, %1, %0" \ + : "+A" (v->counter) \ + : "r" (I) \ + : "memory"); \ } #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, w, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I) \ + ATOMIC_OP (op, asm_op, I, w, int, ) \ + ATOMIC_OP (op, asm_op, I, d, long, 64) #endif -ATOMIC_OPS(add, add, +, i) -ATOMIC_OPS(sub, add, +, -i) -ATOMIC_OPS(and, and, &, i) -ATOMIC_OPS( or, or, |, i) -ATOMIC_OPS(xor, xor, ^, i) +ATOMIC_OPS(add, add, i) +ATOMIC_OPS(sub, add, -i) +ATOMIC_OPS(and, and, i) +ATOMIC_OPS( or, or, i) +ATOMIC_OPS(xor, xor, i) #undef ATOMIC_OP #undef ATOMIC_OPS @@ -83,7 +83,7 @@ ATOMIC_OPS(xor, xor, ^, i) * There are two flavors of these: the arithmetic ops have both fetch and return * versions, while the logical ops only have fetch versions. */ -#define ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, asm_type, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, asm_type, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op##c_or(c_type i, atomic##prefix##_t *v) \ { \ register c_type ret; \ @@ -103,13 +103,13 @@ static __always_inline c_type atomic##prefix##_##op##_return##c_or(c_type i, ato #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, w, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, asm_or, c_or, d, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, asm_or, c_or, d, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) #endif @@ -126,28 +126,28 @@ ATOMIC_OPS(sub, add, +, -i, .aqrl, ) #undef ATOMIC_OPS #ifdef CONFIG_GENERIC_ATOMIC64 -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) #else -#define ATOMIC_OPS(op, asm_op, c_op, I, asm_or, c_or) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, w, int, ) \ - ATOMIC_FETCH_OP(op, asm_op, c_op, I, asm_or, c_or, d, long, 64) +#define ATOMIC_OPS(op, asm_op, I, asm_or, c_or) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, w, int, ) \ + ATOMIC_FETCH_OP(op, asm_op, I, asm_or, c_or, d, long, 64) #endif -ATOMIC_OPS(and, and, &, i, , _relaxed) -ATOMIC_OPS(and, and, &, i, .aq , _acquire) -ATOMIC_OPS(and, and, &, i, .rl , _release) -ATOMIC_OPS(and, and, &, i, .aqrl, ) +ATOMIC_OPS(and, and, i, ,
_relaxed) +ATOMIC_OPS(and, and, i, .aq , _acquire) +ATOMIC_OPS(and, and, i, .rl , _release) +ATOMIC_OPS(and, and, i, .aqrl, ) -ATOMIC_OPS( or, or, |, i, , _relaxed) -ATOMIC_OPS( or, or, |, i, .aq , _acquire) -ATOMIC_OPS( or, or, |, i, .rl , _release) -ATOMIC_OPS( or, or, |, i, .aqrl, ) +ATOMIC_OPS( or, or, i, , _relaxed) +ATOMIC_OPS( or, or, i, .aq , _acquire) +ATOMIC_OPS( or, or, i, .rl , _release) +ATOMIC_OPS( or, or, i, .aqrl, ) -ATOMIC_OPS(xor, xor, ^, i, , _relaxed) -ATOMIC_OPS(xor, xor, ^, i, .aq , _acquire) -ATOMIC_OPS(xor, xor, ^, i, .rl , _release) -ATOMIC_OPS(xor, xor, ^, i, .aqrl, ) +ATOMIC_OPS(xor, xor, i, , _relaxed) +ATOMIC_OPS(xor, xor, i, .aq , _acquire) +ATOMIC_OPS(xor, xor, i, .rl , _release) +ATOMIC_OPS(xor, xor, i, .aqrl, ) #undef ATOMIC_OPS @@ -182,13 +182,13 @@ ATOMIC_OPS(add_negative, add, <, 0) #undef ATOMIC_OP #undef ATOMIC_OPS -#define ATOMIC_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_OP(op, func_op, I, c_type, prefix) \ static __always_inline void atomic##prefix##_##op(atomic##prefix##_t *v) \ { \ atomic##prefix##_##func_op(I, v); \ } -#define ATOMIC_FETCH_OP(op, func_op, c_op, I, c_type, prefix) \ +#define ATOMIC_FETCH_OP(op, func_op, I, c_type, prefix) \ static __always_inline c_type atomic##prefix##_fetch_##op(atomic##prefix##_t *v) \ { \ return atomic##prefix##_fetch_##func_op(I, v); \ @@ -202,16 +202,16 @@ static __always_inline c_type atomic##prefix##_##op##_return(atomic##prefix##_t #ifdef CONFIG_GENERIC_ATOMIC64 #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) #else #define ATOMIC_OPS(op, asm_op, c_op, I) \ - ATOMIC_OP (op, asm_op, c_op, I, int, ) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, int, ) \ + ATOMIC_OP (op, asm_op, I, int, ) \ + ATOMIC_FETCH_OP (op, asm_op, I, int, ) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, int, ) \ - ATOMIC_OP (op, asm_op, c_op, I, long, 64) \ - ATOMIC_FETCH_OP (op, asm_op, c_op, I, long, 64) \ + ATOMIC_OP (op, asm_op, I, long, 64) \ + ATOMIC_FETCH_OP (op, asm_op, I, long, 64) \ ATOMIC_OP_RETURN(op, asm_op, c_op, I, long, 64) #endif @@ -300,8 +300,13 @@ static __always_inline long atomic64_inc_not_zero(atomic64_t *v) /* * atomic_{cmp,}xchg is required to have exactly the same ordering semantics as - * {cmp,}xchg and the operations that return, so they need a barrier. We just - * use the other implementations directly. + * {cmp,}xchg and the operations that return, so they need a barrier. + */ +/* + * FIXME: atomic_cmpxchg_{acquire,release,relaxed} are all implemented by + * assigning the same barrier to both the LR and SC operations, but that might + * not make any sense. We're waiting on a memory model specification to + * determine exactly what the right thing to do is here. */ #define ATOMIC_OP(c_t, prefix, c_or, size, asm_or) \ static __always_inline c_t atomic##prefix##_cmpxchg##c_or(atomic##prefix##_t *v, c_t o, c_t n) \ diff --git a/arch/riscv/include/asm/barrier.h b/arch/riscv/include/asm/barrier.h index 183534b7c39b..773c4e039cd7 100644 --- a/arch/riscv/include/asm/barrier.h +++ b/arch/riscv/include/asm/barrier.h @@ -38,29 +38,6 @@ #define smp_rmb() RISCV_FENCE(r,r) #define smp_wmb() RISCV_FENCE(w,w) -/* - * These fences exist to enforce ordering around the relaxed AMOs. 
The - * documentation defines that - * " - * atomic_fetch_add(); - * is equivalent to: - * smp_mb__before_atomic(); - * atomic_fetch_add_relaxed(); - * smp_mb__after_atomic(); - * " - * So we emit full fences on both sides. - */ -#define __smb_mb__before_atomic() smp_mb() -#define __smb_mb__after_atomic() smp_mb() - -/* - * These barriers prevent accesses performed outside a spinlock from being moved - * inside a spinlock. Since RISC-V sets the aq/rl bits on our spinlock only - * enforce release consistency, we need full fences here. - */ -#define smb_mb__before_spinlock() smp_mb() -#define smb_mb__after_spinlock() smp_mb() - #include <asm-generic/barrier.h> #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 7c281ef1d583..f30daf26f08f 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -67,7 +67,7 @@ : "memory"); #define __test_and_op_bit(op, mod, nr, addr) \ - __test_and_op_bit_ord(op, mod, nr, addr, ) + __test_and_op_bit_ord(op, mod, nr, addr, .aqrl) #define __op_bit(op, mod, nr, addr) \ __op_bit_ord(op, mod, nr, addr, ) diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index c3e13764a943..bfc7f099ab1f 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ -27,8 +27,8 @@ typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS -#define __BUG_ENTRY_ADDR INT " 1b - 2b" -#define __BUG_ENTRY_FILE INT " %0 - 2b" +#define __BUG_ENTRY_ADDR RISCV_INT " 1b - 2b" +#define __BUG_ENTRY_FILE RISCV_INT " %0 - 2b" #else #define __BUG_ENTRY_ADDR RISCV_PTR " 1b" #define __BUG_ENTRY_FILE RISCV_PTR " %0" @@ -38,7 +38,7 @@ typedef u32 bug_insn_t; #define __BUG_ENTRY \ __BUG_ENTRY_ADDR "\n\t" \ __BUG_ENTRY_FILE "\n\t" \ - SHORT " %1" + RISCV_SHORT " %1" #else #define __BUG_ENTRY \ __BUG_ENTRY_ADDR diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0595585013b0..efd89a88d2d0 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -18,22 +18,44 @@ #undef flush_icache_range #undef flush_icache_user_range +#undef flush_dcache_page static inline void local_flush_icache_all(void) { asm volatile ("fence.i" ::: "memory"); } +#define PG_dcache_clean PG_arch_1 + +static inline void flush_dcache_page(struct page *page) +{ + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); +} + +/* + * RISC-V doesn't have an instruction to flush parts of the instruction cache, + * so instead we just flush the whole thing. + */ +#define flush_icache_range(start, end) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() + #ifndef CONFIG_SMP -#define flush_icache_range(start, end) local_flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() +#define flush_icache_all() local_flush_icache_all() +#define flush_icache_mm(mm, local) flush_icache_all() #else /* CONFIG_SMP */ -#define flush_icache_range(start, end) sbi_remote_fence_i(0) -#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) +#define flush_icache_all() sbi_remote_fence_i(0) +void flush_icache_mm(struct mm_struct *mm, bool local); #endif /* CONFIG_SMP */ +/* + * Bits in sys_riscv_flush_icache()'s flags argument. 
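+ *
+ * (Editor's sketch, not part of the patch: a thread that has patched
+ * code it will only run on the current hart can ask for a local flush,
+ *
+ *	sys_riscv_flush_icache(start, end, SYS_RISCV_FLUSH_ICACHE_LOCAL);
+ *
+ * while flags == 0 requests a flush visible on every hart the mm runs
+ * on; any other flag bits are reserved and are rejected with -EINVAL.)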
+ */ +#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL +#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL) + #endif /* _ASM_RISCV_CACHEFLUSH_H */ diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index c1f32cfcc79b..a82ce599b639 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -19,6 +19,8 @@ #ifndef _ASM_RISCV_IO_H #define _ASM_RISCV_IO_H +#include <linux/types.h> + #ifdef CONFIG_MMU extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); @@ -32,7 +34,7 @@ extern void __iomem *ioremap(phys_addr_t offset, unsigned long size); #define ioremap_wc(addr, size) ioremap((addr), (size)) #define ioremap_wt(addr, size) ioremap((addr), (size)) -extern void iounmap(void __iomem *addr); +extern void iounmap(volatile void __iomem *addr); #endif /* CONFIG_MMU */ @@ -250,7 +252,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) const ctype *buf = buffer; \ \ do { \ - __raw_writeq(*buf++, addr); \ + __raw_write ## len(*buf++, addr); \ } while (--count); \ } \ afence; \ @@ -266,9 +268,9 @@ __io_reads_ins(reads, u32, l, __io_br(), __io_ar()) __io_reads_ins(ins, u8, b, __io_pbr(), __io_par()) __io_reads_ins(ins, u16, w, __io_pbr(), __io_par()) __io_reads_ins(ins, u32, l, __io_pbr(), __io_par()) -#define insb(addr, buffer, count) __insb((void __iomem *)addr, buffer, count) -#define insw(addr, buffer, count) __insw((void __iomem *)addr, buffer, count) -#define insl(addr, buffer, count) __insl((void __iomem *)addr, buffer, count) +#define insb(addr, buffer, count) __insb((void __iomem *)(long)addr, buffer, count) +#define insw(addr, buffer, count) __insw((void __iomem *)(long)addr, buffer, count) +#define insl(addr, buffer, count) __insl((void __iomem *)(long)addr, buffer, count) __io_writes_outs(writes, u8, b, __io_bw(), __io_aw()) __io_writes_outs(writes, u16, w, __io_bw(), __io_aw()) @@ -280,9 +282,9 @@ __io_writes_outs(writes, u32, l, __io_bw(), __io_aw()) __io_writes_outs(outs, u8, b, __io_pbw(), __io_paw()) __io_writes_outs(outs, u16, w, __io_pbw(), __io_paw()) __io_writes_outs(outs, u32, l, __io_pbw(), __io_paw()) -#define outsb(addr, buffer, count) __outsb((void __iomem *)addr, buffer, count) -#define outsw(addr, buffer, count) __outsw((void __iomem *)addr, buffer, count) -#define outsl(addr, buffer, count) __outsl((void __iomem *)addr, buffer, count) +#define outsb(addr, buffer, count) __outsb((void __iomem *)(long)addr, buffer, count) +#define outsw(addr, buffer, count) __outsw((void __iomem *)(long)addr, buffer, count) +#define outsl(addr, buffer, count) __outsl((void __iomem *)(long)addr, buffer, count) #ifdef CONFIG_64BIT __io_reads_ins(reads, u64, q, __io_br(), __io_ar()) diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 66805cba9a27..5df2dccdba12 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,10 @@ typedef struct { void *vdso; +#ifdef CONFIG_SMP + /* A local icache flush is needed before user execution can resume. 
*/ + cpumask_t icache_stale_mask; +#endif } mm_context_t; #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h index de1fc1631fc4..97424834dce2 100644 --- a/arch/riscv/include/asm/mmu_context.h +++ b/arch/riscv/include/asm/mmu_context.h @@ -1,5 +1,6 @@ /* * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,11 +15,13 @@ #ifndef _ASM_RISCV_MMU_CONTEXT_H #define _ASM_RISCV_MMU_CONTEXT_H +#include <linux/mm_types.h> #include <asm-generic/mm_hooks.h> #include <linux/mm.h> #include <linux/sched.h> #include <asm/tlbflush.h> +#include <asm/cacheflush.h> static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *task) @@ -46,12 +49,54 @@ static inline void set_pgdir(pgd_t *pgd) csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); } +/* + * When necessary, performs a deferred icache flush for the given MM context, + * on the local CPU. RISC-V has no direct mechanism for instruction cache + * shoot downs, so instead we send an IPI that informs the remote harts they + * need to flush their local instruction caches. To avoid pathologically slow + * behavior in a common case (a bunch of single-hart processes on a many-hart + * machine, ie 'make -j') we avoid the IPIs for harts that are not currently + * executing a MM context and instead schedule a deferred local instruction + * cache flush to be performed before execution resumes on each hart. This + * actually performs that local instruction cache flush, which implicitly only + * refers to the current hart. + */ +static inline void flush_icache_deferred(struct mm_struct *mm) +{ +#ifdef CONFIG_SMP + unsigned int cpu = smp_processor_id(); + cpumask_t *mask = &mm->context.icache_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + /* + * Ensure the remote hart's writes are visible to this hart. + * This pairs with a barrier in flush_icache_mm. + */ + smp_mb(); + local_flush_icache_all(); + } +#endif +} + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *task) { if (likely(prev != next)) { + /* + * Mark the current MM context as inactive, and the next as + * active. This is at least used by the icache flushing + * routines in order to determine who should + */ + unsigned int cpu = smp_processor_id(); + + cpumask_clear_cpu(cpu, mm_cpumask(prev)); + cpumask_set_cpu(cpu, mm_cpumask(next)); + set_pgdir(next->pgd); local_flush_tlb_all(); + + flush_icache_deferred(next); } } diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 3399257780b2..2cbd92ed1629 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_unmap(pte) ((void)(pte)) -/* - * Certain architectures need to do special things when PTEs within - * a page table are directly modified. Thus, the following hook is - * made available. 
- */ -static inline void set_pte(pte_t *ptep, pte_t pteval) -{ - *ptep = pteval; -} - -static inline void set_pte_at(struct mm_struct *mm, - unsigned long addr, pte_t *ptep, pte_t pteval) -{ - set_pte(ptep, pteval); -} - -static inline void pte_clear(struct mm_struct *mm, - unsigned long addr, pte_t *ptep) -{ - set_pte_at(mm, addr, ptep, __pte(0)); -} - static inline int pte_present(pte_t pte) { return (pte_val(pte) & _PAGE_PRESENT); @@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) return (pte_val(pte) == 0); } -/* static inline int pte_read(pte_t pte) */ - static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_exec(pte_t pte) +{ + return pte_val(pte) & _PAGE_EXEC; +} + static inline int pte_huge(pte_t pte) { return pte_present(pte) && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); } -/* static inline int pte_exec(pte_t pte) */ - static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; @@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) return pte_val(pte_a) == pte_val(pte_b); } +/* + * Certain architectures need to do special things when PTEs within + * a page table are directly modified. Thus, the following hook is + * made available. + */ +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + *ptep = pteval; +} + +void flush_icache_pte(pte_t pte); + +static inline void set_pte_at(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, pte_t pteval) +{ + if (pte_present(pteval) && pte_exec(pteval)) + flush_icache_pte(pteval); + + set_pte(ptep, pteval); +} + +static inline void pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + set_pte_at(mm, addr, ptep, __pte(0)); +} + #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, diff --git a/arch/riscv/include/asm/spinlock.h b/arch/riscv/include/asm/spinlock.h index 04c71d938afd..2fd27e8ef1fd 100644 --- a/arch/riscv/include/asm/spinlock.h +++ b/arch/riscv/include/asm/spinlock.h @@ -24,7 +24,7 @@ /* FIXME: Replace this with a ticket lock, like MIPS. 
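 *
 * (Editor's note, not part of the patch: the hunk below wraps the load
 * in READ_ONCE() so that a polling loop such as
 *
 *	while (arch_spin_is_locked(&lock))
 *		cpu_relax();
 *
 * re-reads lock->lock on every iteration; with a plain load the
 * compiler could hoist the access out of the loop or tear it.)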
*/ -#define arch_spin_is_locked(x) ((x)->lock != 0) +#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0) static inline void arch_spin_unlock(arch_spinlock_t *lock) { @@ -58,15 +58,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) } } -static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) -{ - smp_rmb(); - do { - cpu_relax(); - } while (arch_spin_is_locked(lock)); - smp_acquire__after_ctrl_dep(); -} - /***********************************************************/ static inline void arch_read_lock(arch_rwlock_t *lock) diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 3df4932d8964..2f26989cb864 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -18,7 +18,7 @@ typedef unsigned long cycles_t; -static inline cycles_t get_cycles(void) +static inline cycles_t get_cycles_inline(void) { cycles_t n; @@ -27,6 +27,7 @@ static inline cycles_t get_cycles(void) : "=r" (n)); return n; } +#define get_cycles get_cycles_inline #ifdef CONFIG_64BIT static inline uint64_t get_cycles64(void) diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 5ee4ae370b5e..715b0f10af58 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -17,7 +17,12 @@ #ifdef CONFIG_MMU -/* Flush entire local TLB */ +#include <linux/mm_types.h> + +/* + * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction + * cache as well, so a 'fence.i' is not necessary. + */ static inline void local_flush_tlb_all(void) { __asm__ __volatile__ ("sfence.vma" : : : "memory"); diff --git a/arch/riscv/include/asm/vdso-syscalls.h b/arch/riscv/include/asm/vdso-syscalls.h new file mode 100644 index 000000000000..a2ccf1894929 --- /dev/null +++ b/arch/riscv/include/asm/vdso-syscalls.h @@ -0,0 +1,28 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _ASM_RISCV_VDSO_SYSCALLS_H +#define _ASM_RISCV_VDSO_SYSCALLS_H + +#ifdef CONFIG_SMP + +/* These syscalls are only used by the vDSO and are not in the uapi. 
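+ *
+ * (Editor's sketch, not part of the patch: userspace is expected to
+ * reach this through the vDSO entry rather than hard-coding the
+ * arch-specific syscall number, e.g.
+ *
+ *	long (*flush)(void *, void *, unsigned long);
+ *	flush = find_vdso_sym("__vdso_flush_icache");  // hypothetical lookup helper
+ *	flush(start, end, 0);
+ *
+ * keeping the number private to the kernel and the vDSO.)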
*/ +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) + +#endif + +#endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index 602f61257553..541544d64c33 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -38,4 +38,8 @@ struct vdso_data { (void __user *)((unsigned long)(base) + __vdso_##name); \ }) +#ifdef CONFIG_SMP +asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t); +#endif + #endif /* _ASM_RISCV_VDSO_H */ diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 76af908f87c1..78f670d70133 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -152,6 +152,3 @@ END(_start) __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE -ENTRY(empty_zero_page) - .fill (empty_zero_page + PAGE_SIZE) - ., 1, 0x00 -END(empty_zero_page) diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c index 23cc81ec9e94..551734248748 100644 --- a/arch/riscv/kernel/riscv_ksyms.c +++ b/arch/riscv/kernel/riscv_ksyms.c @@ -12,4 +12,7 @@ /* * Assembly functions that may be used (directly or indirectly) by modules */ +EXPORT_SYMBOL(__clear_user); EXPORT_SYMBOL(__copy_user); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index de7db114c315..8fbb6749910d 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -58,7 +58,12 @@ static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; #endif /* CONFIG_CMDLINE_BOOL */ unsigned long va_pa_offset; +EXPORT_SYMBOL(va_pa_offset); unsigned long pfn_base; +EXPORT_SYMBOL(pfn_base); + +unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; +EXPORT_SYMBOL(empty_zero_page); /* The lucky hart to first increment this variable will boot the other cores */ atomic_t hart_lottery; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index b4a71ec5906f..6d3962435720 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -38,6 +38,13 @@ enum ipi_message_type { IPI_MAX }; + +/* Unsupported */ +int setup_profiling_timer(unsigned int multiplier) +{ + return -EINVAL; +} + irqreturn_t handle_ipi(void) { unsigned long *pending_ipis = &ipi_data[smp_processor_id()].bits; @@ -108,3 +115,51 @@ void smp_send_reschedule(int cpu) { send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); } + +/* + * Performs an icache flush for the given MM context. RISC-V has no direct + * mechanism for instruction cache shoot downs, so instead we send an IPI that + * informs the remote harts they need to flush their local instruction caches. + * To avoid pathologically slow behavior in a common case (a bunch of + * single-hart processes on a many-hart machine, ie 'make -j') we avoid the + * IPIs for harts that are not currently executing a MM context and instead + * schedule a deferred local instruction cache flush to be performed before + * execution resumes on each hart. + */ +void flush_icache_mm(struct mm_struct *mm, bool local) +{ + unsigned int cpu; + cpumask_t others, *mask; + + preempt_disable(); + + /* Mark every hart's icache as needing a flush for this MM. */ + mask = &mm->context.icache_stale_mask; + cpumask_setall(mask); + /* Flush this hart's I$ now, and mark it as flushed. 
*/ + cpu = smp_processor_id(); + cpumask_clear_cpu(cpu, mask); + local_flush_icache_all(); + + /* + * Flush the I$ of other harts concurrently executing, and mark them as + * flushed. + */ + cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu)); + local |= cpumask_empty(&others); + if (mm != current->active_mm || !local) + sbi_remote_fence_i(others.bits); + else { + /* + * It's assumed that at least one strongly ordered operation is + * performed on this hart between setting a hart's cpumask bit + * and scheduling this MM context on that hart. Sending an SBI + * remote message will do this, but in the case where no + * messages are sent we still need to order this hart's writes + * with flush_icache_deferred(). + */ + smp_mb(); + } + + preempt_enable(); +} diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 4351be7d0533..a2ae936a093e 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -14,8 +14,8 @@ */ #include <linux/syscalls.h> -#include <asm/cmpxchg.h> #include <asm/unistd.h> +#include <asm/cacheflush.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); } #endif /* !CONFIG_64BIT */ + +#ifdef CONFIG_SMP +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * sys_riscv_flush_icache() is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, + uintptr_t, flags) +{ + struct mm_struct *mm = current->mm; + bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0; + + /* Check the reserved flags. */ + if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL)) + return -EINVAL; + + flush_icache_mm(mm, local); + + return 0; +} +#endif diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index 4e30dc5fb593..a5bd6401f95e 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -15,6 +15,7 @@ #include <linux/linkage.h> #include <linux/syscalls.h> #include <asm-generic/syscalls.h> +#include <asm/vdso.h> #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), @@ -22,4 +23,5 @@ void *sys_call_table[__NR_syscalls] = { [0 ... 
__NR_syscalls - 1] = sys_ni_syscall, #include <asm/unistd.h> +#include <asm/vdso-syscalls.h> }; diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 523d0a8ac8db..324568d33921 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -1,7 +1,12 @@ # Copied from arch/tile/kernel/vdso/Makefile # Symbols present in the vdso -vdso-syms = rt_sigreturn +vdso-syms = rt_sigreturn +vdso-syms += gettimeofday +vdso-syms += clock_gettime +vdso-syms += clock_getres +vdso-syms += getcpu +vdso-syms += flush_icache # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) diff --git a/arch/riscv/kernel/vdso/clock_getres.S b/arch/riscv/kernel/vdso/clock_getres.S new file mode 100644 index 000000000000..edf7e2339648 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_getres.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */ +ENTRY(__vdso_clock_getres) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_getres + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_getres) diff --git a/arch/riscv/kernel/vdso/clock_gettime.S b/arch/riscv/kernel/vdso/clock_gettime.S new file mode 100644 index 000000000000..aac65676c6d5 --- /dev/null +++ b/arch/riscv/kernel/vdso/clock_gettime.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */ +ENTRY(__vdso_clock_gettime) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_clock_gettime + ecall + ret + .cfi_endproc +ENDPROC(__vdso_clock_gettime) diff --git a/arch/riscv/kernel/vdso/flush_icache.S b/arch/riscv/kernel/vdso/flush_icache.S new file mode 100644 index 000000000000..b0fbad74e873 --- /dev/null +++ b/arch/riscv/kernel/vdso/flush_icache.S @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/linkage.h> +#include <asm/unistd.h> +#include <asm/vdso-syscalls.h> + + .text +/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */ +ENTRY(__vdso_flush_icache) + .cfi_startproc +#ifdef CONFIG_SMP + li a7, __NR_riscv_flush_icache + ecall +#else + fence.i + li a0, 0 +#endif + ret + .cfi_endproc +ENDPROC(__vdso_flush_icache) diff --git a/arch/riscv/kernel/vdso/getcpu.S b/arch/riscv/kernel/vdso/getcpu.S new file mode 100644 index 000000000000..cc7e98924484 --- /dev/null +++ b/arch/riscv/kernel/vdso/getcpu.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */ +ENTRY(__vdso_getcpu) + .cfi_startproc + /* For now, just do the syscall. */ + li a7, __NR_getcpu + ecall + ret + .cfi_endproc +ENDPROC(__vdso_getcpu) diff --git a/arch/riscv/kernel/vdso/gettimeofday.S b/arch/riscv/kernel/vdso/gettimeofday.S new file mode 100644 index 000000000000..da85d33e8990 --- /dev/null +++ b/arch/riscv/kernel/vdso/gettimeofday.S @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + + .text +/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */ +ENTRY(__vdso_gettimeofday) + .cfi_startproc + /* For now, just do the syscall. 
*/ + li a7, __NR_gettimeofday + ecall + ret + .cfi_endproc +ENDPROC(__vdso_gettimeofday) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 8c9dce95c11d..cd1d47e0724b 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -70,8 +70,11 @@ VERSION LINUX_4.15 { global: __vdso_rt_sigreturn; - __vdso_cmpxchg32; - __vdso_cmpxchg64; + __vdso_gettimeofday; + __vdso_clock_gettime; + __vdso_clock_getres; + __vdso_getcpu; + __vdso_flush_icache; local: *; }; } diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 1cc4ac3964b4..dce8ae24c6d3 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -84,6 +84,7 @@ void __delay(unsigned long cycles) while ((unsigned long)(get_cycles() - t0) < cycles) cpu_relax(); } +EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 81f7d9ce6d88..eb22ab49b3e0 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -2,3 +2,4 @@ obj-y += init.o obj-y += fault.o obj-y += extable.o obj-y += ioremap.o +obj-y += cacheflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c new file mode 100644 index 000000000000..498c0a0814fe --- /dev/null +++ b/arch/riscv/mm/cacheflush.c @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2017 SiFive + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, version 2. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <asm/pgtable.h> +#include <asm/cacheflush.h> + +void flush_icache_pte(pte_t pte) +{ + struct page *page = pte_page(pte); + + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) + flush_icache_all(); +} diff --git a/arch/riscv/mm/ioremap.c b/arch/riscv/mm/ioremap.c index e99194a4077e..70ef2724cdf6 100644 --- a/arch/riscv/mm/ioremap.c +++ b/arch/riscv/mm/ioremap.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL(ioremap); * * Caller must ensure there is only one unmapping for the same pointer. */ -void iounmap(void __iomem *addr) +void iounmap(volatile void __iomem *addr) { vunmap((void *)((unsigned long)addr & PAGE_MASK)); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 863a62a6de3c..829c67986db7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -148,6 +148,7 @@ config S390 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_FUTEX_CMPXCHG if FUTEX + select HAVE_GCC_PLUGINS select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZ4 @@ -158,6 +159,8 @@ config S390 select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_MEMBLOCK_PHYS_MAP @@ -538,22 +541,6 @@ config ARCH_RANDOM If unsure, say Y. -config ALTERNATIVES - def_bool y - prompt "Patch optimized instructions for running CPU type" - help - When enabled the kernel code is compiled with additional - alternative instructions blocks optimized for newer CPU types. - These alternative instructions blocks are patched at kernel boot - time when running CPU supports them. This mechanism is used to - optimize some critical code paths (i.e. 
spinlocks) for newer CPUs - even if kernel is build to support older machine generations. - - This mechanism could be disabled by appending "noaltinstr" - option to the kernel command line. - - If unsure, say Y. - endmenu menu "Memory setup" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 6b3f41985f28..de54cfc6109d 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # s390/Makefile # @@ -6,10 +7,6 @@ # for "archclean" and "archdep" for cleaning up and making dependencies for # this architecture # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1994 by Linus Torvalds # diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index ef3fb1b9201f..cb6e8066b1ad 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Base infrastructure for Linux-z/VM Monitor Stream, Stage 1. * Exports appldata_register_ops() and appldata_unregister_ops() for the diff --git a/arch/s390/appldata/appldata_mem.c b/arch/s390/appldata/appldata_mem.c index 598df5708501..e68136c3c23a 100644 --- a/arch/s390/appldata/appldata_mem.c +++ b/arch/s390/appldata/appldata_mem.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects data related to memory management. diff --git a/arch/s390/appldata/appldata_net_sum.c b/arch/s390/appldata/appldata_net_sum.c index 66037d2622b4..8bc14b0d1def 100644 --- a/arch/s390/appldata/appldata_net_sum.c +++ b/arch/s390/appldata/appldata_net_sum.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects accumulated network statistics (Packets received/transmitted, diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 45b3178200ab..433a994b1a89 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Data gathering module for Linux-VM Monitor Stream, Stage 1. * Collects misc. OS related data (CPU utilization, running processes). diff --git a/arch/s390/boot/install.sh b/arch/s390/boot/install.sh index aed3069699bd..bed227f267ae 100644 --- a/arch/s390/boot/install.sh +++ b/arch/s390/boot/install.sh @@ -1,11 +1,8 @@ #!/bin/sh +# SPDX-License-Identifier: GPL-2.0 # # arch/s390x/boot/install.sh # -# This file is subject to the terms and conditions of the GNU General Public -# License. See the file "COPYING" in the main directory of this archive -# for more details. -# # Copyright (C) 1995 by Linus Torvalds # # Adapted from code in arch/i386/boot/Makefile by H. 
Peter Anvin diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 84eccc88c065..5af8458951cf 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -629,6 +629,7 @@ CONFIG_STACK_TRACER=y CONFIG_BLK_DEV_IO_TRACE=y CONFIG_FUNCTION_PROFILER=y CONFIG_HIST_TRIGGERS=y +CONFIG_DMA_API_DEBUG=y CONFIG_LKDTM=m CONFIG_TEST_LIST_SORT=y CONFIG_TEST_SORT=y @@ -637,14 +638,12 @@ CONFIG_RBTREE_TEST=y CONFIG_INTERVAL_TREE_TEST=m CONFIG_PERCPU_TEST=m CONFIG_ATOMIC64_SELFTEST=y -CONFIG_DMA_API_DEBUG=y CONFIG_TEST_BPF=m CONFIG_BUG_ON_DATA_CORRUPTION=y CONFIG_S390_PTDUMP=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y @@ -660,13 +659,11 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index f7202358e6d7..d52eafe57ae8 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -587,7 +587,6 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -605,13 +604,10 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 03100fe74ea8..20ed149e1137 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -585,7 +585,6 @@ CONFIG_BIG_KEYS=y CONFIG_ENCRYPTED_KEYS=m CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y -CONFIG_HARDENED_USERCOPY=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0 @@ -603,13 +602,10 @@ CONFIG_CRYPTO_PCRYPT=m CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m -CONFIG_CRYPTO_CCM=m -CONFIG_CRYPTO_GCM=m CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m -CONFIG_CRYPTO_CMAC=m CONFIG_CRYPTO_XCBC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_CRC32=m diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c index b48e20dd94e9..d60798737d86 100644 --- a/arch/s390/crypto/aes_s390.c +++ b/arch/s390/crypto/aes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -11,12 +12,6 @@ * Harald Freudenberger <freude@de.ibm.com> * * Derived from "crypto/aes_generic.c" - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * */ #define KMSG_COMPONENT "aes_s390" diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 36aefc07d10c..8720e9203ecf 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -1,13 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 arch random implementation. * * Copyright IBM Corp. 2017 * Author(s): Harald Freudenberger <freude@de.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #include <linux/kernel.h> diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 992e630c227b..436865926c26 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Crypto-API module for CRC-32 algorithms implemented with the * z/Architecture Vector Extension Facility. diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c index 0d296662bbf0..5346b5a80bb6 100644 --- a/arch/s390/crypto/des_s390.c +++ b/arch/s390/crypto/des_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -6,12 +7,6 @@ * Copyright IBM Corp. 2003, 2011 * Author(s): Thomas Spatzier * Jan Glauber (jan.glauber@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * */ #include <linux/init.h> diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c index 564616d48d8b..3b7f96c9eead 100644 --- a/arch/s390/crypto/ghash_s390.c +++ b/arch/s390/crypto/ghash_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index a4e903ed7e21..003932db8d12 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Cryptographic API. * @@ -7,11 +8,6 @@ * Copyright IBM Corp. 2017 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> * Harald Freudenberger <freude@de.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * */ #define KMSG_COMPONENT "paes_s390" diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 3e47c4a0f18b..a97a1802cfb4 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2006, 2015 * Author(s): Jan Glauber <jan.glauber@de.ibm.com> diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index 10f200790079..d6f8258b44df 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Cryptographic API. * @@ -5,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. 
- * */ #ifndef _CRYPTO_ARCH_S390_SHA_H #define _CRYPTO_ARCH_S390_SHA_H diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c index 53c277999a28..944aa6b237cd 100644 --- a/arch/s390/crypto/sha256_s390.c +++ b/arch/s390/crypto/sha256_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -6,12 +7,6 @@ * s390 Version: * Copyright IBM Corp. 2005, 2011 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include <crypto/internal/hash.h> #include <linux/init.h> diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c index 2f4caa1ef123..b17eded532b1 100644 --- a/arch/s390/crypto/sha512_s390.c +++ b/arch/s390/crypto/sha512_s390.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -5,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include <crypto/internal/hash.h> #include <crypto/sha.h> diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c index c740f77285b2..cf0718d121bc 100644 --- a/arch/s390/crypto/sha_common.c +++ b/arch/s390/crypto/sha_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Cryptographic API. * @@ -5,12 +6,6 @@ * * Copyright IBM Corp. 2007 * Author(s): Jan Glauber (jang@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * */ #include <crypto/internal/hash.h> diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index cf8a2d92467f..43bbe63e2992 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -1,9 +1,9 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Hypervisor filesystem for Linux on s390. * * Copyright IBM Corp. 
2006, 2008 * Author(s): Michael Holzheu <holzheu@de.ibm.com> - * License: GPL */ #define KMSG_COMPONENT "hypfs" diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 6c268f6a51d3..a72002056b54 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -15,14 +15,9 @@ struct alt_instr { u8 replacementlen; /* length of new instruction */ } __packed; -#ifdef CONFIG_ALTERNATIVES -extern void apply_alternative_instructions(void); -extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); -#else -static inline void apply_alternative_instructions(void) {}; -static inline void apply_alternatives(struct alt_instr *start, - struct alt_instr *end) {}; -#endif +void apply_alternative_instructions(void); +void apply_alternatives(struct alt_instr *start, struct alt_instr *end); + /* * |661: |662: |6620 |663: * +-----------+---------------------+ @@ -109,7 +104,6 @@ static inline void apply_alternatives(struct alt_instr *start, b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" \ INSTR_LEN_SANITY_CHECK(altinstr_len(num)) -#ifdef CONFIG_ALTERNATIVES /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, altinstr, facility) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ @@ -130,14 +124,6 @@ static inline void apply_alternatives(struct alt_instr *start, ALTINSTR_ENTRY(facility1, 1) \ ALTINSTR_ENTRY(facility2, 2) \ ".popsection\n" -#else -/* Alternative instructions are disabled, let's put just oldinstr in */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ - oldinstr "\n" - -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - oldinstr "\n" -#endif /* * Alternative instructions for different CPU types or capabilities. diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 1b60eb3676d5..5e6a63641a5f 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -263,7 +263,6 @@ typedef struct compat_siginfo { #define si_overrun _sifields._timer._overrun #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL /* * A pointer passed in from user mode. This should not diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index 05480e4cc5ca..dd08db491b89 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * CPU-measurement facilities * * Copyright IBM Corp. 2012 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> * Jan Glauber <jang@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
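With CONFIG_ALTERNATIVES dropped, apply_alternative_instructions()/apply_alternatives() are always built and ALTERNATIVE()/ALTERNATIVE_2() always emit .altinstructions entries. A hedged usage sketch (the facility number 99 is a placeholder, not taken from this patch; both mnemonics are ordinary s390 no-ops):

	/* run "nopr %r7" instead of "nop" when facility 99 is installed */
	asm volatile(ALTERNATIVE("nop", "nopr %r7", 99) : : : "memory");

At boot the table between __alt_instructions and __alt_instructions_end is walked and the old instruction is overwritten in place when the named facility bit is set; modules get the same treatment in the arch/s390/kernel/module.c hunk further down.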
*/ #ifndef _ASM_S390_CPU_MF_H #define _ASM_S390_CPU_MF_H @@ -144,6 +141,12 @@ struct hws_trailer_entry { unsigned long long progusage2; /* */ } __packed; +/* Load program parameter */ +static inline void lpp(void *pp) +{ + asm volatile(".insn s,0xb2800000,0(%0)\n":: "a" (pp) : "memory"); +} + /* Query counter information */ static inline int qctri(struct cpumf_ctr_info *info) { @@ -167,7 +170,7 @@ static inline int lcctl(u64 ctl) " .insn s,0xb2840000,%1\n" " ipm %0\n" " srl %0,28\n" - : "=d" (cc) : "m" (ctl) : "cc"); + : "=d" (cc) : "Q" (ctl) : "cc"); return cc; } diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 9a3cb3983c01..1a61b1b997f2 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -194,13 +194,14 @@ struct arch_elf_state { #define CORE_DUMP_USE_REGSET #define ELF_EXEC_PAGESIZE PAGE_SIZE -/* - * This is the base location for PIE (ET_DYN with INTERP) loads. On - * 64-bit, this is raised to 4GB to leave the entire 32-bit address - * space open for things that want to use the area for 32-bit pointers. - */ -#define ELF_ET_DYN_BASE (is_compat_task() ? 0x000400000UL : \ - 0x100000000UL) +/* This is the location that an ET_DYN program is loaded if exec'ed. Typical + use of this is to invoke "./ld.so someprog" to test out a new version of + the loader. We need to make sure that it is out of the way of the program + that it will "exec", and that there is sufficient room for the brk. 64-bit + tasks are aligned to 4GB. */ +#define ELF_ET_DYN_BASE (is_compat_task() ? \ + (STACK_TOP / 3 * 2) : \ + (STACK_TOP / 3 * 2) & ~((1UL << 32) - 1)) /* This yields a mask that user programs can use to figure out what instruction set this CPU supports. */ diff --git a/arch/s390/include/asm/futex.h b/arch/s390/include/asm/futex.h index 9b5a3469fed9..5e97a4353147 100644 --- a/arch/s390/include/asm/futex.h +++ b/arch/s390/include/asm/futex.h @@ -26,9 +26,9 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, newval, ret; + mm_segment_t old_fs; - load_kernel_asce(); - + old_fs = enable_sacf_uaccess(); pagefault_disable(); switch (op) { case FUTEX_OP_SET: @@ -55,6 +55,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, ret = -ENOSYS; } pagefault_enable(); + disable_sacf_uaccess(old_fs); if (!ret) *oval = oldval; @@ -65,9 +66,10 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval, static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + mm_segment_t old_fs; int ret; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" "0: cs %1,%4,0(%5)\n" @@ -77,6 +79,7 @@ static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=d" (ret), "+d" (oldval), "=m" (*uaddr) : "0" (-EFAULT), "d" (newval), "a" (uaddr), "m" (*uaddr) : "cc", "memory"); + disable_sacf_uaccess(old_fs); *uval = oldval; return ret; } diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 921391f2341e..13de80cf741c 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -1,22 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ #ifndef _ASM_S390_KPROBES_H #define _ASM_S390_KPROBES_H /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at 
your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f3a9b5a445b6..e14f381757f6 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for kernel virtual machines on s390 * * Copyright IBM Corp. 2008, 2009 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte <cotte@de.ibm.com> */ diff --git a/arch/s390/include/asm/kvm_para.h b/arch/s390/include/asm/kvm_para.h index 41393052ac57..74eeec9c0a80 100644 --- a/arch/s390/include/asm/kvm_para.h +++ b/arch/s390/include/asm/kvm_para.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for paravirtual devices on s390 * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> */ /* @@ -20,8 +17,6 @@ * * Copyright IBM Corp. 2007,2008 * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2. */ #ifndef __S390_KVM_PARA_H #define __S390_KVM_PARA_H diff --git a/arch/s390/include/asm/livepatch.h b/arch/s390/include/asm/livepatch.h index 6de5c6cb0061..672f95b12d40 100644 --- a/arch/s390/include/asm/livepatch.h +++ b/arch/s390/include/asm/livepatch.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * livepatch.h - s390-specific Kernel Live Patching Core * @@ -7,13 +8,6 @@ * Jiri Slaby */ -/* - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - */ - #ifndef ASM_LIVEPATCH_H #define ASM_LIVEPATCH_H diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9eb36a1592c7..ec6592e8ba36 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -115,33 +115,28 @@ struct lowcore { /* Address space pointer. */ __u64 kernel_asce; /* 0x0378 */ __u64 user_asce; /* 0x0380 */ + __u64 vdso_asce; /* 0x0388 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. 
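The lowcore reshuffle below keeps lpp and current_pid adjacent because, as the comment says, they are loaded as one 64-bit program parameter. An illustrative view of that doubleword (offsets per this hunk; the struct is only a sketch, not a kernel type):

	struct lpp_doubleword {		/* sketch of the combined value */
		__u32 lpp;		/* 0x0390: LPP_MAGIC flag bits */
		__u32 current_pid;	/* 0x0394: pid of the running task */
	};

The sampling hardware stamps this value into each sample entry, where the pid half is later recovered with basic->hpp & LPP_PID_MASK (see the perf_cpum_sf.c hunks below).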
*/ - __u32 lpp; /* 0x0388 */ - __u32 current_pid; /* 0x038c */ + __u32 lpp; /* 0x0390 */ + __u32 current_pid; /* 0x0394 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0390 */ - __u32 softirq_pending; /* 0x0394 */ - __u64 percpu_offset; /* 0x0398 */ - __u64 vdso_per_cpu_data; /* 0x03a0 */ - __u64 machine_flags; /* 0x03a8 */ - __u32 preempt_count; /* 0x03b0 */ - __u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */ - __u64 gmap; /* 0x03b8 */ - __u32 spinlock_lockval; /* 0x03c0 */ - __u32 spinlock_index; /* 0x03c4 */ - __u32 fpu_flags; /* 0x03c8 */ - __u8 pad_0x03cc[0x0400-0x03cc]; /* 0x03cc */ - - /* Per cpu primary space access list */ - __u32 paste[16]; /* 0x0400 */ - - __u8 pad_0x04c0[0x0e00-0x0440]; /* 0x0440 */ + __u32 cpu_nr; /* 0x0398 */ + __u32 softirq_pending; /* 0x039c */ + __u32 preempt_count; /* 0x03a0 */ + __u32 spinlock_lockval; /* 0x03a4 */ + __u32 spinlock_index; /* 0x03a8 */ + __u32 fpu_flags; /* 0x03ac */ + __u64 percpu_offset; /* 0x03b0 */ + __u64 vdso_per_cpu_data; /* 0x03b8 */ + __u64 machine_flags; /* 0x03c0 */ + __u64 gmap; /* 0x03c8 */ + __u8 pad_0x03d0[0x0e00-0x03d0]; /* 0x03d0 */ /* * 0xe00 contains the address of the IPL Parameter Information @@ -193,14 +188,14 @@ extern struct lowcore *lowcore_ptr[]; static inline void set_prefix(__u32 address) { - asm volatile("spx %0" : : "m" (address) : "memory"); + asm volatile("spx %0" : : "Q" (address) : "memory"); } static inline __u32 store_prefix(void) { __u32 address; - asm volatile("stpx %0" : "=m" (address)); + asm volatile("stpx %0" : "=Q" (address)); return address; } diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index cf4c1cb17dcd..65154eaa3714 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -28,7 +28,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_PGSTE mm->context.alloc_pgste = page_table_allocate_pgste || test_thread_flag(TIF_PGSTE) || - current->mm->context.alloc_pgste; + (current->mm && current->mm->context.alloc_pgste); mm->context.has_pgste = 0; mm->context.use_skey = 0; mm->context.use_cmma = 0; @@ -73,41 +73,38 @@ static inline int init_new_context(struct task_struct *tsk, static inline void set_user_asce(struct mm_struct *mm) { S390_lowcore.user_asce = mm->context.asce; - if (current->thread.mm_segment.ar4) - __ctl_load(S390_lowcore.user_asce, 7, 7); - set_cpu_flag(CIF_ASCE_PRIMARY); + __ctl_load(S390_lowcore.user_asce, 1, 1); + clear_cpu_flag(CIF_ASCE_PRIMARY); } static inline void clear_user_asce(void) { S390_lowcore.user_asce = S390_lowcore.kernel_asce; - - __ctl_load(S390_lowcore.user_asce, 1, 1); - __ctl_load(S390_lowcore.user_asce, 7, 7); -} - -static inline void load_kernel_asce(void) -{ - unsigned long asce; - - __ctl_store(asce, 1, 1); - if (asce != S390_lowcore.kernel_asce) - __ctl_load(S390_lowcore.kernel_asce, 1, 1); + __ctl_load(S390_lowcore.kernel_asce, 1, 1); set_cpu_flag(CIF_ASCE_PRIMARY); } +mm_segment_t enable_sacf_uaccess(void); +void disable_sacf_uaccess(mm_segment_t old_fs); + static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { int cpu = smp_processor_id(); - S390_lowcore.user_asce = next->context.asce; if (prev == next) return; + S390_lowcore.user_asce = next->context.asce; cpumask_set_cpu(cpu, &next->context.cpu_attach_mask); - /* Clear old ASCE by loading the kernel ASCE. 
*/ - __ctl_load(S390_lowcore.kernel_asce, 1, 1); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); + /* Clear previous user-ASCE from CR1 and CR7 */ + if (!test_cpu_flag(CIF_ASCE_PRIMARY)) { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + if (test_cpu_flag(CIF_ASCE_SECONDARY)) { + __ctl_load(S390_lowcore.vdso_asce, 7, 7); + clear_cpu_flag(CIF_ASCE_SECONDARY); + } cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); } @@ -117,7 +114,6 @@ static inline void finish_arch_post_lock_switch(void) struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; - load_kernel_asce(); if (mm) { preempt_disable(); while (atomic_read(&mm->context.flush_count)) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index 79aa6421fedb..d6c9d1e0dc2d 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -64,27 +64,10 @@ struct perf_sf_sde_regs { #define REG_OVERFLOW 1 #define OVERFLOW_REG(hwc) ((hwc)->extra_reg.config) #define SFB_ALLOC_REG(hwc) ((hwc)->extra_reg.alloc) -#define RAWSAMPLE_REG(hwc) ((hwc)->config) #define TEAR_REG(hwc) ((hwc)->last_tag) #define SAMPL_RATE(hwc) ((hwc)->event_base) #define SAMPL_FLAGS(hwc) ((hwc)->config_base) #define SAMPL_DIAG_MODE(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_DIAG_MODE) #define SDB_FULL_BLOCKS(hwc) (SAMPL_FLAGS(hwc) & PERF_CPUM_SF_FULL_BLOCKS) -/* Structure for sampling data entries to be passed as perf raw sample data - * to user space. Note that raw sample data must be aligned and, thus, might - * be padded with zeros. - */ -struct sf_raw_sample { -#define SF_RAW_SAMPLE_BASIC PERF_CPUM_SF_BASIC_MODE -#define SF_RAW_SAMPLE_DIAG PERF_CPUM_SF_DIAG_MODE - u64 format; - u32 size; /* Size of sf_raw_sample */ - u16 bsdes; /* Basic-sampling data entry size */ - u16 dsdes; /* Diagnostic-sampling data entry size */ - struct hws_basic_entry basic; /* Basic-sampling data entry */ - struct hws_diag_entry diag; /* Diagnostic-sampling data entry */ - u8 padding[]; /* Padding to next multiple of 8 */ -} __packed; - #endif /* _ASM_S390_PERF_EVENT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d7fe9838084d..57d7bc92e0b8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -709,7 +709,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT; } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0; @@ -1264,6 +1264,12 @@ static inline pud_t pud_mkwrite(pud_t pud) return pud; } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return (pud_val(pud) & _REGION3_ENTRY_WRITE) != 0; +} + static inline pud_t pud_mkclean(pud_t pud) { if (pud_large(pud)) { diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index f25bfe888933..bfbfad482289 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -109,9 +109,7 @@ extern void execve_tail(void); #define HAVE_ARCH_PICK_MMAP_LAYOUT -typedef struct { - __u32 ar4; -} mm_segment_t; +typedef unsigned int mm_segment_t; /* * Thread structure @@ -247,7 +245,7 @@ static inline unsigned short stap(void) { unsigned short cpu_address; - asm volatile("stap %0" : "=m" (cpu_address)); + asm volatile("stap %0" : "=Q" (cpu_address)); return cpu_address; } diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h index 
2f84e77f1f1b..a3788dafc0e1 100644 --- a/arch/s390/include/asm/ptrace.h +++ b/arch/s390/include/asm/ptrace.h @@ -13,10 +13,12 @@ #define PIF_SYSCALL 0 /* inside a system call */ #define PIF_PER_TRAP 1 /* deliver sigtrap on return to user */ #define PIF_SYSCALL_RESTART 2 /* restart the current system call */ +#define PIF_GUEST_FAULT 3 /* indicates program check in sie64a */ #define _PIF_SYSCALL _BITUL(PIF_SYSCALL) #define _PIF_PER_TRAP _BITUL(PIF_PER_TRAP) #define _PIF_SYSCALL_RESTART _BITUL(PIF_SYSCALL_RESTART) +#define _PIF_GUEST_FAULT _BITUL(PIF_GUEST_FAULT) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 8bc87dcb10eb..2eb0c8a7b664 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -36,7 +36,7 @@ #define MACHINE_FLAG_SCC _BITUL(17) #define LPP_MAGIC _BITUL(31) -#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL) +#define LPP_PID_MASK _AC(0xffffffff, UL) #ifndef __ASSEMBLY__ diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h index 6bc941be6921..96f9a9151fde 100644 --- a/arch/s390/include/asm/syscall.h +++ b/arch/s390/include/asm/syscall.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Access to user system call parameters and results * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #ifndef _ASM_SYSCALL_H diff --git a/arch/s390/include/asm/sysinfo.h b/arch/s390/include/asm/sysinfo.h index a702cb9d4269..25057c118d56 100644 --- a/arch/s390/include/asm/sysinfo.h +++ b/arch/s390/include/asm/sysinfo.h @@ -1,12 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * definition for store system information stsi * * Copyright IBM Corp. 2001, 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Ulrich Weigand <weigand@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> */ diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h index 1807229b292f..cca406fdbe51 100644 --- a/arch/s390/include/asm/topology.h +++ b/arch/s390/include/asm/topology.h @@ -53,6 +53,7 @@ const struct cpumask *cpu_coregroup_mask(int cpu); static inline void topology_init_early(void) { } static inline void topology_schedule_update(void) { } static inline int topology_cpu_init(struct cpu *cpu) { return 0; } +static inline int topology_cpu_dedicated(int cpu_nr) { return 0; } static inline void topology_expect_change(void) { } #endif /* CONFIG_SCHED_TOPOLOGY */ diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index cdd0f0d999e2..ad6b91013a05 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -16,7 +16,7 @@ #include <asm/processor.h> #include <asm/ctl_reg.h> #include <asm/extable.h> - +#include <asm/facility.h> /* * The fs value determines whether argument validity checking should be @@ -26,27 +26,16 @@ * For historical reasons, these macros are grossly misnamed. 
*/ -#define MAKE_MM_SEG(a) ((mm_segment_t) { (a) }) - - -#define KERNEL_DS MAKE_MM_SEG(0) -#define USER_DS MAKE_MM_SEG(1) +#define KERNEL_DS (0) +#define KERNEL_DS_SACF (1) +#define USER_DS (2) +#define USER_DS_SACF (3) #define get_ds() (KERNEL_DS) #define get_fs() (current->thread.mm_segment) -#define segment_eq(a,b) ((a).ar4 == (b).ar4) +#define segment_eq(a,b) (((a) & 2) == ((b) & 2)) -static inline void set_fs(mm_segment_t fs) -{ - current->thread.mm_segment = fs; - if (uaccess_kernel()) { - set_cpu_flag(CIF_ASCE_SECONDARY); - __ctl_load(S390_lowcore.kernel_asce, 7, 7); - } else { - clear_cpu_flag(CIF_ASCE_SECONDARY); - __ctl_load(S390_lowcore.user_asce, 7, 7); - } -} +void set_fs(mm_segment_t fs); static inline int __range_ok(unsigned long addr, unsigned long size) { @@ -95,7 +84,7 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n); static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) { - unsigned long spec = 0x810000UL; + unsigned long spec = 0x010000UL; int rc; switch (size) { @@ -125,7 +114,7 @@ static inline int __put_user_fn(void *x, void __user *ptr, unsigned long size) static inline int __get_user_fn(void *x, const void __user *ptr, unsigned long size) { - unsigned long spec = 0x81UL; + unsigned long spec = 0x01UL; int rc; switch (size) { diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index ae6261ef97d5..169d7604eb80 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -46,6 +46,7 @@ struct vdso_per_cpu_data { }; extern struct vdso_data *vdso_data; +extern struct vdso_data boot_vdso_data; void vdso_alloc_boot_cpu(struct lowcore *lowcore); int vdso_alloc_per_cpu(struct lowcore *lowcore); diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9ad172dcd912..38535a57fef8 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -6,10 +6,6 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Carsten Otte <cotte@de.ibm.com> * Christian Borntraeger <borntraeger@de.ibm.com> */ diff --git a/arch/s390/include/uapi/asm/kvm_para.h b/arch/s390/include/uapi/asm/kvm_para.h index 0dc86b3a7cb0..b9ab584adf43 100644 --- a/arch/s390/include/uapi/asm/kvm_para.h +++ b/arch/s390/include/uapi/asm/kvm_para.h @@ -4,9 +4,5 @@ * * Copyright IBM Corp. 2008 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Christian Borntraeger <borntraeger@de.ibm.com> */ diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h index c36c97ffdc6f..84606b8cc49e 100644 --- a/arch/s390/include/uapi/asm/kvm_perf.h +++ b/arch/s390/include/uapi/asm/kvm_perf.h @@ -4,10 +4,6 @@ * * Copyright 2014 IBM Corp. * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
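mm_segment_t is now a plain integer and the four constants above encode two independent bits; reading off the values (an interpretation, not stated in the patch): bit 1 selects user versus kernel address space, and bit 0 records that SACF-style uaccess is currently active, which is why segment_eq() masks with 2 so KERNEL_DS compares equal to KERNEL_DS_SACF. A sketch of the predicates this implies (helper names are hypothetical):

	static inline bool seg_is_user(mm_segment_t fs) { return fs & 2; }
	static inline bool seg_in_sacf(mm_segment_t fs) { return fs & 1; }

The SACF variants are what enable_sacf_uaccess()/disable_sacf_uaccess() hand out around the futex and uaccess paths changed earlier in this series.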
*/ #ifndef __LINUX_KVM_PERF_S390_H diff --git a/arch/s390/include/uapi/asm/perf_regs.h b/arch/s390/include/uapi/asm/perf_regs.h new file mode 100644 index 000000000000..7c8564f98205 --- /dev/null +++ b/arch/s390/include/uapi/asm/perf_regs.h @@ -0,0 +1,43 @@ +#ifndef _ASM_S390_PERF_REGS_H +#define _ASM_S390_PERF_REGS_H + +enum perf_event_s390_regs { + PERF_REG_S390_R0, + PERF_REG_S390_R1, + PERF_REG_S390_R2, + PERF_REG_S390_R3, + PERF_REG_S390_R4, + PERF_REG_S390_R5, + PERF_REG_S390_R6, + PERF_REG_S390_R7, + PERF_REG_S390_R8, + PERF_REG_S390_R9, + PERF_REG_S390_R10, + PERF_REG_S390_R11, + PERF_REG_S390_R12, + PERF_REG_S390_R13, + PERF_REG_S390_R14, + PERF_REG_S390_R15, + PERF_REG_S390_FP0, + PERF_REG_S390_FP1, + PERF_REG_S390_FP2, + PERF_REG_S390_FP3, + PERF_REG_S390_FP4, + PERF_REG_S390_FP5, + PERF_REG_S390_FP6, + PERF_REG_S390_FP7, + PERF_REG_S390_FP8, + PERF_REG_S390_FP9, + PERF_REG_S390_FP10, + PERF_REG_S390_FP11, + PERF_REG_S390_FP12, + PERF_REG_S390_FP13, + PERF_REG_S390_FP14, + PERF_REG_S390_FP15, + PERF_REG_S390_MASK, + PERF_REG_S390_PC, + + PERF_REG_S390_MAX +}; + +#endif /* _ASM_S390_PERF_REGS_H */ diff --git a/arch/s390/include/uapi/asm/virtio-ccw.h b/arch/s390/include/uapi/asm/virtio-ccw.h index 967aad390105..3a77833c74dc 100644 --- a/arch/s390/include/uapi/asm/virtio-ccw.h +++ b/arch/s390/include/uapi/asm/virtio-ccw.h @@ -4,10 +4,6 @@ * * Copyright IBM Corp. 2013 * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Author(s): Cornelia Huck <cornelia.huck@de.ibm.com> */ #ifndef __KVM_VIRTIO_CCW_H diff --git a/arch/s390/include/uapi/asm/zcrypt.h b/arch/s390/include/uapi/asm/zcrypt.h index 137ef473584e..d568307321fc 100644 --- a/arch/s390/include/uapi/asm/zcrypt.h +++ b/arch/s390/include/uapi/asm/zcrypt.h @@ -9,20 +9,6 @@ * Eric Rossman (edrossma@us.ibm.com) * * Hotplug & misc device support: Jochen Roehrig (roehrig@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
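The new uapi perf_regs.h gives user space stable indices for sampled registers. A hedged consumer-side sketch (standard perf_event_attr setup for perf_event_open(); nothing s390-specific beyond the enum values):

	struct perf_event_attr attr = { 0 };

	attr.sample_type |= PERF_SAMPLE_REGS_USER;
	attr.sample_regs_user = (1ULL << PERF_REG_S390_R14) |
				(1ULL << PERF_REG_S390_R15) |
				(1ULL << PERF_REG_S390_PC);

Each set bit asks the kernel to dump that register with every sample, and PERF_REG_S390_MAX bounds the mask; the matching kernel side lands in the new perf_regs.o object added to the Makefile below.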
*/ #ifndef __ASM_S390_ZCRYPT_H diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 83bc82001c06..909bce65cb2b 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -59,7 +59,7 @@ obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o +obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o extra-y += head.o head64.o vmlinux.lds @@ -77,10 +77,9 @@ obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UPROBES) += uprobes.o -obj-$(CONFIG_ALTERNATIVES) += alternative.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o -obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o +obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_TRACEPOINTS) += trace.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 33ec80df7ed4..587b195b588d 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -171,6 +171,7 @@ int main(void) OFFSET(__LC_RESTART_DATA, lowcore, restart_data); OFFSET(__LC_RESTART_SOURCE, lowcore, restart_source); OFFSET(__LC_USER_ASCE, lowcore, user_asce); + OFFSET(__LC_VDSO_ASCE, lowcore, vdso_asce); OFFSET(__LC_LPP, lowcore, lpp); OFFSET(__LC_CURRENT_PID, lowcore, current_pid); OFFSET(__LC_PERCPU_OFFSET, lowcore, percpu_offset); @@ -178,7 +179,6 @@ int main(void) OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); - OFFSET(__LC_PASTE, lowcore, paste); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index a4a1208e3df3..ef246940b44c 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -50,19 +50,6 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; -static inline void sigset_to_sigset32(unsigned long *set64, - compat_sigset_word *set32) -{ - set32[0] = (compat_sigset_word) set64[0]; - set32[1] = (compat_sigset_word)(set64[0] >> 32); -} - -static inline void sigset32_to_sigset(compat_sigset_word *set32, - unsigned long *set64) -{ - set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); -} - int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -294,12 +281,10 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; - compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (get_compat_sigset(&set, (compat_sigset_t __user *)frame->sc.oldmask)) goto badframe; - sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); save_fpu_regs(); if (restore_sigregs32(regs, &frame->sregs)) @@ -317,12 +302,10 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; - compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&cset, &frame->uc.uc_sigmask, 
sizeof(cset))) + if (get_compat_sigset(&set, &frame->uc.uc_sigmask)) goto badframe; - sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -372,7 +355,6 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, { int sig = ksig->sig; sigframe32 __user *frame; - struct sigcontext32 sc; unsigned long restorer; size_t frame_size; @@ -394,9 +376,10 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - sigset_to_sigset32(set->sig, sc.oldmask); - sc.sregs = (__u32)(unsigned long __force) &frame->sregs; - if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) + if (put_compat_sigset((compat_sigset_t __user *)frame->sc.oldmask, + set, sizeof(compat_sigset_t))) + return -EFAULT; + if (__put_user(ptr_to_compat(&frame->sc), &frame->sc.sregs)) return -EFAULT; /* Store registers needed to create the signal frame */ @@ -455,7 +438,6 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -502,12 +484,11 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. */ - sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || + put_compat_sigset(&frame->uc.uc_sigmask, set, sizeof(compat_sigset_t)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 58b9e127b615..80e974adb9e8 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1392,7 +1392,7 @@ int debug_dflt_header_fn(debug_info_t *id, struct debug_view *view, else except_str = "-"; caller = (unsigned long) entry->caller; - rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %p ", + rc += sprintf(out_buf, "%02i %011ld:%06lu %1u %1s %02i %pK ", area, sec, usec, level, except_str, entry->id.fields.cpuid, (void *)caller); return rc; diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index b811d3a8417d..b2c68fbf2634 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Disassemble s390 instructions. * @@ -396,9 +397,14 @@ struct s390_insn *find_insn(unsigned char *code) unsigned char opfrag; int i; + /* Search the opcode offset table to find an entry which + * matches the beginning of the opcode. If there is no match + * the last entry will be used, which is the default entry for + * unknown instructions as well as 1-byte opcode instructions. + */ for (i = 0; i < ARRAY_SIZE(opcode_offset); i++) { entry = &opcode_offset[i]; - if (entry->opcode == code[0] || entry->opcode == 0) + if (entry->opcode == code[0]) break; } @@ -480,7 +486,7 @@ void show_code(struct pt_regs *regs) { char *mode = user_mode(regs) ? 
"User" : "Krnl"; unsigned char code[64]; - char buffer[64], *ptr; + char buffer[128], *ptr; mm_segment_t old_fs; unsigned long addr; int start, end, opsize, hops, i; diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 2aa545dca4d5..5b23c4f6e50c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack dumping functions * diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index f498d201f98d..9e5f6cd8e4c2 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -180,18 +180,17 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) */ ENTRY(__switch_to) stmg %r6,%r15,__SF_GPRS(%r15) # store gprs of prev task - lgr %r1,%r2 - aghi %r1,__TASK_thread # thread_struct of prev task - lg %r5,__TASK_stack(%r3) # start of kernel stack of next - stg %r15,__THREAD_ksp(%r1) # store kernel stack of prev - lgr %r1,%r3 - aghi %r1,__TASK_thread # thread_struct of next task + lghi %r4,__TASK_stack + lghi %r1,__TASK_thread + lg %r5,0(%r4,%r3) # start of kernel stack of next + stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lgr %r15,%r5 aghi %r15,STACK_INIT # end of kernel stack of next stg %r3,__LC_CURRENT # store task struct of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack - lg %r15,__THREAD_ksp(%r1) # load kernel stack of next - mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next + lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next + aghi %r3,__TASK_pid + mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP bzr %r14 @@ -379,13 +378,21 @@ ENTRY(system_call) jg s390_handle_mcck # TIF bit will be cleared by handler # -# _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce +# _CIF_ASCE_PRIMARY and/or _CIF_ASCE_SECONDARY set, load user space asce # .Lsysc_asce: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY + lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY + jz .Lsysc_return +#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES + tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? + jnz .Lsysc_set_fs_fixup ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY - jz .Lsysc_return + j .Lsysc_return +.Lsysc_set_fs_fixup: +#endif larl %r14,.Lsysc_return jg set_fs_fixup @@ -518,6 +525,7 @@ ENTRY(pgm_check_handler) stmg %r8,%r15,__LC_SAVE_AREA_SYNC lg %r10,__LC_LAST_BREAK lg %r12,__LC_CURRENT + lghi %r11,0 larl %r13,cleanup_critical lmg %r8,%r9,__LC_PGM_OLD_PSW tmhh %r8,0x0001 # test problem state bit @@ -532,6 +540,7 @@ ENTRY(pgm_check_handler) ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit + lghi %r11,_PIF_GUEST_FAULT #endif 0: tmhh %r8,0x4000 # PER bit set in old PSW ? 
jnz 1f # -> enabled, can't be a double fault @@ -549,13 +558,14 @@ ENTRY(pgm_check_handler) jz 3f mvc __THREAD_trap_tdb(256,%r14),0(%r13) 3: stg %r10,__THREAD_last_break(%r14) -4: la %r11,STACK_FRAME_OVERHEAD(%r15) +4: lgr %r13,%r11 + la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC stmg %r8,%r9,__PT_PSW(%r11) mvc __PT_INT_CODE(4,%r11),__LC_PGM_ILC mvc __PT_INT_PARM_LONG(8,%r11),__LC_TRANS_EXC_CODE - xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) + stg %r13,__PT_FLAGS(%r11) stg %r10,__PT_ARGS(%r11) tm __LC_PGM_ILC+3,0x80 # check for per exception jz 5f @@ -738,10 +748,18 @@ ENTRY(io_int_handler) # _CIF_ASCE_PRIMARY and/or CIF_ASCE_SECONDARY set, load user space asce # .Lio_asce: + ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_SECONDARY + lctlg %c7,%c7,__LC_VDSO_ASCE # load secondary asce + TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_PRIMARY + jz .Lio_return +#ifndef CONFIG_HAVE_MARCH_Z10_FEATURES + tm __LC_STFLE_FAC_LIST+3,0x10 # has MVCOS ? + jnz .Lio_set_fs_fixup ni __LC_CPU_FLAGS+7,255-_CIF_ASCE_PRIMARY lctlg %c1,%c1,__LC_USER_ASCE # load primary asce - TSTMSK __LC_CPU_FLAGS,_CIF_ASCE_SECONDARY - jz .Lio_return + j .Lio_return +.Lio_set_fs_fixup: +#endif larl %r14,.Lio_return jg set_fs_fixup diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 172002da7075..38a973ccf501 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -28,7 +28,7 @@ ENTRY(startup_continue) lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers lg %r12,.Lparmaddr-.LPG1(%r13) # pointer to parameter area # move IPL device to lowcore - lghi %r0,__LC_PASTE + larl %r0,boot_vdso_data stg %r0,__LC_VDSO_PER_CPU # # Setup stack diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 310e59e6eb4b..8ecb8726ac47 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ipl/reipl/dump support for Linux on s390. * diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 1a6521af1751..af3722c28fd9 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -1,20 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel Probes (KProbes) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * * Copyright IBM Corp. 2002, 2006 * * s390 port, used ppc64 as template. 
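The reworked .Lsysc_asce/.Lio_asce exit blocks above always restore CR7 from the new vdso_asce slot, and only take the slow set_fs_fixup() path when a primary-ASCE change is pending on a machine with MVCOS. A C rendering of that control flow (sketch only; load_cr1()/load_cr7()/machine_has_mvcos() stand in for the lctlg/tm instructions):

	clear_cpu_flag(CIF_ASCE_SECONDARY);
	load_cr7(S390_lowcore.vdso_asce);	/* secondary ASCE */
	if (test_cpu_flag(CIF_ASCE_PRIMARY)) {
		if (machine_has_mvcos()) {	/* compiled away from z10 on */
			set_fs_fixup();		/* needs the full fixup */
		} else {
			clear_cpu_flag(CIF_ASCE_PRIMARY);
			load_cr1(S390_lowcore.user_asce); /* primary ASCE */
		}
	}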
Mike Grundy <grundym@us.ibm.com> diff --git a/arch/s390/kernel/lgr.c b/arch/s390/kernel/lgr.c index bf9622f0e6b1..452502f9a0d9 100644 --- a/arch/s390/kernel/lgr.c +++ b/arch/s390/kernel/lgr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Linux Guest Relocation (LGR) detection * diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 6d9f73bb4142..b7abfad4fd7d 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Kernel module help for s390. * @@ -8,20 +9,6 @@ * * based on i386 version * Copyright (C) 2001 Rusty Russell. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include <linux/module.h> #include <linux/elf.h> @@ -433,16 +420,13 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *s; char *secstrings; - if (IS_ENABLED(CONFIG_ALTERNATIVES)) { - secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - if (!strcmp(".altinstructions", - secstrings + s->sh_name)) { - /* patch .altinstructions */ - void *aseg = (void *)s->sh_addr; + secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".altinstructions", secstrings + s->sh_name)) { + /* patch .altinstructions */ + void *aseg = (void *)s->sh_addr; - apply_alternatives(aseg, aseg + s->sh_size); - } + apply_alternatives(aseg, aseg + s->sh_size); } } diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 3f3cda41f32a..c7a627620e5e 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Machine check handler * @@ -191,7 +192,6 @@ static int notrace s390_check_registers(union mci mci, int umode) { union ctlreg2 cr2; int kill_task; - void *fpt_save_area; kill_task = 0; @@ -224,7 +224,6 @@ static int notrace s390_check_registers(union mci mci, int umode) if (!test_cpu_flag(CIF_FPU)) kill_task = 1; } - fpt_save_area = &S390_lowcore.floating_pt_save_area; if (!mci.fc) { /* * Floating point control register can't be restored. diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 746d03423333..cc085e2d2ce9 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x - CPU-measurement Counter Facility * * Copyright IBM Corp. 2012, 2017 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
*/ #define KMSG_COMPONENT "cpum_cf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index bd4bbf61aaf3..1c9ddd7aa5ec 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for the System z CPU-measurement Sampling Facility * * Copyright IBM Corp. 2013 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #define KMSG_COMPONENT "cpum_sf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt @@ -15,6 +12,7 @@ #include <linux/kernel_stat.h> #include <linux/perf_event.h> #include <linux/percpu.h> +#include <linux/pid.h> #include <linux/notifier.h> #include <linux/export.h> #include <linux/slab.h> @@ -77,6 +75,15 @@ struct sf_buffer { unsigned long *tail; /* last sample-data-block-table */ }; +struct aux_buffer { + struct sf_buffer sfb; + unsigned long head; /* index of SDB of buffer head */ + unsigned long alert_mark; /* index of SDB of alert request position */ + unsigned long empty_mark; /* mark of SDB not marked full */ + unsigned long *sdb_index; /* SDB address for fast lookup */ + unsigned long *sdbt_index; /* SDBT address for fast lookup */ +}; + struct cpu_hw_sf { /* CPU-measurement sampling information block */ struct hws_qsi_info_block qsi; @@ -85,6 +92,7 @@ struct cpu_hw_sf { struct sf_buffer sfb; /* Sampling buffer */ unsigned int flags; /* Status flags */ struct perf_event *event; /* Scheduled perf event */ + struct perf_output_handle handle; /* AUX buffer output handle */ }; static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); @@ -341,22 +349,6 @@ static void sfb_init_allocs(unsigned long num, struct hw_perf_event *hwc) sfb_account_allocs(num, hwc); } -static size_t event_sample_size(struct hw_perf_event *hwc) -{ - struct sf_raw_sample *sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); - size_t sample_size; - - /* The sample size depends on the sampling function: The basic-sampling - * function must be always enabled, diagnostic-sampling function is - * optional. - */ - sample_size = sfr->bsdes; - if (SAMPL_DIAG_MODE(hwc)) - sample_size += sfr->dsdes; - - return sample_size; -} - static void deallocate_buffers(struct cpu_hw_sf *cpuhw) { if (cpuhw->sfb.sdbt) @@ -366,35 +358,7 @@ static void deallocate_buffers(struct cpu_hw_sf *cpuhw) static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) { unsigned long n_sdb, freq, factor; - size_t sfr_size, sample_size; - struct sf_raw_sample *sfr; - - /* Allocate raw sample buffer - * - * The raw sample buffer is used to temporarily store sampling data - * entries for perf raw sample processing. The buffer size mainly - * depends on the size of diagnostic-sampling data entries which is - * machine-specific. The exact size calculation includes: - * 1. The first 4 bytes of diagnostic-sampling data entries are - * already reflected in the sf_raw_sample structure. Subtract - * these bytes. - * 2. The perf raw sample data must be 8-byte aligned (u64) and - * perf's internal data size must be considered too. So add - * an additional u32 for correct alignment and subtract before - * allocating the buffer. - * 3. Store the raw sample buffer pointer in the perf event - * hardware structure. 
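With the intermediate raw-sample buffer gone, buffer sizing is based purely on the fixed basic entry. A back-of-envelope check of the n_sdb computation that follows (the 32-byte entry size is an assumption for illustration; the machine reports the real bsdes):

	/* one 4 KiB SDB reserves 64 bytes for its trailer:          */
	per_sdb = (4096 - 64) / 32;		/* = 126 basic entries  */
	/* so a rate of 20000 samples/sec needs                      */
	n_sdb = DIV_ROUND_UP(20000, 126);	/* = 159 SDBs/second    */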
- */ - sfr_size = ALIGN((sizeof(*sfr) - sizeof(sfr->diag) + cpuhw->qsi.dsdes) + - sizeof(u32), sizeof(u64)); - sfr_size -= sizeof(u32); - sfr = kzalloc(sfr_size, GFP_KERNEL); - if (!sfr) - return -ENOMEM; - sfr->size = sfr_size; - sfr->bsdes = cpuhw->qsi.bsdes; - sfr->dsdes = cpuhw->qsi.dsdes; - RAWSAMPLE_REG(hwc) = (unsigned long) sfr; + size_t sample_size; /* Calculate sampling buffers using 4K pages * @@ -420,7 +384,7 @@ static int allocate_buffers(struct cpu_hw_sf *cpuhw, struct hw_perf_event *hwc) * ensure a minimum of CPUM_SF_MIN_SDBT (one table can manage up * to 511 SDBs). */ - sample_size = event_sample_size(hwc); + sample_size = sizeof(struct hws_basic_entry); freq = sample_rate_to_freq(&cpuhw->qsi, SAMPL_RATE(hwc)); factor = 1; n_sdb = DIV_ROUND_UP(freq, factor * ((PAGE_SIZE-64) / sample_size)); @@ -619,10 +583,6 @@ static int reserve_pmc_hardware(void) static void hw_perf_event_destroy(struct perf_event *event) { - /* Free raw sample buffer */ - if (RAWSAMPLE_REG(&event->hw)) - kfree((void *) RAWSAMPLE_REG(&event->hw)); - /* Release PMC if this is the last perf event */ if (!atomic_add_unless(&num_events, -1, 1)) { mutex_lock(&pmc_reserve_mutex); @@ -642,15 +602,8 @@ static void hw_init_period(struct hw_perf_event *hwc, u64 period) static void hw_reset_registers(struct hw_perf_event *hwc, unsigned long *sdbt_origin) { - struct sf_raw_sample *sfr; - /* (Re)set to first sample-data-block-table */ TEAR_REG(hwc) = (unsigned long) sdbt_origin; - - /* (Re)set raw sampling buffer register */ - sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(hwc); - memset(&sfr->basic, 0, sizeof(sfr->basic)); - memset(&sfr->diag, 0, sfr->dsdes); } static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, @@ -660,6 +613,67 @@ static unsigned long hw_limit_rate(const struct hws_qsi_info_block *si, si->min_sampl_rate, si->max_sampl_rate); } +static u32 cpumsf_pid_type(struct perf_event *event, + u32 pid, enum pid_type type) +{ + struct task_struct *tsk; + + /* Idle process */ + if (!pid) + goto out; + + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + pid = -1; + if (tsk) { + /* + * Only top level events contain the pid namespace in which + * they are created. + */ + if (event->parent) + event = event->parent; + pid = __task_pid_nr_ns(tsk, type, event->ns); + /* + * See also 1d953111b648 + * "perf/core: Don't report zero PIDs for exiting tasks". + */ + if (!pid && !pid_alive(tsk)) + pid = -1; + } +out: + return pid; +} + +static void cpumsf_output_event_pid(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + u32 pid; + struct perf_event_header header; + struct perf_output_handle handle; + + /* + * Obtain the PID from the basic-sampling data entry and + * correct the data->tid_entry.pid value. + */ + pid = data->tid_entry.pid; + + /* Protect callchain buffers, tasks */ + rcu_read_lock(); + + perf_prepare_sample(&header, data, event, regs); + if (perf_output_begin(&handle, event, header.size)) + goto out; + + /* Update the process ID (see also kernel/events/core.c) */ + data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID); + data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); + + perf_output_sample(&handle, &header, data, event); + perf_output_end(&handle); +out: + rcu_read_unlock(); +} + static int __hw_perf_event_init(struct perf_event *event) { struct cpu_hw_sf *cpuhw; @@ -770,6 +784,10 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->extra_reg.reg = REG_OVERFLOW; OVERFLOW_REG(hwc) = 0; + /* Use AUX buffer. 
No need to allocate it by ourself */ + if (attr->config == PERF_EVENT_CPUM_SF_DIAG) + return 0; + /* Allocate the per-CPU sampling buffer using the CPU information * from the event. If the event is not pinned to a particular * CPU (event->cpu == -1; or cpuhw == NULL), allocate sampling @@ -789,6 +807,14 @@ static int __hw_perf_event_init(struct perf_event *event) break; } } + + /* If PID/TID sampling is active, replace the default overflow + * handler to extract and resolve the PIDs from the basic-sampling + * data entries. + */ + if (event->attr.sample_type & PERF_SAMPLE_TID) + if (is_default_overflow_handler(event)) + event->overflow_handler = cpumsf_output_event_pid; out: return err; } @@ -866,10 +892,15 @@ static void cpumsf_pmu_enable(struct pmu *pmu) */ if (cpuhw->event) { hwc = &cpuhw->event->hw; - /* Account number of overflow-designated buffer extents */ - sfb_account_overflows(cpuhw, hwc); - if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) - extend_sampling_buffer(&cpuhw->sfb, hwc); + if (!(SAMPL_DIAG_MODE(hwc))) { + /* + * Account number of overflow-designated + * buffer extents + */ + sfb_account_overflows(cpuhw, hwc); + if (sfb_has_pending_allocs(&cpuhw->sfb, hwc)) + extend_sampling_buffer(&cpuhw->sfb, hwc); + } } /* (Re)enable the PMU and sampling facility */ @@ -884,6 +915,9 @@ static void cpumsf_pmu_enable(struct pmu *pmu) return; } + /* Load current program parameter */ + lpp(&S390_lowcore.lpp); + debug_sprintf_event(sfdbg, 6, "pmu_enable: es=%i cs=%i ed=%i cd=%i " "tear=%p dear=%p\n", cpuhw->lsctl.es, cpuhw->lsctl.cs, cpuhw->lsctl.ed, cpuhw->lsctl.cd, @@ -967,22 +1001,16 @@ static int perf_exclude_event(struct perf_event *event, struct pt_regs *regs, * * Return non-zero if an event overflow occurred. */ -static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) +static int perf_push_sample(struct perf_event *event, + struct hws_basic_entry *basic) { int overflow; struct pt_regs regs; struct perf_sf_sde_regs *sde_regs; struct perf_sample_data data; - struct perf_raw_record raw = { - .frag = { - .size = sfr->size, - .data = sfr, - }, - }; /* Setup perf sample */ perf_sample_data_init(&data, 0, event->hw.last_period); - data.raw = &raw; /* Setup pt_regs to look like an CPU-measurement external interrupt * using the Program Request Alert code. The regs.int_parm_long @@ -994,11 +1022,11 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) regs.int_parm = CPU_MF_INT_SF_PRA; sde_regs = (struct perf_sf_sde_regs *) ®s.int_parm_long; - psw_bits(regs.psw).ia = sfr->basic.ia; - psw_bits(regs.psw).dat = sfr->basic.T; - psw_bits(regs.psw).wait = sfr->basic.W; - psw_bits(regs.psw).pstate = sfr->basic.P; - psw_bits(regs.psw).as = sfr->basic.AS; + psw_bits(regs.psw).ia = basic->ia; + psw_bits(regs.psw).dat = basic->T; + psw_bits(regs.psw).wait = basic->W; + psw_bits(regs.psw).pstate = basic->P; + psw_bits(regs.psw).as = basic->AS; /* * Use the hardware provided configuration level to decide if the @@ -1011,7 +1039,7 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) * If the value differs from 0xffff (the host value), we assume to * be a KVM guest. 
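The hunks around this point form one pipeline for namespace-correct pids: the LPP doubleword stamped into each sample carries the raw host pid, perf_push_sample() parks it in data.tid_entry.pid, and the cpumsf_output_event_pid() overflow handler (installed when PERF_SAMPLE_TID is requested) rewrites it for the event's pid namespace. Condensed from the surrounding hunks (not a compilable unit):

	/* in perf_push_sample(): raw pid as stamped by LPP */
	data.tid_entry.pid = basic->hpp & LPP_PID_MASK;

	/* later, in cpumsf_output_event_pid(): translate it */
	data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID);
	data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID);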
*/ - switch (sfr->basic.CL) { + switch (basic->CL) { case 1: /* logical partition */ sde_regs->in_guest = 0; break; @@ -1019,11 +1047,17 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) sde_regs->in_guest = 1; break; default: /* old machine, use heuristics */ - if (sfr->basic.gpp || sfr->basic.prim_asn != 0xffff) + if (basic->gpp || basic->prim_asn != 0xffff) sde_regs->in_guest = 1; break; } + /* + * Store the PID value from the sample-data-entry to be + * processed and resolved by cpumsf_output_event_pid(). + */ + data.tid_entry.pid = basic->hpp & LPP_PID_MASK; + overflow = 0; if (perf_exclude_event(event, ®s, sde_regs)) goto out; @@ -1041,75 +1075,12 @@ static void perf_event_count_update(struct perf_event *event, u64 count) local64_add(count, &event->count); } -static int sample_format_is_valid(struct hws_combined_entry *sample, - unsigned int flags) -{ - if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) - /* Only basic-sampling data entries with data-entry-format - * version of 0x0001 can be processed. - */ - if (sample->basic.def != 0x0001) - return 0; - if (flags & PERF_CPUM_SF_DIAG_MODE) - /* The data-entry-format number of diagnostic-sampling data - * entries can vary. Because diagnostic data is just passed - * through, do only a sanity check on the DEF. - */ - if (sample->diag.def < 0x8001) - return 0; - return 1; -} - -static int sample_is_consistent(struct hws_combined_entry *sample, - unsigned long flags) -{ - /* This check applies only to basic-sampling data entries of potentially - * combined-sampling data entries. Invalid entries cannot be processed - * by the PMU and, thus, do not deliver an associated - * diagnostic-sampling data entry. - */ - if (unlikely(!(flags & PERF_CPUM_SF_BASIC_MODE))) - return 0; - /* - * Samples are skipped, if they are invalid or for which the - * instruction address is not predictable, i.e., the wait-state bit is - * set. - */ - if (sample->basic.I || sample->basic.W) - return 0; - return 1; -} - -static void reset_sample_slot(struct hws_combined_entry *sample, - unsigned long flags) -{ - if (likely(flags & PERF_CPUM_SF_BASIC_MODE)) - sample->basic.def = 0; - if (flags & PERF_CPUM_SF_DIAG_MODE) - sample->diag.def = 0; -} - -static void sfr_store_sample(struct sf_raw_sample *sfr, - struct hws_combined_entry *sample) -{ - if (likely(sfr->format & PERF_CPUM_SF_BASIC_MODE)) - sfr->basic = sample->basic; - if (sfr->format & PERF_CPUM_SF_DIAG_MODE) - memcpy(&sfr->diag, &sample->diag, sfr->dsdes); -} - -static void debug_sample_entry(struct hws_combined_entry *sample, - struct hws_trailer_entry *te, - unsigned long flags) +static void debug_sample_entry(struct hws_basic_entry *sample, + struct hws_trailer_entry *te) { debug_sprintf_event(sfdbg, 4, "hw_collect_samples: Found unknown " - "sampling data entry: te->f=%i basic.def=%04x (%p)" - " diag.def=%04x (%p)\n", te->f, - sample->basic.def, &sample->basic, - (flags & PERF_CPUM_SF_DIAG_MODE) - ? sample->diag.def : 0xFFFF, - (flags & PERF_CPUM_SF_DIAG_MODE) - ? 
&sample->diag : NULL); + "sampling data entry: te->f=%i basic.def=%04x (%p)\n", + te->f, sample->def, sample); } /* hw_collect_samples() - Walk through a sample-data-block and collect samples @@ -1135,44 +1106,37 @@ static void debug_sample_entry(struct hws_combined_entry *sample, static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, unsigned long long *overflow) { - unsigned long flags = SAMPL_FLAGS(&event->hw); - struct hws_combined_entry *sample; struct hws_trailer_entry *te; - struct sf_raw_sample *sfr; - size_t sample_size; + struct hws_basic_entry *sample; - /* Prepare and initialize raw sample data */ - sfr = (struct sf_raw_sample *) RAWSAMPLE_REG(&event->hw); - sfr->format = flags & PERF_CPUM_SF_MODE_MASK; - - sample_size = event_sample_size(&event->hw); te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); - sample = (struct hws_combined_entry *) *sdbt; + sample = (struct hws_basic_entry *) *sdbt; while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ - if (!sample->basic.def) + if (!sample->def) break; /* Update perf event period */ perf_event_count_update(event, SAMPL_RATE(&event->hw)); - /* Check sampling data entry */ - if (sample_format_is_valid(sample, flags)) { + /* Check whether sample is valid */ + if (sample->def == 0x0001) { /* If an event overflow occurred, the PMU is stopped to * throttle event delivery. Remaining sample data is * discarded. */ if (!*overflow) { - if (sample_is_consistent(sample, flags)) { + /* Check whether sample is consistent */ + if (sample->I == 0 && sample->W == 0) { /* Deliver sample data to perf */ - sfr_store_sample(sfr, sample); - *overflow = perf_push_sample(event, sfr); + *overflow = perf_push_sample(event, + sample); } } else /* Count discarded samples */ *overflow += 1; } else { - debug_sample_entry(sample, te, flags); + debug_sample_entry(sample, te); /* Sample slot is not yet written or other record. * * This condition can occur if the buffer was reused @@ -1188,8 +1152,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, } /* Reset sample slot and advance to next sample */ - reset_sample_slot(sample, flags); - sample += sample_size; + sample->def = 0; + sample++; } } @@ -1215,6 +1179,13 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) unsigned long long event_overflow, sampl_overflow, num_sdb, te_flags; int done; + /* + * AUX buffer is used when in diagnostic sampling mode. + * No perf events/samples are created. + */ + if (SAMPL_DIAG_MODE(&event->hw)) + return; + if (flush_all && SDB_FULL_BLOCKS(hwc)) flush_all = 0; @@ -1291,6 +1262,439 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow, event_overflow); } +#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) +#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0) +#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark) +#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark) + +/* + * Get trailer entry by index of SDB. + */ +static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, + unsigned long index) +{ + unsigned long sdb; + + index = AUX_SDB_INDEX(aux, index); + sdb = aux->sdb_index[index]; + return (struct hws_trailer_entry *)trailer_entry_ptr(sdb); +} + +/* + * Finish sampling on the cpu. Called by cpumsf_pmu_del() with pmu + * disabled. Collect the full SDBs in AUX buffer which have not reached + * the point of alert indicator. 
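In the AUX code, head, alert_mark and empty_mark are monotonically increasing SDB counters; only AUX_SDB_INDEX() folds them onto the physical ring. A small worked example of the macros above (numbers invented for illustration):

	/* num_sdb = 8, head = 13, alert_mark = 16, empty_mark = 18  */
	AUX_SDB_INDEX(aux, 13);		/* -> physical slot 13 % 8 = 5   */
	AUX_SDB_NUM_ALERT(aux);		/* -> 16 - 13 + 1 = 4 SDBs to scan */
	AUX_SDB_NUM_EMPTY(aux);		/* -> 18 - 13 + 1 = 6 SDBs ready */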
SDBs that are not + * full are ignored. + * + * 1. Scan SDBs to see how much data is there and consume them. + * 2. Remove the alert indicator from the buffer. + */ +static void aux_output_end(struct perf_output_handle *handle) +{ + unsigned long i, range_scan, idx; + struct aux_buffer *aux; + struct hws_trailer_entry *te; + + aux = perf_get_aux(handle); + if (!aux) + return; + + range_scan = AUX_SDB_NUM_ALERT(aux); + for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + if (!(te->flags & SDB_TE_BUFFER_FULL_MASK)) + break; + } + /* i is num of SDBs which are full */ + perf_aux_output_end(handle, i << PAGE_SHIFT); + + /* Remove alert indicators in the buffer */ + te = aux_sdb_trailer(aux, aux->alert_mark); + te->flags &= ~SDB_TE_ALERT_REQ_MASK; + + debug_sprintf_event(sfdbg, 6, "aux_output_end: collect %lx SDBs\n", i); +} + +/* + * Start sampling on the CPU. Called by cpumsf_pmu_add() when an event + * is first added to the CPU or rescheduled again to the CPU. It is called + * with pmu disabled. + * + * 1. Reset the trailer of SDBs to get ready for new data. + * 2. Tell the hardware where to put the data by resetting the SDB buffer + * head (tear/dear). + */ +static int aux_output_begin(struct perf_output_handle *handle, + struct aux_buffer *aux, + struct cpu_hw_sf *cpuhw) +{ + unsigned long range; + unsigned long i, range_scan, idx; + unsigned long head, base, offset; + struct hws_trailer_entry *te; + + if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) + return -EINVAL; + + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range <= 1) + return -ENOMEM; + + /* + * SDBs between aux->head and aux->empty_mark are already ready + * for new data. range_scan is num of SDBs not within them. + */ + if (range > AUX_SDB_NUM_EMPTY(aux)) { + range_scan = range - AUX_SDB_NUM_EMPTY(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + te->flags = te->flags & ~SDB_TE_BUFFER_FULL_MASK; + te->flags = te->flags & ~SDB_TE_ALERT_REQ_MASK; + te->overflow = 0; + } + /* Save the position of empty SDBs */ + aux->empty_mark = aux->head + range - 1; + } + + /* Set alert indicator */ + aux->alert_mark = aux->head + range/2 - 1; + te = aux_sdb_trailer(aux, aux->alert_mark); + te->flags = te->flags | SDB_TE_ALERT_REQ_MASK; + + /* Reset hardware buffer head */ + head = AUX_SDB_INDEX(aux, aux->head); + base = aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; + offset = head % CPUM_SF_SDB_PER_TABLE; + cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); + cpuhw->lsctl.dear = aux->sdb_index[head]; + + debug_sprintf_event(sfdbg, 6, "aux_output_begin: " + "head->alert_mark->empty_mark (num_alert, range)" + "[%lx -> %lx -> %lx] (%lx, %lx) " + "tear index %lx, tear %lx dear %lx\n", + aux->head, aux->alert_mark, aux->empty_mark, + AUX_SDB_NUM_ALERT(aux), range, + head / CPUM_SF_SDB_PER_TABLE, + cpuhw->lsctl.tear, + cpuhw->lsctl.dear); + + return 0; +}
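The marker arithmetic above is easier to follow with concrete numbers: aux_output_begin() places the alert mark halfway through the window that perf handed out and the empty mark at its end, while logical positions grow without bound and only the index lookup wraps onto ring slots. What follows is a minimal user-space sketch of that arithmetic, not kernel code; SDB_INDEX, SDB_NUM and the local variables are illustrative stand-ins for the aux_buffer fields and the AUX_SDB_* macros.

#include <assert.h>
#include <stdio.h>

#define SDB_INDEX(num_sdb, i)	((i) % (num_sdb))
#define SDB_NUM(start, end)	((end) >= (start) ? (end) - (start) + 1 : 0)

int main(void)
{
	unsigned long num_sdb = 8;	/* SDBs backing the ring */
	unsigned long head = 5;		/* first SDB of the fresh window */
	unsigned long range = 4;	/* window size granted by perf */
	unsigned long alert_mark, empty_mark;

	/* Alert fires halfway through the window, as in aux_output_begin() */
	alert_mark = head + range / 2 - 1;
	/* Every SDB in the window was (re)initialized, so all are empty */
	empty_mark = head + range - 1;

	/* Two SDBs lie between head and the alert mark, inclusive */
	assert(SDB_NUM(head, alert_mark) == 2);

	/* Logical position 8 wraps to ring slot 0 */
	printf("alert slot %lu, empty-mark slot %lu\n",
	       SDB_INDEX(num_sdb, alert_mark),
	       SDB_INDEX(num_sdb, empty_mark));
	return 0;
}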
+ +/* + * Set alert indicator on SDB at index @alert_index while sampler is running. + * + * Return true on success. + * Return false if the full indicator was already set by the hardware sampler. + */ +static bool aux_set_alert(struct aux_buffer *aux, unsigned long alert_index, + unsigned long long *overflow) +{ + unsigned long long orig_overflow, orig_flags, new_flags; + struct hws_trailer_entry *te; + + te = aux_sdb_trailer(aux, alert_index); + do { + orig_flags = te->flags; + orig_overflow = te->overflow; + *overflow = orig_overflow; + if (orig_flags & SDB_TE_BUFFER_FULL_MASK) { + /* + * SDB is already set by hardware. + * Abort and try to set somewhere + * behind. + */ + return false; + } + new_flags = orig_flags | SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + orig_flags, orig_overflow, + new_flags, 0ULL)); + return true; +} + +/* + * aux_reset_buffer() - Scan and set up SDBs for new samples + * @aux: The AUX buffer to set + * @range: The range of SDBs to scan, starting from aux->head + * @overflow: Set to overflow count + * + * Set alert indicator on the SDB at index of aux->alert_mark. If this SDB is + * marked as empty, check if it is already set full by the hardware sampler. + * If yes, that means new data is already there before we can set an alert + * indicator. Caller should try to set alert indicator to some position behind. + * + * Scan the SDBs in AUX buffer from behind aux->empty_mark. They were used + * previously and have already been consumed by user space. Reset these SDBs + * (clear full indicator and alert indicator) for new data. + * If aux->alert_mark falls in this area, just set it. Overflow count is + * recorded while scanning. + * + * SDBs between aux->head and aux->empty_mark were already reset last time + * and are ready for new samples, so scanning this area can be skipped. + * + * Return true if the alert indicator was set successfully and false if not. + */ +static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, + unsigned long long *overflow) +{ + unsigned long long orig_overflow, orig_flags, new_flags; + unsigned long i, range_scan, idx; + struct hws_trailer_entry *te; + + if (range <= AUX_SDB_NUM_EMPTY(aux)) + /* + * No need to scan. All SDBs in range are marked as empty. + * Just set alert indicator. Should check race with hardware + * sampler. + */ + return aux_set_alert(aux, aux->alert_mark, overflow); + + if (aux->alert_mark <= aux->empty_mark) + /* + * Set alert indicator on empty SDB. Should check race + * with hardware sampler. + */ + if (!aux_set_alert(aux, aux->alert_mark, overflow)) + return false; + + /* + * Scan the SDBs to clear full and alert indicator used previously. + * Start scanning from one SDB behind empty_mark. If the new alert + * indicator falls into this range, set it. + */ + range_scan = range - AUX_SDB_NUM_EMPTY(aux); + idx = aux->empty_mark + 1; + for (i = 0; i < range_scan; i++, idx++) { + te = aux_sdb_trailer(aux, idx); + do { + orig_flags = te->flags; + orig_overflow = te->overflow; + new_flags = orig_flags & ~SDB_TE_BUFFER_FULL_MASK; + if (idx == aux->alert_mark) + new_flags |= SDB_TE_ALERT_REQ_MASK; + else + new_flags &= ~SDB_TE_ALERT_REQ_MASK; + } while (!cmpxchg_double(&te->flags, &te->overflow, + orig_flags, orig_overflow, + new_flags, 0ULL)); + *overflow += orig_overflow; + } + + /* Update empty_mark to new position */ + aux->empty_mark = aux->head + range - 1; + + return true; +}
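Both aux_set_alert() and the rescan loop race with the hardware sampler, which can mark a trailer full at any moment, so the flags/overflow pair is updated with a cmpxchg_double() retry loop rather than under a lock. For readers unfamiliar with the pattern, here is a simplified single-word analogue in C11 atomics; it is only a sketch, the mask names are invented stand-ins, and it deliberately ignores that the real code swaps a 16-byte flags/overflow pair in one shot.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define FULL_MASK	(1ULL << 63)	/* stand-in for SDB_TE_BUFFER_FULL_MASK */
#define ALERT_MASK	(1ULL << 62)	/* stand-in for SDB_TE_ALERT_REQ_MASK */

static bool set_alert(_Atomic uint64_t *flags)
{
	uint64_t old = atomic_load(flags);
	uint64_t new;

	do {
		if (old & FULL_MASK)
			return false;	/* the sampler filled this SDB first */
		new = old | ALERT_MASK;
		/* on failure, 'old' is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(flags, &old, new));
	return true;
}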
+ +/* + * Measurement alert handler for diagnostic mode sampling. + */ +static void hw_collect_aux(struct cpu_hw_sf *cpuhw) +{ + struct aux_buffer *aux; + int done = 0; + unsigned long range = 0, size; + unsigned long long overflow = 0; + struct perf_output_handle *handle = &cpuhw->handle; + unsigned long num_sdb; + + aux = perf_get_aux(handle); + if (WARN_ON_ONCE(!aux)) + return; + + /* Inform user space that new data arrived */ + size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + perf_aux_output_end(handle, size); + num_sdb = aux->sfb.num_sdb; + + while (!done) { + /* Get an output handle */ + aux = perf_aux_output_begin(handle, cpuhw->event); + if (handle->size == 0) { + pr_err("The AUX buffer with %lu pages for the " + "diagnostic-sampling mode is full\n", + num_sdb); + debug_sprintf_event(sfdbg, 1, "AUX buffer used up\n"); + break; + } + if (WARN_ON_ONCE(!aux)) + return; + + /* Update head and alert_mark to new position */ + aux->head = handle->head >> PAGE_SHIFT; + range = (handle->size + 1) >> PAGE_SHIFT; + if (range == 1) + aux->alert_mark = aux->head; + else + aux->alert_mark = aux->head + range/2 - 1; + + if (aux_reset_buffer(aux, range, &overflow)) { + if (!overflow) { + done = 1; + break; + } + size = range << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + pr_err("Sample data caused the AUX buffer with %lu " + "pages to overflow\n", num_sdb); + debug_sprintf_event(sfdbg, 1, "head %lx range %lx " + "overflow %llx\n", + aux->head, range, overflow); + } else { + size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + perf_aux_output_end(&cpuhw->handle, size); + debug_sprintf_event(sfdbg, 6, "head %lx alert %lx " + "already full, try another\n", + aux->head, aux->alert_mark); + } + } + + if (done) + debug_sprintf_event(sfdbg, 6, "aux_reset_buffer: " + "[%lx -> %lx -> %lx] (%lx, %lx)\n", + aux->head, aux->alert_mark, aux->empty_mark, + AUX_SDB_NUM_ALERT(aux), range); +} + +/* + * Callback when freeing AUX buffers. + */ +static void aux_buffer_free(void *data) +{ + struct aux_buffer *aux = data; + unsigned long i, num_sdbt; + + if (!aux) + return; + + /* Free SDBT. SDB is freed by the caller */ + num_sdbt = aux->sfb.num_sdbt; + for (i = 0; i < num_sdbt; i++) + free_page(aux->sdbt_index[i]); + + kfree(aux->sdbt_index); + kfree(aux->sdb_index); + kfree(aux); + + debug_sprintf_event(sfdbg, 4, "aux_buffer_free: free " + "%lu SDBTs\n", num_sdbt); +}
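The setup callback that follows chains sample-data-block tables (SDBTs) the same way the basic-sampling buffer does: a table entry either points at a data page or, with its low bit set, at the next table, which is what require_table_link() distinguishes. Below is a hedged sketch of that tagged-pointer encoding; LINK_TAG and the helper names are stand-ins, and the trick works because the pages involved are page-aligned, so bit 0 is free.

#include <stdbool.h>
#include <stdint.h>

#define LINK_TAG 1UL	/* low bit set: entry points to the next table */

static inline bool is_table_link(uint64_t entry)
{
	return entry & LINK_TAG;
}

static inline uint64_t *link_target(uint64_t entry)
{
	return (uint64_t *)(uintptr_t)(entry & ~LINK_TAG);	/* strip the tag bit */
}

static inline uint64_t make_link(const uint64_t *next_table)
{
	return (uint64_t)(uintptr_t)next_table | LINK_TAG;	/* page-aligned, bit 0 free */
}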
+ +/* + * aux_buffer_setup() - Set up the AUX buffer for diagnostic mode sampling + * @cpu: On which to allocate, -1 means current + * @pages: Array of pointers to buffer pages passed from perf core + * @nr_pages: Total pages + * @snapshot: Flag for snapshot mode + * + * This is the callback invoked when setting up an event that uses the AUX + * buffer. The perf tool can trigger this via an additional mmap() call on + * the event. Unlike the buffer for basic samples, the AUX buffer belongs to + * the event. When it is a per-thread event, it is scheduled with the task + * among online CPUs. + * + * Return the private AUX buffer structure on success or NULL on failure. + */ +static void *aux_buffer_setup(int cpu, void **pages, int nr_pages, + bool snapshot) +{ + struct sf_buffer *sfb; + struct aux_buffer *aux; + unsigned long *new, *tail; + int i, n_sdbt; + + if (!nr_pages || !pages) + return NULL; + + if (nr_pages > CPUM_SF_MAX_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is larger than the " + "maximum sampling buffer limit\n", + nr_pages); + return NULL; + } else if (nr_pages < CPUM_SF_MIN_SDB * CPUM_SF_SDB_DIAG_FACTOR) { + pr_err("AUX buffer size (%i pages) is less than the " + "minimum sampling buffer limit\n", + nr_pages); + return NULL; + } + + /* Allocate aux_buffer struct for the event */ + aux = kmalloc(sizeof(struct aux_buffer), GFP_KERNEL); + if (!aux) + goto no_aux; + sfb = &aux->sfb; + + /* Allocate sdbt_index for fast reference */ + n_sdbt = (nr_pages + CPUM_SF_SDB_PER_TABLE - 1) / CPUM_SF_SDB_PER_TABLE; + aux->sdbt_index = kmalloc_array(n_sdbt, sizeof(void *), GFP_KERNEL); + if (!aux->sdbt_index) + goto no_sdbt_index; + + /* Allocate sdb_index for fast reference */ + aux->sdb_index = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL); + if (!aux->sdb_index) + goto no_sdb_index; + + /* Allocate the first SDBT */ + sfb->num_sdbt = 0; + sfb->sdbt = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!sfb->sdbt) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)sfb->sdbt; + tail = sfb->tail = sfb->sdbt; + + /* + * Link the provided pages of AUX buffer to SDBT. + * Allocate SDBT if needed. + */ + for (i = 0; i < nr_pages; i++, tail++) { + if (require_table_link(tail)) { + new = (unsigned long *) get_zeroed_page(GFP_KERNEL); + if (!new) + goto no_sdbt; + aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; + /* Link current page to tail of chain */ + *tail = (unsigned long)(void *) new + 1; + tail = new; + } + /* Tail is the entry in an SDBT */ + *tail = (unsigned long)pages[i]; + aux->sdb_index[i] = (unsigned long)pages[i]; + } + sfb->num_sdb = nr_pages; + + /* Link the last entry in the SDBT to the first SDBT */ + *tail = (unsigned long) sfb->sdbt + 1; + sfb->tail = tail; + + /* + * Initially all SDBs are zeroed. Mark them as empty, + * so there is no need to clear the full indicator + * when this event is first added. + */ + aux->empty_mark = sfb->num_sdb - 1; + + debug_sprintf_event(sfdbg, 4, "aux_buffer_setup: setup %lu SDBTs" + " and %lu SDBs\n", + sfb->num_sdbt, sfb->num_sdb); + + return aux; + +no_sdbt: + /* SDBs (AUX buffer pages) are freed by caller */ + for (i = 0; i < sfb->num_sdbt; i++) + free_page(aux->sdbt_index[i]); + kfree(aux->sdb_index); +no_sdb_index: + kfree(aux->sdbt_index); +no_sdbt_index: + kfree(aux); +no_aux: + return NULL; +} +
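For context, the perf core hands those pages to aux_buffer_setup() as the result of a second mmap() on the event file descriptor. The sketch below shows the user-space side of that protocol under several assumptions: the event type and config are placeholders for the real CPUM_SF diagnostic-sampling encoding, error handling is omitted, and the page counts are arbitrary powers of two.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *up;
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	void *base, *aux;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;	/* placeholder for the CPUM_SF PMU type */
	attr.config = 0;		/* placeholder for the diag-mode event */
	attr.sample_period = 10000;

	fd = (int)syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	/* First mmap: user page plus 2^n data pages */
	base = mmap(NULL, (1 + 8) * page, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, 0);
	up = base;

	/* Second mmap: declare where the AUX area lives, then map it */
	up->aux_offset = (1 + 8) * page;
	up->aux_size = 16 * page;
	aux = mmap(NULL, up->aux_size, PROT_READ | PROT_WRITE,
		   MAP_SHARED, fd, (off_t)up->aux_offset);

	/* these 16 pages are what aux_buffer_setup() links into SDBTs */
	(void)aux;
	close(fd);
	return 0;
}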
static void cpumsf_pmu_read(struct perf_event *event) { /* Nothing to do ... updates are interrupt-driven */ @@ -1342,12 +1746,13 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags) static int cpumsf_pmu_add(struct perf_event *event, int flags) { struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf); + struct aux_buffer *aux; int err; if (cpuhw->flags & PMU_F_IN_USE) return -EAGAIN; - if (!cpuhw->sfb.sdbt) + if (!SAMPL_DIAG_MODE(&event->hw) && !cpuhw->sfb.sdbt) return -EINVAL; err = 0; @@ -1362,10 +1767,12 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) */ cpuhw->lsctl.s = 0; cpuhw->lsctl.h = 1; - cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; - cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); - hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + if (!SAMPL_DIAG_MODE(&event->hw)) { + cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; + hw_reset_registers(&event->hw, cpuhw->sfb.sdbt); + } /* Ensure sampling functions are in the disabled state. If disabled, * switch on sampling enable control. */ @@ -1373,9 +1780,18 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) err = -EAGAIN; goto out; } - cpuhw->lsctl.es = 1; - if (SAMPL_DIAG_MODE(&event->hw)) + if (SAMPL_DIAG_MODE(&event->hw)) { + aux = perf_aux_output_begin(&cpuhw->handle, event); + if (!aux) { + err = -EINVAL; + goto out; + } + err = aux_output_begin(&cpuhw->handle, aux, cpuhw); + if (err) + goto out; cpuhw->lsctl.ed = 1; + } + cpuhw->lsctl.es = 1; /* Set in_use flag and store event */ cpuhw->event = event; @@ -1401,6 +1817,8 @@ static void cpumsf_pmu_del(struct perf_event *event, int flags) cpuhw->flags &= ~PMU_F_IN_USE; cpuhw->event = NULL; + if (SAMPL_DIAG_MODE(&event->hw)) + aux_output_end(&cpuhw->handle); perf_event_update_userpage(event); perf_pmu_enable(event->pmu); } @@ -1448,6 +1866,9 @@ static struct pmu cpumf_sampling = { .read = cpumsf_pmu_read, .attr_groups = cpumsf_pmu_attr_groups, + + .setup_aux = aux_buffer_setup, + .free_aux = aux_buffer_free, }; static void cpumf_measurement_alert(struct ext_code ext_code, @@ -1471,7 +1892,10 @@ static void cpumf_measurement_alert(struct ext_code ext_code, /* Program alert request */ if (alert & CPU_MF_INT_SF_PRA) { if (cpuhw->flags & PMU_F_IN_USE) - hw_perf_event_update(cpuhw->event, 0); + if (SAMPL_DIAG_MODE(&cpuhw->event->hw)) + hw_collect_aux(cpuhw); + else + hw_perf_event_update(cpuhw->event, 0); else WARN_ON_ONCE(!(cpuhw->flags & PMU_F_IN_USE)); } @@ -1590,6 +2014,9 @@ static int __init init_cpum_sampling_pmu(void) return -ENODEV; } + if (!si.as && !si.ad) + return -ENODEV; + if (si.bsdes != sizeof(struct hws_basic_entry)) { pr_cpumsf_err(RS_INIT_FAILURE_BSDES); return -EINVAL; diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 93a386f4a3b5..0d770e513abf 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Performance event support for s390x * * Copyright IBM Corp. 2012, 2013 * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation.
*/ #define KMSG_COMPONENT "perf" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt diff --git a/arch/s390/kernel/perf_regs.c b/arch/s390/kernel/perf_regs.c new file mode 100644 index 000000000000..f8603ebed669 --- /dev/null +++ b/arch/s390/kernel/perf_regs.c @@ -0,0 +1,70 @@ +#include <linux/perf_event.h> +#include <linux/perf_regs.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/bug.h> +#include <asm/ptrace.h> +#include <asm/fpu/api.h> +#include <asm/fpu/types.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + freg_t fp; + + if (WARN_ON_ONCE((u32)idx >= PERF_REG_S390_MAX)) + return 0; + + if (idx >= PERF_REG_S390_R0 && idx <= PERF_REG_S390_R15) + return regs->gprs[idx]; + + if (idx >= PERF_REG_S390_FP0 && idx <= PERF_REG_S390_FP15) { + if (!user_mode(regs)) + return 0; + + idx -= PERF_REG_S390_FP0; + fp = MACHINE_HAS_VX ? *(freg_t *)(current->thread.fpu.vxrs + idx) + : current->thread.fpu.fprs[idx]; + return fp.ui; + } + + if (idx == PERF_REG_S390_MASK) + return regs->psw.mask; + if (idx == PERF_REG_S390_PC) + return regs->psw.addr; + + return regs->gprs[idx]; +} + +#define REG_RESERVED (~((1UL << PERF_REG_S390_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (test_tsk_thread_flag(task, TIF_31BIT)) + return PERF_SAMPLE_REGS_ABI_32; + + return PERF_SAMPLE_REGS_ABI_64; +} + +void perf_get_regs_user(struct perf_regs *regs_user, + struct pt_regs *regs, + struct pt_regs *regs_user_copy) +{ + /* + * Use the regs from the first interruption and let + * perf_sample_regs_intr() handle interrupts (regs == get_irq_regs()). + * + * Also save FPU registers for user-space tasks only. + */ + regs_user->regs = task_pt_regs(current); + if (user_mode(regs_user->regs)) + save_fpu_regs(); + regs_user->abi = perf_reg_abi(current); +} diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 26c0523c1488..cd3df5514552 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -1651,6 +1651,14 @@ static const struct user_regset s390_compat_regsets[] = { .set = s390_gs_cb_set, }, { + .core_note_type = NT_S390_GS_BC, + .n = sizeof(struct gs_cb) / sizeof(__u64), + .size = sizeof(__u64), + .align = sizeof(__u64), + .get = s390_gs_bc_get, + .set = s390_gs_bc_set, + }, + { .core_note_type = NT_S390_RI_CB, .n = sizeof(struct runtime_instr_cb) / sizeof(__u64), .size = sizeof(__u64), diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 090053cf279b..793da97f9a6e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * S390 version * Copyright IBM Corp. 
1999, 2012 diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index cd4334e80b64..b8c1a85bcf2d 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -55,6 +55,7 @@ #include <asm/sigp.h> #include <asm/idle.h> #include <asm/nmi.h> +#include <asm/topology.h> #include "entry.h" enum { diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index e66687dc6144..460dcfba7d4e 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Stack trace management functions * diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 12981e197f01..80b862e9c53c 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -1,10 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * store hypervisor information instruction emulation functions. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. - * * Copyright IBM Corp. 2016 * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com> */ diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 5cbd52169348..cf561160ea88 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Time of day based timer functions. * @@ -523,7 +524,7 @@ static void __init stp_reset(void) } } -static void stp_timeout(unsigned long dummy) +static void stp_timeout(struct timer_list *unused) { queue_work(time_sync_wq, &stp_work); } @@ -532,7 +533,7 @@ static int __init stp_init(void) { if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags)) return 0; - setup_timer(&stp_timer, stp_timeout, 0UL); + timer_setup(&stp_timer, stp_timeout, 0); time_init_wq(); if (!stp_online) return 0; diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index f9b393d4a078..4d5b65e527b5 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com> diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 0520854a4dab..f3a1c7c6824e 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -1,12 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * vdso setup for s390 * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
*/ #include <linux/init.h> @@ -158,16 +155,9 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; struct vdso_per_cpu_data *vd; - u32 *psal, *aste; - int i; - - lowcore->vdso_per_cpu_data = __LC_PASTE; - - if (!vdso_enabled) - return 0; segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER); - page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA); + page_table = get_zeroed_page(GFP_KERNEL); page_frame = get_zeroed_page(GFP_KERNEL); if (!segment_table || !page_table || !page_frame) goto out; @@ -179,25 +169,15 @@ int vdso_alloc_per_cpu(struct lowcore *lowcore) vd->cpu_nr = lowcore->cpu_nr; vd->node_id = cpu_to_node(vd->cpu_nr); - /* Set up access register mode page table */ + /* Set up page table for the vdso address space */ memset64((u64 *)segment_table, _SEGMENT_ENTRY_EMPTY, _CRST_ENTRIES); memset64((u64 *)page_table, _PAGE_INVALID, PTRS_PER_PTE); *(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table; *(unsigned long *) page_table = _PAGE_PROTECT + page_frame; - psal = (u32 *) (page_table + 256*sizeof(unsigned long)); - aste = psal + 32; - - for (i = 4; i < 32; i += 4) - psal[i] = 0x80000000; - - lowcore->paste[4] = (u32)(addr_t) psal; - psal[0] = 0x02000000; - psal[2] = (u32)(addr_t) aste; - *(unsigned long *) (aste + 2) = segment_table + + lowcore->vdso_asce = segment_table + _ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT; - aste[4] = (u32)(addr_t) psal; lowcore->vdso_per_cpu_data = page_frame; return 0; @@ -212,14 +192,8 @@ out: void vdso_free_per_cpu(struct lowcore *lowcore) { unsigned long segment_table, page_table, page_frame; - u32 *psal, *aste; - if (!vdso_enabled) - return; - - psal = (u32 *)(addr_t) lowcore->paste[4]; - aste = (u32 *)(addr_t) psal[2]; - segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK; + segment_table = lowcore->vdso_asce & PAGE_MASK; page_table = *(unsigned long *) segment_table; page_frame = *(unsigned long *) page_table; @@ -228,16 +202,6 @@ void vdso_free_per_cpu(struct lowcore *lowcore) free_pages(segment_table, SEGMENT_ORDER); } -static void vdso_init_cr5(void) -{ - unsigned long cr5; - - if (!vdso_enabled) - return; - cr5 = offsetof(struct lowcore, paste); - __ctl_load(cr5, 5, 5); -} - /* * This is called from binfmt_elf, we create the special vma for the * vDSO and insert it into the mm struct tree @@ -314,8 +278,6 @@ static int __init vdso_init(void) { int i; - if (!vdso_enabled) - return 0; vdso_init_data(vdso_data); #ifdef CONFIG_COMPAT /* Calculate the size of the 32 bit vDSO */ @@ -354,7 +316,6 @@ static int __init vdso_init(void) vdso64_pagelist[vdso64_pages] = NULL; if (vdso_alloc_per_cpu(&S390_lowcore)) BUG(); - vdso_init_cr5(); get_page(virt_to_page(vdso_data)); diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S index eca3f001f081..f61df5253c23 100644 --- a/arch/s390/kernel/vdso32/clock_getres.S +++ b/arch/s390/kernel/vdso32/clock_getres.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 32 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
*/ #include <asm/vdso.h> #include <asm/asm-offsets.h> diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S index a5769b83d90e..2d6ec3abe095 100644 --- a/arch/s390/kernel/vdso32/clock_gettime.S +++ b/arch/s390/kernel/vdso32/clock_gettime.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 32 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include <asm/vdso.h> #include <asm/asm-offsets.h> diff --git a/arch/s390/kernel/vdso32/getcpu.S b/arch/s390/kernel/vdso32/getcpu.S index 6e30769dd017..5477a2c112fb 100644 --- a/arch/s390/kernel/vdso32/getcpu.S +++ b/arch/s390/kernel/vdso32/getcpu.S @@ -15,23 +15,11 @@ .type __kernel_getcpu,@function __kernel_getcpu: .cfi_startproc - ear %r1,%a4 - lhi %r4,1 - sll %r4,24 - sar %a4,%r4 la %r4,0 - epsw %r0,0 - sacf 512 + sacf 256 l %r5,__VDSO_CPU_NR(%r4) l %r4,__VDSO_NODE_ID(%r4) - tml %r0,0x4000 - jo 1f - tml %r0,0x8000 - jno 0f - sacf 256 - j 1f -0: sacf 0 -1: sar %a4,%r1 + sacf 0 ltr %r2,%r2 jz 2f st %r5,0(%r2) diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S index 63b86dceb0bf..aa8bf13a2edb 100644 --- a/arch/s390/kernel/vdso32/gettimeofday.S +++ b/arch/s390/kernel/vdso32/gettimeofday.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 32 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include <asm/vdso.h> #include <asm/asm-offsets.h> diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S index c8513deb8c66..faf5213b15df 100644 --- a/arch/s390/kernel/vdso64/clock_getres.S +++ b/arch/s390/kernel/vdso64/clock_getres.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_getres() for 64 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include <asm/vdso.h> #include <asm/asm-offsets.h> diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S index 9c3b12626dba..6046b3bfca46 100644 --- a/arch/s390/kernel/vdso64/clock_gettime.S +++ b/arch/s390/kernel/vdso64/clock_gettime.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of clock_gettime() for 64 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. 
*/ #include <asm/vdso.h> #include <asm/asm-offsets.h> @@ -114,23 +111,12 @@ __kernel_clock_gettime: br %r14 /* CPUCLOCK_VIRT for this thread */ -9: icm %r0,15,__VDSO_ECTG_OK(%r5) +9: lghi %r4,0 + icm %r0,15,__VDSO_ECTG_OK(%r5) jz 12f - ear %r2,%a4 - llilh %r4,0x0100 - sar %a4,%r4 - lghi %r4,0 - epsw %r5,0 - sacf 512 /* Magic ectg instruction */ + sacf 256 /* Magic ectg instruction */ .insn ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4 - tml %r5,0x4000 - jo 11f - tml %r5,0x8000 - jno 10f - sacf 256 - j 11f -10: sacf 0 -11: sar %a4,%r2 + sacf 0 algr %r1,%r0 /* r1 = cputime as TOD value */ mghi %r1,1000 /* convert to nanoseconds */ srlg %r1,%r1,12 /* r1 = cputime in nanosec */ diff --git a/arch/s390/kernel/vdso64/getcpu.S b/arch/s390/kernel/vdso64/getcpu.S index 43983764b959..e9c34364d97b 100644 --- a/arch/s390/kernel/vdso64/getcpu.S +++ b/arch/s390/kernel/vdso64/getcpu.S @@ -15,22 +15,11 @@ .type __kernel_getcpu,@function __kernel_getcpu: .cfi_startproc - ear %r1,%a4 - llilh %r4,0x0100 - sar %a4,%r4 la %r4,0 - epsw %r0,0 - sacf 512 + sacf 256 l %r5,__VDSO_CPU_NR(%r4) l %r4,__VDSO_NODE_ID(%r4) - tml %r0,0x4000 - jo 1f - tml %r0,0x8000 - jno 0f - sacf 256 - j 1f -0: sacf 0 -1: sar %a4,%r1 + sacf 0 ltgr %r2,%r2 jz 2f st %r5,0(%r2) diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S index b02e62f3bc12..cc9dbc27da6f 100644 --- a/arch/s390/kernel/vdso64/gettimeofday.S +++ b/arch/s390/kernel/vdso64/gettimeofday.S @@ -1,13 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Userland implementation of gettimeofday() for 64 bits processes in a * s390 kernel for use in the vDSO * * Copyright IBM Corp. 2008 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License (version 2 only) - * as published by the Free Software Foundation. */ #include <asm/vdso.h> #include <asm/asm-offsets.h> diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index dd7178fbb4f3..f24395a01918 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Virtual cpu timer based timer functions. 
* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 98ad8b9e0360..9614aea5839b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3372,7 +3372,6 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { int rc; - sigset_t sigsaved; if (kvm_run->immediate_exit) return -EINTR; @@ -3382,8 +3381,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) return 0; } - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + kvm_sigset_activate(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) { kvm_s390_vcpu_start(vcpu); @@ -3417,8 +3415,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) disable_cpu_timer_accounting(vcpu); store_regs(vcpu, kvm_run); - if (vcpu->sigset_active) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); + kvm_sigset_deactivate(vcpu); vcpu->stat.exit_userspace++; return rc; diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 84c0faeaf7ea..30a7c8c29964 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c @@ -78,7 +78,7 @@ static inline int arch_load_niai4(int *lock) ALTERNATIVE("", ".long 0xb2fa0040", 49) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); - return owner; + return owner; } static inline int arch_cmpxchg_niai8(int *lock, int old, int new) @@ -226,9 +226,10 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Try to get the lock if it is free. */ if (!owner) { new = (old & _Q_TAIL_MASK) | lockval; - if (arch_cmpxchg_niai8(&lp->lock, old, new)) + if (arch_cmpxchg_niai8(&lp->lock, old, new)) { /* Got the lock */ - return; + return; + } continue; } if (count-- >= 0) diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c index 802903c50de1..cae5a1e16cbd 100644 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@ -40,10 +40,67 @@ static inline int copy_with_mvcos(void) } #endif +void set_fs(mm_segment_t fs) +{ + current->thread.mm_segment = fs; + if (fs == USER_DS) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + clear_cpu_flag(CIF_ASCE_PRIMARY); + } else { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + if (fs & 1) { + if (fs == USER_DS_SACF) + __ctl_load(S390_lowcore.user_asce, 7, 7); + else + __ctl_load(S390_lowcore.kernel_asce, 7, 7); + set_cpu_flag(CIF_ASCE_SECONDARY); + } +} +EXPORT_SYMBOL(set_fs); + +mm_segment_t enable_sacf_uaccess(void) +{ + mm_segment_t old_fs; + unsigned long asce, cr; + + old_fs = current->thread.mm_segment; + if (old_fs & 1) + return old_fs; + current->thread.mm_segment |= 1; + asce = S390_lowcore.kernel_asce; + if (likely(old_fs == USER_DS)) { + __ctl_store(cr, 1, 1); + if (cr != S390_lowcore.kernel_asce) { + __ctl_load(S390_lowcore.kernel_asce, 1, 1); + set_cpu_flag(CIF_ASCE_PRIMARY); + } + asce = S390_lowcore.user_asce; + } + __ctl_store(cr, 7, 7); + if (cr != asce) { + __ctl_load(asce, 7, 7); + set_cpu_flag(CIF_ASCE_SECONDARY); + } + return old_fs; +} +EXPORT_SYMBOL(enable_sacf_uaccess); + +void disable_sacf_uaccess(mm_segment_t old_fs) +{ + if (old_fs == USER_DS && test_facility(27)) { + __ctl_load(S390_lowcore.user_asce, 1, 1); + clear_cpu_flag(CIF_ASCE_PRIMARY); + } + current->thread.mm_segment = old_fs; +} +EXPORT_SYMBOL(disable_sacf_uaccess); + static inline unsigned long copy_from_user_mvcos(void *x, const void __user *ptr, unsigned long size) { - register unsigned long reg0 asm("0") = 0x81UL; + 
register unsigned long reg0 asm("0") = 0x01UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -74,8 +131,9 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, unsigned long size) { unsigned long tmp1, tmp2; + mm_segment_t old_fs; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -102,6 +160,7 @@ static inline unsigned long copy_from_user_mvcp(void *x, const void __user *ptr, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -116,7 +175,7 @@ EXPORT_SYMBOL(raw_copy_from_user); static inline unsigned long copy_to_user_mvcos(void __user *ptr, const void *x, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810000UL; + register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -147,8 +206,9 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, unsigned long size) { unsigned long tmp1, tmp2; + mm_segment_t old_fs; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); tmp1 = -256UL; asm volatile( " sacf 0\n" @@ -175,6 +235,7 @@ static inline unsigned long copy_to_user_mvcs(void __user *ptr, const void *x, EX_TABLE(7b,3b) EX_TABLE(8b,3b) EX_TABLE(9b,6b) : "+a" (size), "+a" (ptr), "+a" (x), "+a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -189,7 +250,7 @@ EXPORT_SYMBOL(raw_copy_to_user); static inline unsigned long copy_in_user_mvcos(void __user *to, const void __user *from, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810081UL; + register unsigned long reg0 asm("0") = 0x010001UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -212,9 +273,10 @@ static inline unsigned long copy_in_user_mvcos(void __user *to, const void __use static inline unsigned long copy_in_user_mvc(void __user *to, const void __user *from, unsigned long size) { + mm_segment_t old_fs; unsigned long tmp1; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -238,6 +300,7 @@ static inline unsigned long copy_in_user_mvc(void __user *to, const void __user EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "+a" (from), "=a" (tmp1) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -251,7 +314,7 @@ EXPORT_SYMBOL(raw_copy_in_user); static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size) { - register unsigned long reg0 asm("0") = 0x810000UL; + register unsigned long reg0 asm("0") = 0x010000UL; unsigned long tmp1, tmp2; tmp1 = -4096UL; @@ -279,9 +342,10 @@ static inline unsigned long clear_user_mvcos(void __user *to, unsigned long size static inline unsigned long clear_user_xc(void __user *to, unsigned long size) { + mm_segment_t old_fs; unsigned long tmp1, tmp2; - load_kernel_asce(); + old_fs = enable_sacf_uaccess(); asm volatile( " sacf 256\n" " aghi %0,-1\n" @@ -310,6 +374,7 @@ static inline unsigned long clear_user_xc(void __user *to, unsigned long size) EX_TABLE(1b,6b) EX_TABLE(2b,0b) EX_TABLE(4b,0b) : "+a" (size), "+a" (to), "=a" (tmp1), "=a" (tmp2) : : "cc", "memory"); + disable_sacf_uaccess(old_fs); return size; } @@ -345,10 +410,15 @@ static inline unsigned long strnlen_user_srst(const char __user *src, unsigned long __strnlen_user(const char __user *src, unsigned long size) { + mm_segment_t old_fs; + unsigned long len; + if (unlikely(!size)) return 0; - 
load_kernel_asce(); - return strnlen_user_srst(src, size); + old_fs = enable_sacf_uaccess(); + len = strnlen_user_srst(src, size); + disable_sacf_uaccess(old_fs); + return len; } EXPORT_SYMBOL(__strnlen_user); diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 2dbdcd85b68f..6cf024eb2085 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Collaborative memory management interface. * @@ -56,10 +57,10 @@ static DEFINE_SPINLOCK(cmm_lock); static struct task_struct *cmm_thread_ptr; static DECLARE_WAIT_QUEUE_HEAD(cmm_thread_wait); -static DEFINE_TIMER(cmm_timer, NULL); -static void cmm_timer_fn(unsigned long); +static void cmm_timer_fn(struct timer_list *); static void cmm_set_timer(void); +static DEFINE_TIMER(cmm_timer, cmm_timer_fn); static long cmm_alloc_pages(long nr, long *counter, struct cmm_page_array **list) @@ -194,13 +195,11 @@ static void cmm_set_timer(void) if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) return; } - cmm_timer.function = cmm_timer_fn; - cmm_timer.data = 0; cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; add_timer(&cmm_timer); } -static void cmm_timer_fn(unsigned long ignored) +static void cmm_timer_fn(struct timer_list *unused) { long nr; diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 242b78c0a9ec..93faeca52284 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -50,6 +50,13 @@ #define VM_FAULT_SIGNAL 0x080000 #define VM_FAULT_PFAULT 0x100000 +enum fault_type { + KERNEL_FAULT, + USER_FAULT, + VDSO_FAULT, + GMAP_FAULT, +}; + static unsigned long store_indication __read_mostly; static int __init fault_init(void) @@ -99,27 +106,34 @@ void bust_spinlocks(int yes) } /* - * Returns the address space associated with the fault. - * Returns 0 for kernel space and 1 for user space. + * Find out which address space caused the exception. + * Access register mode is impossible, ignore space == 3. */ -static inline int user_space_fault(struct pt_regs *regs) +static inline enum fault_type get_fault_type(struct pt_regs *regs) { unsigned long trans_exc_code; - /* - * The lowest two bits of the translation exception - * identification indicate which paging table was used. 
- */ trans_exc_code = regs->int_parm_long & 3; - if (trans_exc_code == 3) /* home space -> kernel */ - return 0; - if (user_mode(regs)) - return 1; - if (trans_exc_code == 2) /* secondary space -> set_fs */ - return current->thread.mm_segment.ar4; - if (current->flags & PF_VCPU) - return 1; - return 0; + if (likely(trans_exc_code == 0)) { + /* primary space exception */ + if (IS_ENABLED(CONFIG_PGSTE) && + test_pt_regs_flag(regs, PIF_GUEST_FAULT)) + return GMAP_FAULT; + if (current->thread.mm_segment == USER_DS) + return USER_FAULT; + return KERNEL_FAULT; + } + if (trans_exc_code == 2) { + /* secondary space exception */ + if (current->thread.mm_segment & 1) { + if (current->thread.mm_segment == USER_DS_SACF) + return USER_FAULT; + return KERNEL_FAULT; + } + return VDSO_FAULT; + } + /* home space exception -> access via kernel ASCE */ + return KERNEL_FAULT; } static int bad_address(void *p) @@ -204,20 +218,23 @@ static void dump_fault_info(struct pt_regs *regs) break; } pr_cont("mode while using "); - if (!user_space_fault(regs)) { - asce = S390_lowcore.kernel_asce; - pr_cont("kernel "); - } -#ifdef CONFIG_PGSTE - else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) { - struct gmap *gmap = (struct gmap *)S390_lowcore.gmap; - asce = gmap->asce; - pr_cont("gmap "); - } -#endif - else { + switch (get_fault_type(regs)) { + case USER_FAULT: asce = S390_lowcore.user_asce; pr_cont("user "); + break; + case VDSO_FAULT: + asce = S390_lowcore.vdso_asce; + pr_cont("vdso "); + break; + case GMAP_FAULT: + asce = ((struct gmap *) S390_lowcore.gmap)->asce; + pr_cont("gmap "); + break; + case KERNEL_FAULT: + asce = S390_lowcore.kernel_asce; + pr_cont("kernel "); + break; } pr_cont("ASCE.\n"); dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); @@ -273,7 +290,7 @@ static noinline void do_no_context(struct pt_regs *regs) * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - if (!user_space_fault(regs)) + if (get_fault_type(regs) == KERNEL_FAULT) printk(KERN_ALERT "Unable to handle kernel pointer dereference" " in virtual kernel address space\n"); else @@ -395,12 +412,11 @@ static noinline void do_fault_error(struct pt_regs *regs, int access, int fault) */ static inline int do_exception(struct pt_regs *regs, int access) { -#ifdef CONFIG_PGSTE struct gmap *gmap; -#endif struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; + enum fault_type type; unsigned long trans_exc_code; unsigned long address; unsigned int flags; @@ -425,8 +441,19 @@ static inline int do_exception(struct pt_regs *regs, int access) * user context. */ fault = VM_FAULT_BADCONTEXT; - if (unlikely(!user_space_fault(regs) || faulthandler_disabled() || !mm)) + type = get_fault_type(regs); + switch (type) { + case KERNEL_FAULT: + goto out; + case VDSO_FAULT: + fault = VM_FAULT_BADMAP; goto out; + case USER_FAULT: + case GMAP_FAULT: + if (faulthandler_disabled() || !mm) + goto out; + break; + } address = trans_exc_code & __FAIL_ADDR_MASK; perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); @@ -437,10 +464,9 @@ static inline int do_exception(struct pt_regs *regs, int access) flags |= FAULT_FLAG_WRITE; down_read(&mm->mmap_sem); -#ifdef CONFIG_PGSTE - gmap = (current->flags & PF_VCPU) ? 
- (struct gmap *) S390_lowcore.gmap : NULL; - if (gmap) { + gmap = NULL; + if (IS_ENABLED(CONFIG_PGSTE) && type == GMAP_FAULT) { + gmap = (struct gmap *) S390_lowcore.gmap; current->thread.gmap_addr = address; current->thread.gmap_write_flag = !!(flags & FAULT_FLAG_WRITE); current->thread.gmap_int_code = regs->int_code & 0xffff; @@ -452,7 +478,6 @@ static inline int do_exception(struct pt_regs *regs, int access) if (gmap->pfault_enabled) flags |= FAULT_FLAG_RETRY_NOWAIT; } -#endif retry: fault = VM_FAULT_BADMAP; @@ -507,15 +532,14 @@ retry: regs, address); } if (fault & VM_FAULT_RETRY) { -#ifdef CONFIG_PGSTE - if (gmap && (flags & FAULT_FLAG_RETRY_NOWAIT)) { + if (IS_ENABLED(CONFIG_PGSTE) && gmap && + (flags & FAULT_FLAG_RETRY_NOWAIT)) { /* FAULT_FLAG_RETRY_NOWAIT has been set, * mmap_sem has not been released */ current->thread.gmap_pfault = 1; fault = VM_FAULT_PFAULT; goto out_up; } -#endif /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk * of starvation. */ flags &= ~(FAULT_FLAG_ALLOW_RETRY | @@ -525,8 +549,7 @@ retry: goto retry; } } -#ifdef CONFIG_PGSTE - if (gmap) { + if (IS_ENABLED(CONFIG_PGSTE) && gmap) { address = __gmap_link(gmap, current->thread.gmap_addr, address); if (address == -EFAULT) { @@ -538,7 +561,6 @@ retry: goto out_up; } } -#endif fault = 0; out_up: up_read(&mm->mmap_sem); @@ -706,7 +728,7 @@ static void pfault_interrupt(struct ext_code ext_code, return; inc_irq_stat(IRQEXT_PFL); /* Get the token (= pid of the affected task). */ - pid = param64 & LPP_PFAULT_PID_MASK; + pid = param64 & LPP_PID_MASK; rcu_read_lock(); tsk = find_task_by_pid_ns(pid, &init_pid_ns); if (tsk) diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 2f66290c9b92..05d459b638f5 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * KVM guest address space mapping code * @@ -1187,12 +1188,11 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, unsigned long *sgt) { - unsigned long asce, *pgt; + unsigned long *pgt; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) sgt | _ASCE_TYPE_SEGMENT; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; @@ -1245,12 +1245,11 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, unsigned long *r3t) { - unsigned long asce, *sgt; + unsigned long *sgt; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r3t | _ASCE_TYPE_REGION3; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; @@ -1303,12 +1302,11 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, unsigned long *r2t) { - unsigned long asce, *r3t; + unsigned long *r3t; struct page *page; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r2t | _ASCE_TYPE_REGION2; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 817c9e16e83e..671535e64aba 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -95,6 +95,7 @@ void __init paging_init(void) } init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits; S390_lowcore.kernel_asce = init_mm.context.asce; + S390_lowcore.user_asce 
= S390_lowcore.kernel_asce; crst_table_init((unsigned long *) init_mm.pgd, pgd_type); vmem_map_init(); diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5bea139517a2..831bdcf407bb 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -1,24 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * flexible mmap layout support * * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. * All Rights Reserved. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * * Started by Ingo Molnar <mingo@elte.hu> */ diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 4ad4c4f77b4d..434a9564917b 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -71,10 +71,8 @@ static void __crst_table_upgrade(void *arg) { struct mm_struct *mm = arg; - if (current->active_mm == mm) { - clear_user_asce(); + if (current->active_mm == mm) set_user_asce(mm); - } __tlb_flush_local(); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index ae677f814bc0..4f2b65d01a70 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2007, 2011 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 0fe649c0d542..4902fed221c0 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012 * diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index c2f786f0ea06..b482e95b6249 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 2012,2015 * diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 0d300ee00f4e..f7aa5a77827e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright IBM Corp. 
2012 * diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index 81b840bc6e4e..19bcb3b45a70 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * s390 specific pci instructions * diff --git a/arch/s390/tools/Makefile b/arch/s390/tools/Makefile index 2ebf2872cc16..2e70e25de07a 100644 --- a/arch/s390/tools/Makefile +++ b/arch/s390/tools/Makefile @@ -21,4 +21,4 @@ include/generated/facilities.h: $(obj)/gen_facilities FORCE $(call filechk,facilities.h) include/generated/dis.h: $(obj)/gen_opcode_table FORCE - $(call filechk,dis.h,__FUN) + $(call filechk,dis.h) diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 280bbff12102..65300193b99f 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -15,6 +15,12 @@ ifneq ($(SUBARCH),$(ARCH)) endif endif +ifeq ($(ARCH),sh) +KBUILD_DEFCONFIG := shx3_defconfig +else +KBUILD_DEFCONFIG := cayman_defconfig +endif + isa-y := any isa-$(CONFIG_SH_DSP) := sh isa-$(CONFIG_CPU_SH2) := sh2 @@ -105,14 +111,12 @@ ifdef CONFIG_SUPERH32 UTS_MACHINE := sh BITS := 32 LDFLAGS_vmlinux += -e _stext -KBUILD_DEFCONFIG := shx3_defconfig else UTS_MACHINE := sh64 BITS := 64 LDFLAGS_vmlinux += --defsym phys_stext=_stext-$(CONFIG_PAGE_OFFSET) \ --defsym phys_stext_shmedia=phys_stext+1 \ -e phys_stext_shmedia -KBUILD_DEFCONFIG := cayman_defconfig endif ifdef CONFIG_CPU_LITTLE_ENDIAN diff --git a/arch/sh/boot/compressed/.gitignore b/arch/sh/boot/compressed/.gitignore index 2374a83d87b2..edff113f1b85 100644 --- a/arch/sh/boot/compressed/.gitignore +++ b/arch/sh/boot/compressed/.gitignore @@ -1 +1,6 @@ +ashiftrt.S +ashldi3.c +ashlsi3.S +ashrsi3.S +lshrsi3.S vmlinux.bin.* diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c index f2d9d3079d4e..627ce8e75e01 100644 --- a/arch/sh/boot/compressed/misc.c +++ b/arch/sh/boot/compressed/misc.c @@ -104,6 +104,18 @@ static void error(char *x) while(1); /* Halt */ } +unsigned long __stack_chk_guard; + +void __stack_chk_guard_setup(void) +{ + __stack_chk_guard = 0x000a0dff; +} + +void __stack_chk_fail(void) +{ + error("stack-protector: Kernel stack is corrupted\n"); +} + #ifdef CONFIG_SUPERH64 #define stackalign 8 #else @@ -118,6 +130,8 @@ void decompress_kernel(void) { unsigned long output_addr; + __stack_chk_guard_setup(); + #ifdef CONFIG_SUPERH64 output_addr = (CONFIG_MEMORY_START + 0x2000); #else diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c index c6d96049a0bb..e8af2ff29bc3 100644 --- a/arch/sh/drivers/heartbeat.c +++ b/arch/sh/drivers/heartbeat.c @@ -59,9 +59,9 @@ static inline void heartbeat_toggle_bit(struct heartbeat_data *hd, } } -static void heartbeat_timer(unsigned long data) +static void heartbeat_timer(struct timer_list *t) { - struct heartbeat_data *hd = (struct heartbeat_data *)data; + struct heartbeat_data *hd = from_timer(hd, t, timer); static unsigned bit = 0, up = 1; heartbeat_toggle_bit(hd, bit, hd->flags & HEARTBEAT_INVERTED); @@ -133,7 +133,7 @@ static int heartbeat_drv_probe(struct platform_device *pdev) } } - setup_timer(&hd->timer, heartbeat_timer, (unsigned long)hd); + timer_setup(&hd->timer, heartbeat_timer, 0); platform_set_drvdata(pdev, hd); return mod_timer(&hd->timer, jiffies + 1); diff --git a/arch/sh/drivers/pci/common.c b/arch/sh/drivers/pci/common.c index cae707f3472d..fe163ecd0719 100644 --- a/arch/sh/drivers/pci/common.c +++ b/arch/sh/drivers/pci/common.c @@ -85,18 +85,18 @@ int __init pci_is_66mhz_capable(struct pci_channel *hose, return cap66 > 0; } -static void 
pcibios_enable_err(unsigned long __data) +static void pcibios_enable_err(struct timer_list *t) { - struct pci_channel *hose = (struct pci_channel *)__data; + struct pci_channel *hose = from_timer(hose, t, err_timer); del_timer(&hose->err_timer); printk(KERN_DEBUG "PCI: re-enabling error IRQ.\n"); enable_irq(hose->err_irq); } -static void pcibios_enable_serr(unsigned long __data) +static void pcibios_enable_serr(struct timer_list *t) { - struct pci_channel *hose = (struct pci_channel *)__data; + struct pci_channel *hose = from_timer(hose, t, serr_timer); del_timer(&hose->serr_timer); printk(KERN_DEBUG "PCI: re-enabling system error IRQ.\n"); @@ -106,15 +106,11 @@ static void pcibios_enable_serr(unsigned long __data) void pcibios_enable_timers(struct pci_channel *hose) { if (hose->err_irq) { - init_timer(&hose->err_timer); - hose->err_timer.data = (unsigned long)hose; - hose->err_timer.function = pcibios_enable_err; + timer_setup(&hose->err_timer, pcibios_enable_err, 0); } if (hose->serr_irq) { - init_timer(&hose->serr_timer); - hose->serr_timer.data = (unsigned long)hose; - hose->serr_timer.function = pcibios_enable_serr; + timer_setup(&hose->serr_timer, pcibios_enable_serr, 0); } } diff --git a/arch/sh/drivers/push-switch.c b/arch/sh/drivers/push-switch.c index 5bfb341cc5c4..a17181160233 100644 --- a/arch/sh/drivers/push-switch.c +++ b/arch/sh/drivers/push-switch.c @@ -26,9 +26,9 @@ static ssize_t switch_show(struct device *dev, } static DEVICE_ATTR(switch, S_IRUGO, switch_show, NULL); -static void switch_timer(unsigned long data) +static void switch_timer(struct timer_list *t) { - struct push_switch *psw = (struct push_switch *)data; + struct push_switch *psw = from_timer(psw, t, debounce); schedule_work(&psw->work); } @@ -78,10 +78,7 @@ static int switch_drv_probe(struct platform_device *pdev) } INIT_WORK(&psw->work, switch_work_handler); - init_timer(&psw->debounce); - - psw->debounce.function = switch_timer; - psw->debounce.data = (unsigned long)psw; + timer_setup(&psw->debounce, switch_timer, 0); /* Workqueue API brain-damage */ psw->pdev = pdev; diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h index 9a32eb4098df..1db470e02456 100644 --- a/arch/sh/include/asm/topology.h +++ b/arch/sh/include/asm/topology.h @@ -5,7 +5,6 @@ #ifdef CONFIG_NUMA #define cpu_to_node(cpu) ((void)(cpu),0) -#define parent_node(node) ((void)(node),0) #define cpumask_of_node(node) ((void)node, cpu_online_mask) diff --git a/arch/sparc/Kbuild b/arch/sparc/Kbuild index 675afa285ddb..b4d0f570cc00 100644 --- a/arch/sparc/Kbuild +++ b/arch/sparc/Kbuild @@ -7,3 +7,4 @@ obj-y += mm/ obj-y += math-emu/ obj-y += net/ obj-y += crypto/ +obj-$(CONFIG_SPARC64) += vdso/ diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 4e83f950713e..6bf594ace663 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -84,6 +84,8 @@ config SPARC64 select HAVE_REGS_AND_STACK_ACCESS_API select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select GENERIC_TIME_VSYSCALL + select ARCH_CLOCKSOURCE_DATA config ARCH_DEFCONFIG string @@ -96,9 +98,6 @@ config ARCH_PROC_KCORE_TEXT config CPU_BIG_ENDIAN def_bool y -config CPU_BIG_ENDIAN - def_bool y - config ARCH_ATU bool default y if SPARC64 diff --git a/arch/sparc/Makefile b/arch/sparc/Makefile index dbc448923f48..edac927e4952 100644 --- a/arch/sparc/Makefile +++ b/arch/sparc/Makefile @@ -81,6 +81,10 @@ install: archclean: $(Q)$(MAKE) $(clean)=$(boot) +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/sparc/vdso $@ + # This is the image 
used for packaging KBUILD_IMAGE := $(boot)/zImage diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h index a90eea24b286..ca7ea5913494 100644 --- a/arch/sparc/include/asm/bitops_64.h +++ b/arch/sparc/include/asm/bitops_64.h @@ -23,10 +23,11 @@ void set_bit(unsigned long nr, volatile unsigned long *addr); void clear_bit(unsigned long nr, volatile unsigned long *addr); void change_bit(unsigned long nr, volatile unsigned long *addr); +int fls(unsigned int word); +int __fls(unsigned long word); + #include <asm-generic/bitops/non-atomic.h> -#include <asm-generic/bitops/fls.h> -#include <asm-generic/bitops/__fls.h> #include <asm-generic/bitops/fls64.h> #ifdef __KERNEL__ diff --git a/arch/sparc/include/asm/clocksource.h b/arch/sparc/include/asm/clocksource.h new file mode 100644 index 000000000000..d63ef224befe --- /dev/null +++ b/arch/sparc/include/asm/clocksource.h @@ -0,0 +1,17 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ASM_SPARC_CLOCKSOURCE_H +#define _ASM_SPARC_CLOCKSOURCE_H + +/* VDSO clocksources */ +#define VCLOCK_NONE 0 /* Nothing userspace can do. */ +#define VCLOCK_TICK 1 /* Use %tick. */ +#define VCLOCK_STICK 2 /* Use %stick. */ + +struct arch_clocksource_data { + int vclock_mode; +}; + +#endif /* _ASM_SPARC_CLOCKSOURCE_H */ diff --git a/arch/sparc/include/asm/cmpxchg_32.h b/arch/sparc/include/asm/cmpxchg_32.h index 3e3823db303e..c73b5a3ab7b9 100644 --- a/arch/sparc/include/asm/cmpxchg_32.h +++ b/arch/sparc/include/asm/cmpxchg_32.h @@ -63,6 +63,9 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size) (unsigned long)_n_, sizeof(*(ptr))); \ }) +u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new); +#define cmpxchg64(ptr, old, new) __cmpxchg_u64(ptr, old, new) + #include <asm-generic/cmpxchg-local.h> /* diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 977c3f280ba1..fa38c78de0f0 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -209,7 +209,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL /* * A pointer passed in from user mode. 
This should not diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index 5894389f5ed5..25340df3570c 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -211,4 +211,18 @@ do { if ((ex).e_ident[EI_CLASS] == ELFCLASS32) \ (current->personality & (~PER_MASK))); \ } while (0) +extern unsigned int vdso_enabled; + +#define ARCH_DLINFO \ +do { \ + if (vdso_enabled) \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, \ + (unsigned long)current->mm->context.vdso); \ +} while (0) + +struct linux_binprm; + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int uses_interp); #endif /* !(__ASM_SPARC64_ELF_H) */ diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h index 5fe64a57b4ba..ad4fb93508ba 100644 --- a/arch/sparc/include/asm/mmu_64.h +++ b/arch/sparc/include/asm/mmu_64.h @@ -97,6 +97,7 @@ typedef struct { unsigned long thp_pte_count; struct tsb_config tsb_block[MM_NUM_TSBS]; struct hv_tsb_descr tsb_descr[MM_NUM_TSBS]; + void *vdso; } mm_context_t; #endif /* !__ASSEMBLY__ */ diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index e25d25b0a34b..b361702ef52a 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -8,9 +8,11 @@ #include <linux/spinlock.h> #include <linux/mm_types.h> +#include <linux/smp.h> #include <asm/spitfire.h> #include <asm-generic/mm_hooks.h> +#include <asm/percpu.h> static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 5a9e96be1665..9937c5ff94a9 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -715,7 +715,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd) return pte_pfn(pte); } -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline unsigned long pmd_write(pmd_t pmd) { pte_t pte = __pte(pmd_val(pmd)); diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index c7c79fe8d265..aac23d4a4ddd 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -200,6 +200,13 @@ unsigned long get_wchan(struct task_struct *task); * To make a long story short, we are trying to yield the current cpu * strand during busy loops. */ +#ifdef BUILD_VDSO +#define cpu_relax() asm volatile("\n99:\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + "rd %%ccr, %%g0\n\t" \ + ::: "memory") +#else /* ! BUILD_VDSO */ #define cpu_relax() asm volatile("\n99:\n\t" \ "rd %%ccr, %%g0\n\t" \ "rd %%ccr, %%g0\n\t" \ @@ -211,6 +218,7 @@ unsigned long get_wchan(struct task_struct *task); "nop\n\t" \ ".previous" \ ::: "memory") +#endif /* Prefetch support. This is tuned for UltraSPARC-III and later. * UltraSPARC-I will treat these as nops, and UltraSPARC-II has diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 3831b1911a19..34c628a22ea5 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -11,8 +11,6 @@ static inline int cpu_to_node(int cpu) return numa_cpu_lookup_table[cpu]; } -#define parent_node(node) (node) - #define cpumask_of_node(node) ((node) == -1 ? 
\ cpu_all_mask : \ &numa_cpumask_lookup_table[node]) diff --git a/arch/sparc/include/asm/tsb.h b/arch/sparc/include/asm/tsb.h index 25b6abdb3908..522a677e050d 100644 --- a/arch/sparc/include/asm/tsb.h +++ b/arch/sparc/include/asm/tsb.h @@ -217,7 +217,7 @@ extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; sllx REG2, 32, REG2; \ andcc REG1, REG2, %g0; \ be,pt %xcc, 700f; \ - sethi %hi(0x1ffc0000), REG2; \ + sethi %hi(0xffe00000), REG2; \ sllx REG2, 1, REG2; \ brgez,pn REG1, FAIL_LABEL; \ andn REG1, REG2, REG1; \ diff --git a/arch/sparc/include/asm/vdso.h b/arch/sparc/include/asm/vdso.h new file mode 100644 index 000000000000..93b628731a5e --- /dev/null +++ b/arch/sparc/include/asm/vdso.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ASM_SPARC_VDSO_H +#define _ASM_SPARC_VDSO_H + +struct vdso_image { + void *data; + unsigned long size; /* Always a multiple of PAGE_SIZE */ + long sym_vvar_start; /* Negative offset to the vvar area */ + long sym_vread_tick; /* Start of vread_tick section */ + long sym_vread_tick_patch_start; /* Start of tick read */ + long sym_vread_tick_patch_end; /* End of tick read */ +}; + +#ifdef CONFIG_SPARC64 +extern const struct vdso_image vdso_image_64_builtin; +#endif +#ifdef CONFIG_COMPAT +extern const struct vdso_image vdso_image_32_builtin; +#endif + +#endif /* _ASM_SPARC_VDSO_H */ diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h new file mode 100644 index 000000000000..0289503d1cb0 --- /dev/null +++ b/arch/sparc/include/asm/vvar.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#ifndef _ASM_SPARC_VVAR_DATA_H +#define _ASM_SPARC_VVAR_DATA_H + +#include <asm/clocksource.h> +#include <linux/seqlock.h> +#include <linux/time.h> +#include <linux/types.h> + +struct vvar_data { + unsigned int seq; + + int vclock_mode; + struct { /* extract of a clocksource struct */ + u64 cycle_last; + u64 mask; + int mult; + int shift; + } clock; + /* open coded 'struct timespec' */ + u64 wall_time_sec; + u64 wall_time_snsec; + u64 monotonic_time_snsec; + u64 monotonic_time_sec; + u64 monotonic_time_coarse_sec; + u64 monotonic_time_coarse_nsec; + u64 wall_time_coarse_sec; + u64 wall_time_coarse_nsec; + + int tz_minuteswest; + int tz_dsttime; +}; + +extern struct vvar_data *vvar_data; +extern int vdso_fix_stick; + +static inline unsigned int vvar_read_begin(const struct vvar_data *s) +{ + unsigned int ret; + +repeat: + ret = READ_ONCE(s->seq); + if (unlikely(ret & 1)) { + cpu_relax(); + goto repeat; + } + smp_rmb(); /* Finish all reads before we return seq */ + return ret; +} + +static inline int vvar_read_retry(const struct vvar_data *s, + unsigned int start) +{ + smp_rmb(); /* Finish all reads before checking the value of seq */ + return unlikely(s->seq != start); +} + +static inline void vvar_write_begin(struct vvar_data *s) +{ + ++s->seq; + smp_wmb(); /* Makes sure that increment of seq is reflected */ +} + +static inline void vvar_write_end(struct vvar_data *s) +{ + smp_wmb(); /* Makes the value of seq current before we increment */ + ++s->seq; +} + + +#endif /* _ASM_SPARC_VVAR_DATA_H */ diff --git a/arch/sparc/include/uapi/asm/auxvec.h b/arch/sparc/include/uapi/asm/auxvec.h index ad6f360261f6..5f80a70cc901 100644 --- a/arch/sparc/include/uapi/asm/auxvec.h +++ b/arch/sparc/include/uapi/asm/auxvec.h @@ -1,4 +1,8 @@ #ifndef __ASMSPARC_AUXVEC_H #define __ASMSPARC_AUXVEC_H +#define AT_SYSINFO_EHDR 33 + +#define 
AT_VECTOR_SIZE_ARCH 1 + #endif /* !(__ASMSPARC_AUXVEC_H) */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 8de9617589a5..cc97545737f0 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_SPARC32) += systbls_32.o obj-y += time_$(BITS).o obj-$(CONFIG_SPARC32) += windows.o obj-y += cpu.o +obj-$(CONFIG_SPARC64) += vdso.o obj-$(CONFIG_SPARC32) += devices.o obj-y += ptrace_$(BITS).o obj-y += unaligned_$(BITS).o diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 9e293de12052..a41e6e16eb36 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -641,6 +641,8 @@ niagara4_patch: nop call niagara4_patch_pageops nop + call niagara4_patch_fls + nop ba,a,pt %xcc, 80f nop diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 1ef6156b1530..418592a09b41 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -13,6 +13,7 @@ #include <linux/miscdevice.h> #include <linux/bootmem.h> #include <linux/export.h> +#include <linux/refcount.h> #include <asm/cpudata.h> #include <asm/hypervisor.h> @@ -71,7 +72,7 @@ struct mdesc_handle { struct list_head list; struct mdesc_mem_ops *mops; void *self_base; - atomic_t refcnt; + refcount_t refcnt; unsigned int handle_size; struct mdesc_hdr mdesc; }; @@ -153,7 +154,7 @@ static void mdesc_handle_init(struct mdesc_handle *hp, memset(hp, 0, handle_size); INIT_LIST_HEAD(&hp->list); hp->self_base = base; - atomic_set(&hp->refcnt, 1); + refcount_set(&hp->refcnt, 1); hp->handle_size = handle_size; } @@ -183,7 +184,7 @@ static void __init mdesc_memblock_free(struct mdesc_handle *hp) unsigned int alloc_size; unsigned long start; - BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(refcount_read(&hp->refcnt) != 0); BUG_ON(!list_empty(&hp->list)); alloc_size = PAGE_ALIGN(hp->handle_size); @@ -221,7 +222,7 @@ static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size) static void mdesc_kfree(struct mdesc_handle *hp) { - BUG_ON(atomic_read(&hp->refcnt) != 0); + BUG_ON(refcount_read(&hp->refcnt) != 0); BUG_ON(!list_empty(&hp->list)); kfree(hp->self_base); @@ -260,7 +261,7 @@ struct mdesc_handle *mdesc_grab(void) spin_lock_irqsave(&mdesc_lock, flags); hp = cur_mdesc; if (hp) - atomic_inc(&hp->refcnt); + refcount_inc(&hp->refcnt); spin_unlock_irqrestore(&mdesc_lock, flags); return hp; @@ -272,7 +273,7 @@ void mdesc_release(struct mdesc_handle *hp) unsigned long flags; spin_lock_irqsave(&mdesc_lock, flags); - if (atomic_dec_and_test(&hp->refcnt)) { + if (refcount_dec_and_test(&hp->refcnt)) { list_del_init(&hp->list); hp->mops->free(hp); } @@ -514,7 +515,7 @@ void mdesc_update(void) if (status != HV_EOK || real_len > len) { printk(KERN_ERR "MD: mdesc reread fails with %lu\n", status); - atomic_dec(&hp->refcnt); + refcount_dec(&hp->refcnt); mdesc_free(hp); goto out; } @@ -527,7 +528,7 @@ void mdesc_update(void) mdesc_notify_clients(orig_hp, hp); spin_lock_irqsave(&mdesc_lock, flags); - if (atomic_dec_and_test(&orig_hp->refcnt)) + if (refcount_dec_and_test(&orig_hp->refcnt)) mdesc_free(orig_hp); else list_add(&orig_hp->list, &mdesc_zombie_list); diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c index 5c572de64c74..54a6159b9cd8 100644 --- a/arch/sparc/kernel/signal32.c +++ b/arch/sparc/kernel/signal32.c @@ -249,7 +249,6 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) compat_uptr_t fpu_save; compat_uptr_t rwin_save; sigset_t set; - compat_sigset_t seta; int err, i; /* Always make any pending 
restarted system calls return -EINTR */ @@ -312,7 +311,7 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) err |= __get_user(fpu_save, &sf->fpu_save); if (!err && fpu_save) err |= restore_fpu_state(regs, compat_ptr(fpu_save)); - err |= copy_from_user(&seta, &sf->mask, sizeof(compat_sigset_t)); + err |= get_compat_sigset(&set, &sf->mask); err |= compat_restore_altstack(&sf->stack); if (err) goto segv; @@ -323,7 +322,6 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs) goto segv; } - set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32); set_current_blocked(&set); return; segv: @@ -555,7 +553,6 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, void __user *tail; int sigframe_size; u32 psr; - compat_sigset_t seta; /* 1. Make sure everything is clean */ synchronize_user_stack(); @@ -625,9 +622,7 @@ static int setup_rt_frame32(struct ksignal *ksig, struct pt_regs *regs, /* Setup sigaltstack */ err |= __compat_save_altstack(&sf->stack, regs->u_regs[UREG_FP]); - seta.sig[1] = (oldset->sig[0] >> 32); - seta.sig[0] = oldset->sig[0]; - err |= __copy_to_user(&sf->mask, &seta, sizeof(compat_sigset_t)); + err |= put_compat_sigset(&sf->mask, oldset, sizeof(compat_sigset_t)); if (!wsaved) { err |= copy_in_user((u32 __user *)sf, diff --git a/arch/sparc/kernel/sys_sparc32.c b/arch/sparc/kernel/sys_sparc32.c index b4e1478413a1..6d964bdefbaa 100644 --- a/arch/sparc/kernel/sys_sparc32.c +++ b/arch/sparc/kernel/sys_sparc32.c @@ -160,7 +160,6 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, { struct k_sigaction new_ka, old_ka; int ret; - compat_sigset_t set32; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(compat_sigset_t)) @@ -172,8 +171,7 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, new_ka.ka_restorer = restorer; ret = get_user(u_handler, &act->sa_handler); new_ka.sa.sa_handler = compat_ptr(u_handler); - ret |= copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t)); - sigset_from_compat(&new_ka.sa.sa_mask, &set32); + ret |= get_compat_sigset(&new_ka.sa.sa_mask, &act->sa_mask); ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags); ret |= get_user(u_restorer, &act->sa_restorer); new_ka.sa.sa_restorer = compat_ptr(u_restorer); @@ -184,9 +182,9 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig, ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); if (!ret && oact) { - sigset_to_compat(&set32, &old_ka.sa.sa_mask); ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); - ret |= copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t)); + ret |= put_compat_sigset(&oact->sa_mask, &old_ka.sa.sa_mask, + sizeof(oact->sa_mask)); ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags); ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer); if (ret) diff --git a/arch/sparc/kernel/time_64.c b/arch/sparc/kernel/time_64.c index 3b397081047a..2ef8cfa9677e 100644 --- a/arch/sparc/kernel/time_64.c +++ b/arch/sparc/kernel/time_64.c @@ -28,7 +28,6 @@ #include <linux/jiffies.h> #include <linux/cpufreq.h> #include <linux/percpu.h> -#include <linux/miscdevice.h> #include <linux/rtc/m48t59.h> #include <linux/kernel_stat.h> #include <linux/clockchips.h> @@ -54,6 +53,8 @@ DEFINE_SPINLOCK(rtc_lock); +unsigned int __read_mostly vdso_fix_stick; + #ifdef CONFIG_SMP unsigned long profile_pc(struct pt_regs *regs) { @@ -831,12 +832,17 @@ static void init_tick_ops(struct sparc64_tick_ops *ops) void __init time_init_early(void) { if (tlb_type == spitfire) { - if (is_hummingbird()) + if (is_hummingbird()) { init_tick_ops(&hbtick_operations); - else + clocksource_tick.archdata.vclock_mode = VCLOCK_NONE; + } else { init_tick_ops(&tick_operations); + clocksource_tick.archdata.vclock_mode = VCLOCK_TICK; + vdso_fix_stick = 1; + } } else { init_tick_ops(&stick_operations); + clocksource_tick.archdata.vclock_mode = VCLOCK_STICK; } } diff --git a/arch/sparc/kernel/vdso.c b/arch/sparc/kernel/vdso.c new file mode 100644 index 000000000000..58880662b271 --- /dev/null +++ b/arch/sparc/kernel/vdso.c @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE + * Copyright 2003 Andi Kleen, SuSE Labs. + * + * Thanks to hpa@transmeta.com for some useful hint. + * Special thanks to Ingo Molnar for his early experience with + * a different vsyscall implementation for Linux/IA32 and for the name. 
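
[Editor's note] The sigset hunks in signal32.c and sys_sparc32.c above replace open-coded splitting of a 64-bit mask into two 32-bit compat words with the generic get_compat_sigset()/put_compat_sigset() helpers. As a rough sketch of what the get side does on a 64-bit kernel — the real helper also handles word ordering and user-access details, and this hypothetical reference is not the kernel's implementation — it recombines exactly what the removed lines in do_rt_sigreturn32() computed by hand:

	static int get_compat_sigset_sketch(sigset_t *set,
					    const compat_sigset_t __user *compat)
	{
		compat_sigset_t v;

		if (copy_from_user(&v, compat, sizeof(v)))
			return -EFAULT;
		/* Recombine the two 32-bit compat words into one 64-bit
		 * word, matching the removed open-coded logic. */
		set->sig[0] = v.sig[0] | (((unsigned long)v.sig[1]) << 32);
		return 0;
	}
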
+ */ + +#include <linux/seqlock.h> +#include <linux/time.h> +#include <linux/timekeeper_internal.h> + +#include <asm/vvar.h> + +void update_vsyscall_tz(void) +{ + if (unlikely(vvar_data == NULL)) + return; + + vvar_data->tz_minuteswest = sys_tz.tz_minuteswest; + vvar_data->tz_dsttime = sys_tz.tz_dsttime; +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct vvar_data *vdata = vvar_data; + + if (unlikely(vdata == NULL)) + return; + + vvar_write_begin(vdata); + vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode; + vdata->clock.cycle_last = tk->tkr_mono.cycle_last; + vdata->clock.mask = tk->tkr_mono.mask; + vdata->clock.mult = tk->tkr_mono.mult; + vdata->clock.shift = tk->tkr_mono.shift; + + vdata->wall_time_sec = tk->xtime_sec; + vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec; + + vdata->monotonic_time_sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec + + (tk->wall_to_monotonic.tv_nsec << + tk->tkr_mono.shift); + + while (vdata->monotonic_time_snsec >= + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + vdata->monotonic_time_snsec -= + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; + vdata->monotonic_time_sec++; + } + + vdata->wall_time_coarse_sec = tk->xtime_sec; + vdata->wall_time_coarse_nsec = + (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift); + + vdata->monotonic_time_coarse_sec = + vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; + vdata->monotonic_time_coarse_nsec = + vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; + + while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { + vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; + vdata->monotonic_time_coarse_sec++; + } + + vvar_write_end(vdata); +} diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index c858f5f3ce2c..635d67ffc9a3 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -798,9 +798,9 @@ void vio_port_up(struct vio_driver_state *vio) } EXPORT_SYMBOL(vio_port_up); -static void vio_port_timer(unsigned long _arg) +static void vio_port_timer(struct timer_list *t) { - struct vio_driver_state *vio = (struct vio_driver_state *) _arg; + struct vio_driver_state *vio = from_timer(vio, t, timer); vio_port_up(vio); } @@ -849,7 +849,7 @@ int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev, vio->ops = ops; - setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio); + timer_setup(&vio->timer, vio_port_timer, 0); return 0; } diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 44829a8dc458..063556fe2cb1 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -17,6 +17,9 @@ lib-$(CONFIG_SPARC64) += atomic_64.o lib-$(CONFIG_SPARC32) += lshrdi3.o ashldi3.o lib-$(CONFIG_SPARC32) += muldi3.o bitext.o cmpdi2.o lib-$(CONFIG_SPARC64) += multi3.o +lib-$(CONFIG_SPARC64) += fls.o +lib-$(CONFIG_SPARC64) += fls64.o +lib-$(CONFIG_SPARC64) += NG4fls.o lib-$(CONFIG_SPARC64) += copy_page.o clear_page.o bzero.o lib-$(CONFIG_SPARC64) += csum_copy.o csum_copy_from_user.o csum_copy_to_user.o diff --git a/arch/sparc/lib/NG4fls.S b/arch/sparc/lib/NG4fls.S new file mode 100644 index 000000000000..2d0991e5b034 --- /dev/null +++ b/arch/sparc/lib/NG4fls.S @@ -0,0 +1,30 @@ +/* NG4fls.S: SPARC optimized fls and __fls for T4 and above. + * + * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. 
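
[Editor's note] The viohs.c hunk above is the same timer-API conversion applied throughout this series (srmcons, the sh PCI error timers, push-switch): callbacks now take a struct timer_list * and recover their container with from_timer(), so the unsigned long cookie and the .data field disappear. A minimal sketch of the pattern, with a hypothetical my_dev standing in for pci_channel, push_switch or vio_driver_state:

	struct my_dev {
		struct timer_list timer;	/* embedded, not a pointer */
		int state;
	};

	/* New-style callback: the timer itself is the argument. */
	static void my_dev_timeout(struct timer_list *t)
	{
		struct my_dev *dev = from_timer(dev, t, timer);

		dev->state = 0;
	}

	static void my_dev_init(struct my_dev *dev)
	{
		/* Replaces init_timer() plus .function/.data assignments,
		 * and setup_timer(&dev->timer, fn, (unsigned long)dev). */
		timer_setup(&dev->timer, my_dev_timeout, 0);
	}
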
+ */ + +#include <linux/linkage.h> + +#define LZCNT_O0_G2 \ + .word 0x85b002e8 + + .text + .register %g2, #scratch + .register %g3, #scratch + +ENTRY(NG4fls) + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 64, %g3 + retl + sub %g3, %g2, %o0 +ENDPROC(NG4fls) + +ENTRY(__NG4fls) + brz,pn %o0, 1f + LZCNT_O0_G2 !lzcnt %o0, %g2 + mov 63, %g3 + sub %g3, %g2, %o0 +1: + retl + nop +ENDPROC(__NG4fls) diff --git a/arch/sparc/lib/NG4patch.S b/arch/sparc/lib/NG4patch.S index aa58ab39f9a6..37866175c921 100644 --- a/arch/sparc/lib/NG4patch.S +++ b/arch/sparc/lib/NG4patch.S @@ -4,6 +4,8 @@ * Copyright (C) 2012 David S. Miller <davem@davemloft.net> */ +#include <linux/linkage.h> + #define BRANCH_ALWAYS 0x10680000 #define NOP 0x01000000 #define NG_DO_PATCH(OLD, NEW) \ @@ -53,3 +55,10 @@ niagara4_patch_pageops: retl nop .size niagara4_patch_pageops,.-niagara4_patch_pageops + +ENTRY(niagara4_patch_fls) + NG_DO_PATCH(fls, NG4fls) + NG_DO_PATCH(__fls, __NG4fls) + retl + nop +ENDPROC(niagara4_patch_fls) diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 5010df497387..465a901a0ada 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -173,6 +173,20 @@ unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) } EXPORT_SYMBOL(__cmpxchg_u32); +u64 __cmpxchg_u64(u64 *ptr, u64 old, u64 new) +{ + unsigned long flags; + u64 prev; + + spin_lock_irqsave(ATOMIC_HASH(ptr), flags); + if ((prev = *ptr) == old) + *ptr = new; + spin_unlock_irqrestore(ATOMIC_HASH(ptr), flags); + + return prev; +} +EXPORT_SYMBOL(__cmpxchg_u64); + unsigned long __xchg_u32(volatile u32 *ptr, u32 new) { unsigned long flags; diff --git a/arch/sparc/lib/fls.S b/arch/sparc/lib/fls.S new file mode 100644 index 000000000000..06b8d300bcae --- /dev/null +++ b/arch/sparc/lib/fls.S @@ -0,0 +1,67 @@ +/* fls.S: SPARC default fls definition. + * + * SPARC default fls definition, which follows the same algorithm as + * in generic fls(). This function will be boot time patched on T4 + * and onward. + */ + +#include <linux/linkage.h> +#include <asm/export.h> + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(fls) + brz,pn %o0, 6f + mov 0, %o1 + sethi %hi(0xffff0000), %g3 + mov %o0, %g2 + andcc %o0, %g3, %g0 + be,pt %icc, 8f + mov 32, %o1 + sethi %hi(0xff000000), %g3 + andcc %g2, %g3, %g0 + bne,pt %icc, 3f + sethi %hi(0xf0000000), %g3 + sll %o0, 8, %o0 +1: + add %o1, -8, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +2: + sethi %hi(0xf0000000), %g3 +3: + andcc %g2, %g3, %g0 + bne,pt %icc, 4f + sethi %hi(0xc0000000), %g3 + sll %o0, 4, %o0 + add %o1, -4, %o1 + sra %o0, 0, %o0 + mov %o0, %g2 +4: + andcc %g2, %g3, %g0 + be,a,pt %icc, 7f + sll %o0, 2, %o0 +5: + xnor %g0, %o0, %o0 + srl %o0, 31, %o0 + sub %o1, %o0, %o1 +6: + jmp %o7 + 8 + sra %o1, 0, %o0 +7: + add %o1, -2, %o1 + ba,pt %xcc, 5b + sra %o0, 0, %o0 +8: + sll %o0, 16, %o0 + sethi %hi(0xff000000), %g3 + sra %o0, 0, %o0 + mov %o0, %g2 + andcc %g2, %g3, %g0 + bne,pt %icc, 2b + mov 16, %o1 + ba,pt %xcc, 1b + sll %o0, 8, %o0 +ENDPROC(fls) +EXPORT_SYMBOL(fls) diff --git a/arch/sparc/lib/fls64.S b/arch/sparc/lib/fls64.S new file mode 100644 index 000000000000..c83e22ae9586 --- /dev/null +++ b/arch/sparc/lib/fls64.S @@ -0,0 +1,61 @@ +/* fls64.S: SPARC default __fls definition. + * + * SPARC default __fls definition, which follows the same algorithm as + * in generic __fls(). This function will be boot time patched on T4 + * and onward. 
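
[Editor's note] For reference, the semantics that the hand-written fls.S/fls64.S routines (and the single-instruction NG4 lzcnt variants above) must preserve can be stated with compiler builtins. This is a sketch for a 64-bit build, not kernel code:

	/* fls(): 1-based index of the most significant set bit, 0 for 0. */
	static int fls_ref(unsigned int word)
	{
		return word ? 32 - __builtin_clz(word) : 0;
	}

	/* __fls(): bit number of the MSB; undefined for 0, as in the kernel. */
	static unsigned long __fls_ref(unsigned long word)
	{
		return 63 - __builtin_clzl(word);
	}

On T4 and later, niagara4_patch_fls() redirects the generic routines to the lzcnt-based NG4 versions at boot, reusing the NG_DO_PATCH branch-patching scheme that NG4patch.S already applies to the memcpy and page operations.
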
+ */ + +#include <linux/linkage.h> +#include <asm/export.h> + + .text + .register %g2, #scratch + .register %g3, #scratch +ENTRY(__fls) + mov -1, %g2 + sllx %g2, 32, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 1f + mov 63, %g1 + sllx %o0, 32, %o0 + mov 31, %g1 +1: + mov -1, %g2 + sllx %g2, 48, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 2f + mov -1, %g2 + sllx %o0, 16, %o0 + add %g1, -16, %g1 +2: + mov -1, %g2 + sllx %g2, 56, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 3f + mov -1, %g2 + sllx %o0, 8, %o0 + add %g1, -8, %g1 +3: + sllx %g2, 60, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 4f + mov -1, %g2 + sllx %o0, 4, %o0 + add %g1, -4, %g1 +4: + sllx %g2, 62, %g2 + and %o0, %g2, %g2 + brnz,pt %g2, 5f + mov -1, %g3 + sllx %o0, 2, %o0 + add %g1, -2, %g1 +5: + mov 0, %g2 + sllx %g3, 63, %g3 + and %o0, %g3, %o0 + movre %o0, 1, %g2 + sub %g1, %g2, %g1 + jmp %o7+8 + sra %g1, 0, %o0 +ENDPROC(__fls) +EXPORT_SYMBOL(__fls) diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 5335ba3c850e..33c0f8bb0f33 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -75,7 +75,7 @@ static int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr, if (!(pmd_val(pmd) & _PAGE_VALID)) return 0; - if (write && !pmd_write(pmd)) + if (!pmd_access_permitted(pmd, write)) return 0; refs = 0; @@ -114,7 +114,7 @@ static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr, if (!(pud_val(pud) & _PAGE_VALID)) return 0; - if (write && !pud_write(pud)) + if (!pud_access_permitted(pud, write)) return 0; refs = 0; diff --git a/arch/sparc/vdso/.gitignore b/arch/sparc/vdso/.gitignore new file mode 100644 index 000000000000..ef925b998222 --- /dev/null +++ b/arch/sparc/vdso/.gitignore @@ -0,0 +1,3 @@ +vdso.lds +vdso-image-*.c +vdso2c diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile new file mode 100644 index 000000000000..a6615d8864f7 --- /dev/null +++ b/arch/sparc/vdso/Makefile @@ -0,0 +1,149 @@ +# +# Building vDSO images for sparc. +# + +KBUILD_CFLAGS += $(DISABLE_LTO) + +VDSO64-$(CONFIG_SPARC64) := y +VDSOCOMPAT-$(CONFIG_COMPAT) := y + +# files to link into the vdso +vobjs-y := vdso-note.o vclock_gettime.o + +# files to link into kernel +obj-y += vma.o + +# vDSO images to build +vdso_img-$(VDSO64-y) += 64 +vdso_img-$(VDSOCOMPAT-y) += 32 + +vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) + +$(obj)/vdso.o: $(obj)/vdso.so + +targets += vdso.lds $(vobjs-y) + +# Build the vDSO image C files and link them in. +vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) +vdso_img_cfiles := $(vdso_img-y:%=vdso-image-%.c) +vdso_img_sodbg := $(vdso_img-y:%=vdso%.so.dbg) +obj-y += $(vdso_img_objs) +targets += $(vdso_img_cfiles) +targets += $(vdso_img_sodbg) +.SECONDARY: $(vdso_img-y:%=$(obj)/vdso-image-%.c) \ + $(vdso_img-y:%=$(obj)/vdso%.so) + +export CPPFLAGS_vdso.lds += -P -C + +VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ + -Wl,--no-undefined \ + -Wl,-z,max-page-size=8192 -Wl,-z,common-page-size=8192 \ + $(DISABLE_LTO) + +$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE + $(call if_changed,vdso) + +HOST_EXTRACFLAGS += -I$(srctree)/tools/include +hostprogs-y += vdso2c + +quiet_cmd_vdso2c = VDSO2C $@ +define cmd_vdso2c + $(obj)/vdso2c $< $(<:%.dbg=%) $@ +endef + +$(obj)/vdso-image-%.c: $(obj)/vdso%.so.dbg $(obj)/vdso%.so $(obj)/vdso2c FORCE + $(call if_changed,vdso2c) + +# +# Don't omit frame pointers for ease of userspace debugging, but do +# optimize sibling calls. 
+# +CFL := $(PROFILING) -mcmodel=medlow -fPIC -O2 -fasynchronous-unwind-tables \ + -m64 -ffixed-g2 -ffixed-g3 -fcall-used-g4 -fcall-used-g5 -ffixed-g6 \ + -ffixed-g7 $(filter -g%,$(KBUILD_CFLAGS)) \ + $(call cc-option, -fno-stack-protector) -fno-omit-frame-pointer \ + -foptimize-sibling-calls -DBUILD_VDSO + +$(vobjs): KBUILD_CFLAGS += $(CFL) + +# +# vDSO code runs in userspace and -pg doesn't help with profiling anyway. +# +CFLAGS_REMOVE_vdso-note.o = -pg +CFLAGS_REMOVE_vclock_gettime.o = -pg + +$(obj)/%.so: OBJCOPYFLAGS := -S +$(obj)/%.so: $(obj)/%.so.dbg + $(call if_changed,objcopy) + +CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) +VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf32_sparc,-soname=linux-gate.so.1 + +#This makes sure the $(obj) subdirectory exists even though vdso32/ +#is not a kbuild sub-make subdirectory +override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ + +targets += vdso32/vdso32.lds +targets += vdso32/vdso-note.o +targets += vdso32/vclock_gettime.o + +KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO +$(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) +$(obj)/vdso32.so.dbg: asflags-$(CONFIG_SPARC64) += -m32 + +KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) +KBUILD_CFLAGS_32 := $(filter-out -mcmodel=medlow,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 += -m32 -msoft-float -fpic -mno-app-regs -ffixed-g7 +KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) +KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) +KBUILD_CFLAGS_32 += -fno-omit-frame-pointer +KBUILD_CFLAGS_32 += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS_32 += -mv8plus +$(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) + +$(obj)/vdso32.so.dbg: FORCE \ + $(obj)/vdso32/vdso32.lds \ + $(obj)/vdso32/vclock_gettime.o \ + $(obj)/vdso32/vdso-note.o + $(call if_changed,vdso) + +# +# The DSO images are built using a special linker script. +# +quiet_cmd_vdso = VDSO $@ + cmd_vdso = $(CC) -nostdlib -o $@ \ + $(VDSO_LDFLAGS) $(VDSO_LDFLAGS_$(filter %.lds,$(^F))) \ + -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) + +VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ + $(call cc-ldoption, -Wl$(comma)--build-id) -Wl,-Bsymbolic +GCOV_PROFILE := n + +# +# Install the unstripped copies of vdso*.so. If our toolchain supports +# build-id, install .build-id links as well. +# +quiet_cmd_vdso_install = INSTALL $(@:install_%=%) +define cmd_vdso_install + cp $< "$(MODLIB)/vdso/$(@:install_%=%)"; \ + if readelf -n $< |grep -q 'Build ID'; then \ + buildid=`readelf -n $< |grep 'Build ID' |sed -e 's/^.*Build ID: \(.*\)$$/\1/'`; \ + first=`echo $$buildid | cut -b-2`; \ + last=`echo $$buildid | cut -b3-`; \ + mkdir -p "$(MODLIB)/vdso/.build-id/$$first"; \ + ln -sf "../../$(@:install_%=%)" "$(MODLIB)/vdso/.build-id/$$first/$$last.debug"; \ + fi +endef + +vdso_img_insttargets := $(vdso_img_sodbg:%.dbg=install_%) + +$(MODLIB)/vdso: FORCE + @mkdir -p $(MODLIB)/vdso + +$(vdso_img_insttargets): install_%: $(obj)/%.dbg $(MODLIB)/vdso FORCE + $(call cmd,vdso_install) + +PHONY += vdso_install $(vdso_img_insttargets) +vdso_install: $(vdso_img_insttargets) FORCE diff --git a/arch/sparc/vdso/vclock_gettime.c b/arch/sparc/vdso/vclock_gettime.c new file mode 100644 index 000000000000..3feb3d960ca5 --- /dev/null +++ b/arch/sparc/vdso/vclock_gettime.c @@ -0,0 +1,264 @@ +/* + * Copyright 2006 Andi Kleen, SUSE Labs. 
+ * Subject to the GNU Public License, v.2 + * + * Fast user context implementation of clock_gettime, gettimeofday, and time. + * + * The code should have no internal unresolved relocations. + * Check with readelf after changing. + * Also alternative() doesn't work. + */ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +/* Disable profiling for userspace code: */ +#ifndef DISABLE_BRANCH_PROFILING +#define DISABLE_BRANCH_PROFILING +#endif + +#include <linux/kernel.h> +#include <linux/time.h> +#include <linux/string.h> +#include <asm/io.h> +#include <asm/unistd.h> +#include <asm/timex.h> +#include <asm/clocksource.h> +#include <asm/vvar.h> + +#undef TICK_PRIV_BIT +#ifdef CONFIG_SPARC64 +#define TICK_PRIV_BIT (1UL << 63) +#else +#define TICK_PRIV_BIT (1ULL << 63) +#endif + +#define SYSCALL_STRING \ + "ta 0x6d;" \ + "sub %%g0, %%o0, %%o0;" \ + +#define SYSCALL_CLOBBERS \ + "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", \ + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", \ + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", \ + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", \ + "f32", "f34", "f36", "f38", "f40", "f42", "f44", "f46", \ + "f48", "f50", "f52", "f54", "f56", "f58", "f60", "f62", \ + "cc", "memory" + +/* + * Compute the vvar page's address in the process address space, and return it + * as a pointer to the vvar_data. + */ +static notrace noinline struct vvar_data * +get_vvar_data(void) +{ + unsigned long ret; + + /* + * vdso data page is the first vDSO page so grab the return address + * and move up a page to get to the data page. + */ + ret = (unsigned long)__builtin_return_address(0); + ret &= ~(8192 - 1); + ret -= 8192; + + return (struct vvar_data *) ret; +} + +static notrace long +vdso_fallback_gettime(long clock, struct timespec *ts) +{ + register long num __asm__("g1") = __NR_clock_gettime; + register long o0 __asm__("o0") = clock; + register long o1 __asm__("o1") = (long) ts; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +static notrace __always_inline long +vdso_fallback_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + register long num __asm__("g1") = __NR_gettimeofday; + register long o0 __asm__("o0") = (long) tv; + register long o1 __asm__("o1") = (long) tz; + + __asm__ __volatile__(SYSCALL_STRING : "=r" (o0) : "r" (num), + "0" (o0), "r" (o1) : SYSCALL_CLOBBERS); + return o0; +} + +#ifdef CONFIG_SPARC64 +static notrace noinline u64 +vread_tick(void) { + u64 ret; + + __asm__ __volatile__("rd %%asr24, %0 \n" + ".section .vread_tick_patch, \"ax\" \n" + "rd %%tick, %0 \n" + ".previous \n" + : "=&r" (ret)); + return ret & ~TICK_PRIV_BIT; +} +#else +static notrace noinline u64 +vread_tick(void) +{ + unsigned int lo, hi; + + __asm__ __volatile__("rd %%asr24, %%g1\n\t" + "srlx %%g1, 32, %1\n\t" + "srl %%g1, 0, %0\n" + ".section .vread_tick_patch, \"ax\" \n" + "rd %%tick, %%g1\n" + ".previous \n" + : "=&r" (lo), "=&r" (hi) + : + : "g1"); + return lo | ((u64)hi << 32); +} +#endif + +static notrace inline u64 +vgetsns(struct vvar_data *vvar) +{ + u64 v; + u64 cycles; + + cycles = vread_tick(); + v = (cycles - vvar->clock.cycle_last) & vvar->clock.mask; + return v * vvar->clock.mult; +} + +static notrace noinline int +do_realtime(struct vvar_data *vvar, struct timespec *ts) +{ + unsigned long seq; + u64 ns; + + ts->tv_nsec = 0; + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->wall_time_sec; + ns = vvar->wall_time_snsec; + ns += 
vgetsns(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + timespec_add_ns(ts, ns); + + return 0; +} + +static notrace noinline int +do_monotonic(struct vvar_data *vvar, struct timespec *ts) +{ + unsigned long seq; + u64 ns; + + ts->tv_nsec = 0; + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->monotonic_time_sec; + ns = vvar->monotonic_time_snsec; + ns += vgetsns(vvar); + ns >>= vvar->clock.shift; + } while (unlikely(vvar_read_retry(vvar, seq))); + + timespec_add_ns(ts, ns); + + return 0; +} + +static notrace noinline int +do_realtime_coarse(struct vvar_data *vvar, struct timespec *ts) +{ + unsigned long seq; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->wall_time_coarse_sec; + ts->tv_nsec = vvar->wall_time_coarse_nsec; + } while (unlikely(vvar_read_retry(vvar, seq))); + return 0; +} + +static notrace noinline int +do_monotonic_coarse(struct vvar_data *vvar, struct timespec *ts) +{ + unsigned long seq; + + do { + seq = vvar_read_begin(vvar); + ts->tv_sec = vvar->monotonic_time_coarse_sec; + ts->tv_nsec = vvar->monotonic_time_coarse_nsec; + } while (unlikely(vvar_read_retry(vvar, seq))); + + return 0; +} + +notrace int +__vdso_clock_gettime(clockid_t clock, struct timespec *ts) +{ + struct vvar_data *vvd = get_vvar_data(); + + switch (clock) { + case CLOCK_REALTIME: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_realtime(vvd, ts); + case CLOCK_MONOTONIC: + if (unlikely(vvd->vclock_mode == VCLOCK_NONE)) + break; + return do_monotonic(vvd, ts); + case CLOCK_REALTIME_COARSE: + return do_realtime_coarse(vvd, ts); + case CLOCK_MONOTONIC_COARSE: + return do_monotonic_coarse(vvd, ts); + } + /* + * Unknown clock ID ? Fall back to the syscall. + */ + return vdso_fallback_gettime(clock, ts); +} +int +clock_gettime(clockid_t, struct timespec *) + __attribute__((weak, alias("__vdso_clock_gettime"))); + +notrace int +__vdso_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + struct vvar_data *vvd = get_vvar_data(); + + if (likely(vvd->vclock_mode != VCLOCK_NONE)) { + if (likely(tv != NULL)) { + union tstv_t { + struct timespec ts; + struct timeval tv; + } *tstv = (union tstv_t *) tv; + do_realtime(vvd, &tstv->ts); + /* + * Assign before dividing to ensure that the division is + * done in the type of tv_usec, not tv_nsec. + * + * There cannot be > 1 billion usec in a second: + * do_realtime() has already distributed such overflow + * into tv_sec. So we can assign it to an int safely. + */ + tstv->tv.tv_usec = tstv->ts.tv_nsec; + tstv->tv.tv_usec /= 1000; + } + if (unlikely(tz != NULL)) { + /* Avoid memcpy. Some old compilers fail to inline it */ + tz->tz_minuteswest = vvd->tz_minuteswest; + tz->tz_dsttime = vvd->tz_dsttime; + } + return 0; + } + return vdso_fallback_gettimeofday(tv, tz); +} +int +gettimeofday(struct timeval *, struct timezone *) + __attribute__((weak, alias("__vdso_gettimeofday"))); diff --git a/arch/sparc/vdso/vdso-layout.lds.S b/arch/sparc/vdso/vdso-layout.lds.S new file mode 100644 index 000000000000..f2c83abaca12 --- /dev/null +++ b/arch/sparc/vdso/vdso-layout.lds.S @@ -0,0 +1,104 @@ +/* + * Linker script for vDSO. This is an ELF shared object prelinked to + * its virtual address, and with only one read-only segment. + * This script controls its layout. 
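
[Editor's note] All four do_*() readers above follow the same lockless protocol against the vvar page, built from the vvar_read_begin()/vvar_read_retry() helpers introduced earlier in this patch. Condensed to its core, and assuming those helpers, a reader looks like this sketch:

	static void read_wall_time(const struct vvar_data *vvar,
				   u64 *sec, u64 *snsec)
	{
		unsigned int seq;

		do {
			/* Spins while the sequence count is odd, i.e. while
			 * update_vsyscall() is mid-update on the kernel side. */
			seq = vvar_read_begin(vvar);
			*sec = vvar->wall_time_sec;
			*snsec = vvar->wall_time_snsec;
			/* A changed count means the snapshot may be torn. */
		} while (vvar_read_retry(vvar, seq));
	}
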
+ */ + +#if defined(BUILD_VDSO64) +# define SHDR_SIZE 64 +#elif defined(BUILD_VDSO32) +# define SHDR_SIZE 40 +#else +# error unknown VDSO target +#endif + +#define NUM_FAKE_SHDRS 7 + +SECTIONS +{ + /* + * User/kernel shared data is before the vDSO. This may be a little + * uglier than putting it after the vDSO, but it avoids issues with + * non-allocatable things that dangle past the end of the PT_LOAD + * segment. Page size is 8192 for both 64-bit and 32-bit vdso binaries + */ + + vvar_start = . -8192; + vvar_data = vvar_start; + + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .dynamic : { *(.dynamic) } :text :dynamic + + .rodata : { + *(.rodata*) + *(.data*) + *(.sdata*) + *(.got.plt) *(.got) + *(.gnu.linkonce.d.*) + *(.bss*) + *(.dynbss*) + *(.gnu.linkonce.b.*) + + /* + * Ideally this would live in a C file: kept in here for + * compatibility with x86-64. + */ + VDSO_FAKE_SECTION_TABLE_START = .; + . = . + NUM_FAKE_SHDRS * SHDR_SIZE; + VDSO_FAKE_SECTION_TABLE_END = .; + } :text + + .fake_shstrtab : { *(.fake_shstrtab) } :text + + + .note : { *(.note.*) } :text :note + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + + + /* + * Text is well-separated from actual data: there's plenty of + * stuff that isn't used at runtime in between. + */ + + .text : { *(.text*) } :text =0x90909090, + + .vread_tick_patch : { + vread_tick_patch_start = .; + *(.vread_tick_patch) + vread_tick_patch_end = .; + } + + /DISCARD/ : { + *(.discard) + *(.discard.*) + *(__bug_table) + } +} + +/* + * Very old versions of ld do not recognize this name token; use the constant. + */ +#define PT_GNU_EH_FRAME 0x6474e550 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} diff --git a/arch/sparc/vdso/vdso-note.S b/arch/sparc/vdso/vdso-note.S new file mode 100644 index 000000000000..79a071e4357e --- /dev/null +++ b/arch/sparc/vdso/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/sparc/vdso/vdso.lds.S b/arch/sparc/vdso/vdso.lds.S new file mode 100644 index 000000000000..f3caa29a331c --- /dev/null +++ b/arch/sparc/vdso/vdso.lds.S @@ -0,0 +1,25 @@ +/* + * Linker script for 64-bit vDSO. + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSO64 + +#include "vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. 
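
[Editor's note] Only the four symbols named in the version script below are visible to userspace. The C library locates the vDSO through the AT_SYSINFO_EHDR aux-vector entry added in elf_64.h and binds clock_gettime()/gettimeofday() to it, so an ordinary program needs no special code to benefit — a sketch:

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		/* Serviced by __vdso_clock_gettime without a trap once the
		 * vDSO is mapped; falls back to the syscall otherwise. */
		clock_gettime(CLOCK_MONOTONIC, &ts);
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}
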
+ */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + local: *; + }; +} diff --git a/arch/sparc/vdso/vdso2c.c b/arch/sparc/vdso/vdso2c.c new file mode 100644 index 000000000000..9f5b1cd6d51d --- /dev/null +++ b/arch/sparc/vdso/vdso2c.c @@ -0,0 +1,234 @@ +/* + * vdso2c - A vdso image preparation tool + * Copyright (c) 2014 Andy Lutomirski and others + * Licensed under the GPL v2 + * + * vdso2c requires stripped and unstripped input. It would be trivial + * to fully strip the input in here, but, for reasons described below, + * we need to write a section table. Doing this is more or less + * equivalent to dropping all non-allocatable sections, but it's + * easier to let objcopy handle that instead of doing it ourselves. + * If we ever need to do something fancier than what objcopy provides, + * it would be straightforward to add here. + * + * We keep a section table for a few reasons: + * + * Binutils has issues debugging the vDSO: it reads the section table to + * find SHT_NOTE; it won't look at PT_NOTE for the in-memory vDSO, which + * would break build-id if we removed the section table. Binutils + * also requires that shstrndx != 0. See: + * https://sourceware.org/bugzilla/show_bug.cgi?id=17064 + * + * elfutils might not look for PT_NOTE if there is a section table at + * all. I don't know whether this matters for any practical purpose. + * + * For simplicity, rather than hacking up a partial section table, we + * just write a mostly complete one. We omit non-dynamic symbols, + * though, since they're rather large. + * + * Once binutils gets fixed, we might be able to drop this for all but + * the 64-bit vdso, since build-id only works in kernel RPMs, and + * systems that update to new enough kernel RPMs will likely update + * binutils in sync. build-id has never worked for home-built kernel + * RPMs without manual symlinking, and I suspect that no one ever does + * that. + */ + +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#include <inttypes.h> +#include <stdint.h> +#include <unistd.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <err.h> + +#include <sys/mman.h> +#include <sys/types.h> +#include <tools/be_byteshift.h> + +#include <linux/elf.h> +#include <linux/types.h> +#include <linux/kernel.h> + +const char *outfilename; + +/* Symbols that we need in vdso2c. */ +enum { + sym_vvar_start, + sym_VDSO_FAKE_SECTION_TABLE_START, + sym_VDSO_FAKE_SECTION_TABLE_END, + sym_vread_tick, + sym_vread_tick_patch_start, + sym_vread_tick_patch_end +}; + +struct vdso_sym { + const char *name; + int export; +}; + +struct vdso_sym required_syms[] = { + [sym_vvar_start] = {"vvar_start", 1}, + [sym_VDSO_FAKE_SECTION_TABLE_START] = { + "VDSO_FAKE_SECTION_TABLE_START", 0 + }, + [sym_VDSO_FAKE_SECTION_TABLE_END] = { + "VDSO_FAKE_SECTION_TABLE_END", 0 + }, + [sym_vread_tick] = {"vread_tick", 1}, + [sym_vread_tick_patch_start] = {"vread_tick_patch_start", 1}, + [sym_vread_tick_patch_end] = {"vread_tick_patch_end", 1} +}; + +__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) +static void fail(const char *format, ...) 
+{ + va_list ap; + + va_start(ap, format); + fprintf(stderr, "Error: "); + vfprintf(stderr, format, ap); + if (outfilename) + unlink(outfilename); + exit(1); + va_end(ap); +} + +/* + * Evil macros for big-endian reads and writes + */ +#define GBE(x, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + (__typeof__(*(x)))get_unaligned_be##bits(x), ifnot) + +#define LAST_GBE(x) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x), (void)(0)) + +#define GET_BE(x) \ + GBE(x, 64, GBE(x, 32, GBE(x, 16, LAST_GBE(x)))) + +#define PBE(x, val, bits, ifnot) \ + __builtin_choose_expr( \ + (sizeof(*(x)) == bits/8), \ + put_unaligned_be##bits((val), (x)), ifnot) + +#define LAST_PBE(x, val) \ + __builtin_choose_expr(sizeof(*(x)) == 1, *(x) = (val), (void)(0)) + +#define PUT_BE(x, val) \ + PBE(x, val, 64, PBE(x, val, 32, PBE(x, val, 16, LAST_PBE(x, val)))) + +#define NSYMS ARRAY_SIZE(required_syms) + +#define BITSFUNC3(name, bits, suffix) name##bits##suffix +#define BITSFUNC2(name, bits, suffix) BITSFUNC3(name, bits, suffix) +#define BITSFUNC(name) BITSFUNC2(name, ELF_BITS, ) + +#define INT_BITS BITSFUNC2(int, ELF_BITS, _t) + +#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x +#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) +#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) + +#define ELF_BITS 64 +#include "vdso2c.h" +#undef ELF_BITS + +#define ELF_BITS 32 +#include "vdso2c.h" +#undef ELF_BITS + +static void go(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *name) +{ + Elf64_Ehdr *hdr = (Elf64_Ehdr *)raw_addr; + + if (hdr->e_ident[EI_CLASS] == ELFCLASS64) { + go64(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else if (hdr->e_ident[EI_CLASS] == ELFCLASS32) { + go32(raw_addr, raw_len, stripped_addr, stripped_len, + outfile, name); + } else { + fail("unknown ELF class\n"); + } +} + +static void map_input(const char *name, void **addr, size_t *len, int prot) +{ + off_t tmp_len; + + int fd = open(name, O_RDONLY); + + if (fd == -1) + err(1, "%s", name); + + tmp_len = lseek(fd, 0, SEEK_END); + if (tmp_len == (off_t)-1) + err(1, "lseek"); + *len = (size_t)tmp_len; + + *addr = mmap(NULL, tmp_len, prot, MAP_PRIVATE, fd, 0); + if (*addr == MAP_FAILED) + err(1, "mmap"); + + close(fd); +} + +int main(int argc, char **argv) +{ + size_t raw_len, stripped_len; + void *raw_addr, *stripped_addr; + FILE *outfile; + char *name, *tmp; + int namelen; + + if (argc != 4) { + printf("Usage: vdso2c RAW_INPUT STRIPPED_INPUT OUTPUT\n"); + return 1; + } + + /* + * Figure out the struct name. If we're writing to a .so file, + * generate raw output insted. 
+ */ + name = strdup(argv[3]); + namelen = strlen(name); + if (namelen >= 3 && !strcmp(name + namelen - 3, ".so")) { + name = NULL; + } else { + tmp = strrchr(name, '/'); + if (tmp) + name = tmp + 1; + tmp = strchr(name, '.'); + if (tmp) + *tmp = '\0'; + for (tmp = name; *tmp; tmp++) + if (*tmp == '-') + *tmp = '_'; + } + + map_input(argv[1], &raw_addr, &raw_len, PROT_READ); + map_input(argv[2], &stripped_addr, &stripped_len, PROT_READ); + + outfilename = argv[3]; + outfile = fopen(outfilename, "w"); + if (!outfile) + err(1, "%s", argv[2]); + + go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name); + + munmap(raw_addr, raw_len); + munmap(stripped_addr, stripped_len); + fclose(outfile); + + return 0; +} diff --git a/arch/sparc/vdso/vdso2c.h b/arch/sparc/vdso/vdso2c.h new file mode 100644 index 000000000000..808decb0f7be --- /dev/null +++ b/arch/sparc/vdso/vdso2c.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +/* + * This file is included up to twice from vdso2c.c. It generates code for + * 32-bit and 64-bit vDSOs. We will eventually need both for 64-bit builds, + * since 32-bit vDSOs will then be built for 32-bit userspace. + */ + +static void BITSFUNC(go)(void *raw_addr, size_t raw_len, + void *stripped_addr, size_t stripped_len, + FILE *outfile, const char *name) +{ + int found_load = 0; + unsigned long load_size = -1; /* Work around bogus warning */ + unsigned long mapping_size; + int i; + unsigned long j; + + ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr; + ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; + ELF(Dyn) *dyn = 0, *dyn_end = 0; + INT_BITS syms[NSYMS] = {}; + + ELF(Phdr) *pt = (ELF(Phdr) *)(raw_addr + GET_BE(&hdr->e_phoff)); + + /* Walk the segment table. */ + for (i = 0; i < GET_BE(&hdr->e_phnum); i++) { + if (GET_BE(&pt[i].p_type) == PT_LOAD) { + if (found_load) + fail("multiple PT_LOAD segs\n"); + + if (GET_BE(&pt[i].p_offset) != 0 || + GET_BE(&pt[i].p_vaddr) != 0) + fail("PT_LOAD in wrong place\n"); + + if (GET_BE(&pt[i].p_memsz) != GET_BE(&pt[i].p_filesz)) + fail("cannot handle memsz != filesz\n"); + + load_size = GET_BE(&pt[i].p_memsz); + found_load = 1; + } else if (GET_BE(&pt[i].p_type) == PT_DYNAMIC) { + dyn = raw_addr + GET_BE(&pt[i].p_offset); + dyn_end = raw_addr + GET_BE(&pt[i].p_offset) + + GET_BE(&pt[i].p_memsz); + } + } + if (!found_load) + fail("no PT_LOAD seg\n"); + + if (stripped_len < load_size) + fail("stripped input is too short\n"); + + /* Walk the dynamic table */ + for (i = 0; dyn + i < dyn_end && + GET_BE(&dyn[i].d_tag) != DT_NULL; i++) { + typeof(dyn[i].d_tag) tag = GET_BE(&dyn[i].d_tag); + typeof(dyn[i].d_un.d_val) val = GET_BE(&dyn[i].d_un.d_val); + + if ((tag == DT_RELSZ || tag == DT_RELASZ) && (val != 0)) + fail("vdso image contains dynamic relocations\n"); + } + + /* Walk the section table */ + for (i = 0; i < GET_BE(&hdr->e_shnum); i++) { + ELF(Shdr) *sh = raw_addr + GET_BE(&hdr->e_shoff) + + GET_BE(&hdr->e_shentsize) * i; + if (GET_BE(&sh->sh_type) == SHT_SYMTAB) + symtab_hdr = sh; + } + + if (!symtab_hdr) + fail("no symbol table\n"); + + strtab_hdr = raw_addr + GET_BE(&hdr->e_shoff) + + GET_BE(&hdr->e_shentsize) * GET_BE(&symtab_hdr->sh_link); + + /* Walk the symbol table */ + for (i = 0; + i < GET_BE(&symtab_hdr->sh_size) / GET_BE(&symtab_hdr->sh_entsize); + i++) { + int k; + + ELF(Sym) *sym = raw_addr + GET_BE(&symtab_hdr->sh_offset) + + GET_BE(&symtab_hdr->sh_entsize) * i; + const char *name = raw_addr + GET_BE(&strtab_hdr->sh_offset) + + GET_BE(&sym->st_name); + + for (k = 0; k < 
NSYMS; k++) { + if (!strcmp(name, required_syms[k].name)) { + if (syms[k]) { + fail("duplicate symbol %s\n", + required_syms[k].name); + } + + /* + * Careful: we use negative addresses, but + * st_value is unsigned, so we rely + * on syms[k] being a signed type of the + * correct width. + */ + syms[k] = GET_BE(&sym->st_value); + } + } + } + + /* Validate mapping addresses. */ + if (syms[sym_vvar_start] % 8192) + fail("vvar_begin must be a multiple of 8192\n"); + + if (!name) { + fwrite(stripped_addr, stripped_len, 1, outfile); + return; + } + + mapping_size = (stripped_len + 8191) / 8192 * 8192; + + fprintf(outfile, "/* AUTOMATICALLY GENERATED -- DO NOT EDIT */\n\n"); + fprintf(outfile, "#include <linux/cache.h>\n"); + fprintf(outfile, "#include <asm/vdso.h>\n"); + fprintf(outfile, "\n"); + fprintf(outfile, + "static unsigned char raw_data[%lu] __ro_after_init __aligned(8192)= {", + mapping_size); + for (j = 0; j < stripped_len; j++) { + if (j % 10 == 0) + fprintf(outfile, "\n\t"); + fprintf(outfile, "0x%02X, ", + (int)((unsigned char *)stripped_addr)[j]); + } + fprintf(outfile, "\n};\n\n"); + + fprintf(outfile, "const struct vdso_image %s_builtin = {\n", name); + fprintf(outfile, "\t.data = raw_data,\n"); + fprintf(outfile, "\t.size = %lu,\n", mapping_size); + for (i = 0; i < NSYMS; i++) { + if (required_syms[i].export && syms[i]) + fprintf(outfile, "\t.sym_%s = %" PRIi64 ",\n", + required_syms[i].name, (int64_t)syms[i]); + } + fprintf(outfile, "};\n"); +} diff --git a/arch/sparc/vdso/vdso32/.gitignore b/arch/sparc/vdso/vdso32/.gitignore new file mode 100644 index 000000000000..e45fba9d0ced --- /dev/null +++ b/arch/sparc/vdso/vdso32/.gitignore @@ -0,0 +1 @@ +vdso32.lds diff --git a/arch/sparc/vdso/vdso32/vclock_gettime.c b/arch/sparc/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000000..026abb3b826c --- /dev/null +++ b/arch/sparc/vdso/vdso32/vclock_gettime.c @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#define BUILD_VDSO32 + +#ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE +#undef CONFIG_OPTIMIZE_INLINING +#endif + +#ifdef CONFIG_SPARC64 + +/* + * in case of a 32 bit VDSO for a 64 bit kernel fake a 32 bit kernel + * configuration + */ +#undef CONFIG_64BIT +#undef CONFIG_SPARC64 +#define BUILD_VDSO32_64 +#define CONFIG_32BIT +#undef CONFIG_QUEUED_RWLOCKS +#undef CONFIG_QUEUED_SPINLOCKS + +#endif + +#include "../vclock_gettime.c" diff --git a/arch/sparc/vdso/vdso32/vdso-note.S b/arch/sparc/vdso/vdso32/vdso-note.S new file mode 100644 index 000000000000..e234983cf0d8 --- /dev/null +++ b/arch/sparc/vdso/vdso32/vdso-note.S @@ -0,0 +1,12 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO + * text. Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> +#include <linux/elfnote.h> + +ELFNOTE_START(Linux, 0, "a") + .long LINUX_VERSION_CODE +ELFNOTE_END diff --git a/arch/sparc/vdso/vdso32/vdso32.lds.S b/arch/sparc/vdso/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..53575ee154c4 --- /dev/null +++ b/arch/sparc/vdso/vdso32/vdso32.lds.S @@ -0,0 +1,24 @@ +/* + * Linker script for sparc32 vDSO + * We #include the file to define the layout details. + * + * This file defines the version script giving the user-exported symbols in + * the DSO. + */ + +#define BUILD_VDSO32 +#include "../vdso-layout.lds.S" + +/* + * This controls what userland symbols we export from the vDSO. 
+ */ +VERSION { + LINUX_2.6 { + global: + clock_gettime; + __vdso_clock_gettime; + gettimeofday; + __vdso_gettimeofday; + local: *; + }; +} diff --git a/arch/sparc/vdso/vma.c b/arch/sparc/vdso/vma.c new file mode 100644 index 000000000000..0a6f50098e23 --- /dev/null +++ b/arch/sparc/vdso/vma.c @@ -0,0 +1,268 @@ +/* + * Set up the VMAs to tell the VM about the vDSO. + * Copyright 2007 Andi Kleen, SUSE Labs. + * Subject to the GPL, v.2 + */ + +/* + * Copyright (c) 2017 Oracle and/or its affiliates. All rights reserved. + */ + +#include <linux/mm.h> +#include <linux/err.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/random.h> +#include <linux/elf.h> +#include <asm/vdso.h> +#include <asm/vvar.h> +#include <asm/page.h> + +unsigned int __read_mostly vdso_enabled = 1; + +static struct vm_special_mapping vvar_mapping = { + .name = "[vvar]" +}; + +#ifdef CONFIG_SPARC64 +static struct vm_special_mapping vdso_mapping64 = { + .name = "[vdso]" +}; +#endif + +#ifdef CONFIG_COMPAT +static struct vm_special_mapping vdso_mapping32 = { + .name = "[vdso]" +}; +#endif + +struct vvar_data *vvar_data; + +#define SAVE_INSTR_SIZE 4 + +/* + * Allocate pages for the vdso and vvar, and copy in the vdso text from the + * kernel image. + */ +int __init init_vdso_image(const struct vdso_image *image, + struct vm_special_mapping *vdso_mapping) +{ + int i; + struct page *dp, **dpp = NULL; + int dnpages = 0; + struct page *cp, **cpp = NULL; + int cnpages = (image->size) / PAGE_SIZE; + + /* + * First, the vdso text. This is initialied data, an integral number of + * pages long. + */ + if (WARN_ON(image->size % PAGE_SIZE != 0)) + goto oom; + + cpp = kcalloc(cnpages, sizeof(struct page *), GFP_KERNEL); + vdso_mapping->pages = cpp; + + if (!cpp) + goto oom; + + if (vdso_fix_stick) { + /* + * If the system uses %tick instead of %stick, patch the VDSO + * with instruction reading %tick instead of %stick. + */ + unsigned int j, k = SAVE_INSTR_SIZE; + unsigned char *data = image->data; + + for (j = image->sym_vread_tick_patch_start; + j < image->sym_vread_tick_patch_end; j++) { + + data[image->sym_vread_tick + k] = data[j]; + k++; + } + } + + for (i = 0; i < cnpages; i++) { + cp = alloc_page(GFP_KERNEL); + if (!cp) + goto oom; + cpp[i] = cp; + copy_page(page_address(cp), image->data + i * PAGE_SIZE); + } + + /* + * Now the vvar page. This is uninitialized data. 
+ */ + + if (vvar_data == NULL) { + dnpages = (sizeof(struct vvar_data) / PAGE_SIZE) + 1; + if (WARN_ON(dnpages != 1)) + goto oom; + dpp = kcalloc(dnpages, sizeof(struct page *), GFP_KERNEL); + vvar_mapping.pages = dpp; + + if (!dpp) + goto oom; + + dp = alloc_page(GFP_KERNEL); + if (!dp) + goto oom; + + dpp[0] = dp; + vvar_data = page_address(dp); + memset(vvar_data, 0, PAGE_SIZE); + + vvar_data->seq = 0; + } + + return 0; + oom: + if (cpp != NULL) { + for (i = 0; i < cnpages; i++) { + if (cpp[i] != NULL) + __free_page(cpp[i]); + } + kfree(cpp); + vdso_mapping->pages = NULL; + } + + if (dpp != NULL) { + for (i = 0; i < dnpages; i++) { + if (dpp[i] != NULL) + __free_page(dpp[i]); + } + kfree(dpp); + vvar_mapping.pages = NULL; + } + + pr_warn("Cannot allocate vdso\n"); + vdso_enabled = 0; + return -ENOMEM; +} + +static int __init init_vdso(void) +{ + int err = 0; +#ifdef CONFIG_SPARC64 + err = init_vdso_image(&vdso_image_64_builtin, &vdso_mapping64); + if (err) + return err; +#endif + +#ifdef CONFIG_COMPAT + err = init_vdso_image(&vdso_image_32_builtin, &vdso_mapping32); +#endif + return err; + +} +subsys_initcall(init_vdso); + +struct linux_binprm; + +/* Shuffle the vdso up a bit, randomly. */ +static unsigned long vdso_addr(unsigned long start, unsigned int len) +{ + unsigned int offset; + + /* This loses some more bits than a modulo, but is cheaper */ + offset = get_random_int() & (PTRS_PER_PTE - 1); + return start + (offset << PAGE_SHIFT); +} + +static int map_vdso(const struct vdso_image *image, + struct vm_special_mapping *vdso_mapping) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long text_start, addr = 0; + int ret = 0; + + down_write(&mm->mmap_sem); + + /* + * First, get an unmapped region: then randomize it, and make sure that + * region is free. 
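
[Editor's note] The randomization in vdso_addr() above works in whole pages. Assuming the usual sparc64 values, PTRS_PER_PTE is PAGE_SIZE / sizeof(pte_t) = 8192 / 8 = 1024, so get_random_int() & (PTRS_PER_PTE - 1) selects one of 1024 page-aligned slots and offset << PAGE_SHIFT places the vDSO base somewhere in a 1024 * 8 KB = 8 MB window above the initial hint — about 10 bits of entropy, followed by a second get_unmapped_area() call to confirm the randomized region is actually free.
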
+ */ + if (current->flags & PF_RANDOMIZE) { + addr = get_unmapped_area(NULL, 0, + image->size - image->sym_vvar_start, + 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + addr = vdso_addr(addr, image->size - image->sym_vvar_start); + } + addr = get_unmapped_area(NULL, addr, + image->size - image->sym_vvar_start, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + text_start = addr - image->sym_vvar_start; + current->mm->context.vdso = (void __user *)text_start; + + /* + * MAYWRITE to allow gdb to COW and set breakpoints + */ + vma = _install_special_mapping(mm, + text_start, + image->size, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + goto up_fail; + } + + vma = _install_special_mapping(mm, + addr, + -image->sym_vvar_start, + VM_READ|VM_MAYREAD, + &vvar_mapping); + + if (IS_ERR(vma)) { + ret = PTR_ERR(vma); + do_munmap(mm, text_start, image->size, NULL); + } + +up_fail: + if (ret) + current->mm->context.vdso = NULL; + + up_write(&mm->mmap_sem); + return ret; +} + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + + if (!vdso_enabled) + return 0; + +#if defined CONFIG_COMPAT + if (!(is_32bit_task())) + return map_vdso(&vdso_image_64_builtin, &vdso_mapping64); + else + return map_vdso(&vdso_image_32_builtin, &vdso_mapping32); +#else + return map_vdso(&vdso_image_64_builtin, &vdso_mapping64); +#endif + +} + +static __init int vdso_setup(char *s) +{ + int err; + unsigned long val; + + err = kstrtoul(s, 10, &val); + vdso_enabled = val; + return err; +} +__setup("vdso=", vdso_setup); diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index c14e36f008c8..62a7b83025dd 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -173,7 +173,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define COMPAT_LOFF_T_MAX 0x7fffffffffffffffL struct compat_ipc64_perm { compat_key_t key; diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h index 2a26cc4fefc2..adfa21b18488 100644 --- a/arch/tile/include/asm/pgtable.h +++ b/arch/tile/include/asm/pgtable.h @@ -475,7 +475,6 @@ static inline void pmd_clear(pmd_t *pmdp) #define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd))) #define pmd_huge_page(pmd) pte_huge(pmd_pte(pmd)) #define pmd_mkhuge(pmd) pte_pmd(pte_mkhuge(pmd_pte(pmd))) -#define __HAVE_ARCH_PMD_WRITE #define pfn_pmd(pfn, pgprot) pte_pmd(pfn_pte((pfn), (pgprot))) #define pmd_pfn(pmd) pte_pfn(pmd_pte(pmd)) diff --git a/arch/tile/include/asm/topology.h b/arch/tile/include/asm/topology.h index b11d5fcd2c41..635a0a4596f0 100644 --- a/arch/tile/include/asm/topology.h +++ b/arch/tile/include/asm/topology.h @@ -29,12 +29,6 @@ static inline int cpu_to_node(int cpu) return cpu_2_node[cpu]; } -/* - * Returns the number of the node containing Node 'node'. - * This architecture is flat, so it is a pretty simple function! - */ -#define parent_node(node) (node) - /* Returns a bitmask of CPUs on Node 'node'. 
*/ static inline const struct cpumask *cpumask_of_node(int node) { diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common index d9280482a2f8..c68add8df3ae 100644 --- a/arch/um/Kconfig.common +++ b/arch/um/Kconfig.common @@ -10,7 +10,6 @@ config UML select HAVE_DEBUG_KMEMLEAK select GENERIC_IRQ_SHOW select GENERIC_CPU_DEVICES - select GENERIC_IO select GENERIC_CLOCKEVENTS select HAVE_GCC_PLUGINS select TTY # Needed for line.c diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df3276d6bfe3..8eed3f94bfc7 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1804,14 +1804,20 @@ config X86_SMAP If unsure, say Y. config X86_INTEL_UMIP - def_bool n + def_bool y depends on CPU_SUP_INTEL prompt "Intel User Mode Instruction Prevention" if EXPERT ---help--- The User Mode Instruction Prevention (UMIP) is a security feature in newer Intel processors. If enabled, a general - protection fault is issued if the instructions SGDT, SLDT, - SIDT, SMSW and STR are executed in user mode. + protection fault is issued if the SGDT, SLDT, SIDT, SMSW + or STR instructions are executed in user mode. These instructions + unnecessarily expose information about the hardware state. + + The vast majority of applications do not use these instructions. + For the very few that do, software emulation is provided in + specific cases in protected and virtual-8086 modes. Emulated + results are dummy. config X86_INTEL_MPX prompt "Intel MPX (Memory Protection Extensions)" diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a63fbc25ce84..8199a6187251 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -171,7 +171,6 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size) static void mem_avoid_memmap(char *str) { static int i; - int rc; if (i >= MAX_MEMMAP_REGIONS) return; @@ -219,7 +218,7 @@ static int handle_mem_memmap(void) return 0; tmp_cmdline = malloc(len + 1); - if (!tmp_cmdline ) + if (!tmp_cmdline) error("Failed to allocate space for tmp_cmdline"); memcpy(tmp_cmdline, args, len); @@ -363,7 +362,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, cmd_line |= boot_params->hdr.cmd_line_ptr; /* Calculate size of cmd_line. */ ptr = (char *)(unsigned long)cmd_line; - for (cmd_line_size = 0; ptr[cmd_line_size++]; ) + for (cmd_line_size = 0; ptr[cmd_line_size++];) ; mem_avoid[MEM_AVOID_CMDLINE].start = cmd_line; mem_avoid[MEM_AVOID_CMDLINE].size = cmd_line_size; diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index a2b30ec69497..f81d50d7ceac 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -51,15 +51,19 @@ ENTRY(native_usergs_sysret64) END(native_usergs_sysret64) #endif /* CONFIG_PARAVIRT */ -.macro TRACE_IRQS_IRETQ +.macro TRACE_IRQS_FLAGS flags:req #ifdef CONFIG_TRACE_IRQFLAGS - bt $9, EFLAGS(%rsp) /* interrupts off? */ + bt $9, \flags /* interrupts off? 
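The new TRACE_IRQS_FLAGS macro tests bit 9 (the interrupt-enable flag) of whatever flags word it is handed, instead of hard-coding EFLAGS(%rsp). A C-level sketch of that test, with our own constant name rather than a kernel macro:

	#include <stdbool.h>
	#include <stdio.h>

	#define X86_EFLAGS_IF_BIT	9	/* IF: interrupts enabled when set */

	/* C equivalent of the 'bt $9, \flags' test: check the IF bit
	 * in whichever saved flags image the caller passes in. */
	static bool irqs_were_on(unsigned long flags)
	{
		return (flags >> X86_EFLAGS_IF_BIT) & 1;
	}

	int main(void)
	{
		printf("%d\n", irqs_were_on(1UL << X86_EFLAGS_IF_BIT));	/* 1 */
		printf("%d\n", irqs_were_on(0UL));			/* 0 */
		return 0;
	}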
*/ jnc 1f TRACE_IRQS_ON 1: #endif .endm +.macro TRACE_IRQS_IRETQ + TRACE_IRQS_FLAGS EFLAGS(%rsp) +.endm + /* * When dynamic function tracer is enabled it will add a breakpoint * to all locations that it is about to modify, sync CPUs, update @@ -148,8 +152,6 @@ ENTRY(entry_SYSCALL_64) movq %rsp, PER_CPU_VAR(rsp_scratch) movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - TRACE_IRQS_OFF - /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ pushq PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */ @@ -170,6 +172,8 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) sub $(6*8), %rsp /* pt_regs->bp, bx, r12-15 not saved */ UNWIND_HINT_REGS extra=0 + TRACE_IRQS_OFF + /* * If we need to do entry work or if we guess we'll need to do * exit work, go straight to the slow path. @@ -943,11 +947,13 @@ ENTRY(native_load_gs_index) FRAME_BEGIN pushfq DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) + TRACE_IRQS_OFF SWAPGS .Lgs_change: movl %edi, %gs 2: ALTERNATIVE "", "mfence", X86_BUG_SWAPGS_FENCE SWAPGS + TRACE_IRQS_FLAGS (%rsp) popfq FRAME_END ret diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index c366c0adeb40..1943aebadede 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -130,10 +130,6 @@ $(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 -# This makes sure the $(obj) subdirectory exists even though vdso32/ -# is not a kbuild sub-make subdirectory. -override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ - targets += vdso32/vdso32.lds targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 43445da30cea..09c26a4f139c 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3734,6 +3734,19 @@ EVENT_ATTR_STR(cycles-t, cycles_t, "event=0x3c,in_tx=1"); EVENT_ATTR_STR(cycles-ct, cycles_ct, "event=0x3c,in_tx=1,in_tx_cp=1"); static struct attribute *hsw_events_attrs[] = { + EVENT_PTR(mem_ld_hsw), + EVENT_PTR(mem_st_hsw), + EVENT_PTR(td_slots_issued), + EVENT_PTR(td_slots_retired), + EVENT_PTR(td_fetch_bubbles), + EVENT_PTR(td_total_slots), + EVENT_PTR(td_total_slots_scale), + EVENT_PTR(td_recovery_bubbles), + EVENT_PTR(td_recovery_bubbles_scale), + NULL +}; + +static struct attribute *hsw_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_commit), EVENT_PTR(tx_abort), @@ -3746,18 +3759,16 @@ static struct attribute *hsw_events_attrs[] = { EVENT_PTR(el_conflict), EVENT_PTR(cycles_t), EVENT_PTR(cycles_ct), - EVENT_PTR(mem_ld_hsw), - EVENT_PTR(mem_st_hsw), - EVENT_PTR(td_slots_issued), - EVENT_PTR(td_slots_retired), - EVENT_PTR(td_fetch_bubbles), - EVENT_PTR(td_total_slots), - EVENT_PTR(td_total_slots_scale), - EVENT_PTR(td_recovery_bubbles), - EVENT_PTR(td_recovery_bubbles_scale), NULL }; +static __init struct attribute **get_hsw_events_attrs(void) +{ + return boot_cpu_has(X86_FEATURE_RTM) ? + merge_attr(hsw_events_attrs, hsw_tsx_events_attrs) : + hsw_events_attrs; +} + static ssize_t freeze_on_smi_show(struct device *cdev, struct device_attribute *attr, char *buf) @@ -4186,7 +4197,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.cpu_events = get_hsw_events_attrs(); x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
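get_hsw_events_attrs() above merges the TSX attribute array into the base array only when the CPU has RTM. A user-space sketch of that conditional merge; merge_attr() here is our stand-in for the kernel helper of the same name:

	#include <stdio.h>
	#include <stdlib.h>

	/* Concatenate two NULL-terminated pointer arrays into a
	 * freshly allocated one. */
	static const char **merge_attr(const char **a, const char **b)
	{
		size_t na = 0, nb = 0, i;
		const char **out;

		while (a[na])
			na++;
		while (b[nb])
			nb++;

		out = malloc((na + nb + 1) * sizeof(*out));
		if (!out)
			return NULL;
		for (i = 0; i < na; i++)
			out[i] = a[i];
		for (i = 0; i < nb; i++)
			out[na + i] = b[i];
		out[na + nb] = NULL;
		return out;
	}

	int main(void)
	{
		const char *base[] = { "mem-loads", "mem-stores", NULL };
		const char *tsx[]  = { "tx-start", "tx-commit", NULL };
		int have_rtm = 1;  /* stands in for boot_cpu_has(X86_FEATURE_RTM) */
		const char **ev = have_rtm ? merge_attr(base, tsx) : base;

		if (!ev)
			return 1;
		for (; *ev; ev++)
			puts(*ev);
		return 0;
	}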
hsw_format_attr : nhm_format_attr; @@ -4225,7 +4236,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.cpu_events = get_hsw_events_attrs(); x86_pmu.limit_period = bdw_limit_period; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; @@ -4283,7 +4294,7 @@ __init int intel_pmu_init(void) extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_attr = merge_attr(extra_attr, skl_format_attr); - x86_pmu.cpu_events = hsw_events_attrs; + x86_pmu.cpu_events = get_hsw_events_attrs(); intel_pmu_pebs_data_source_skl( boot_cpu_data.x86_model == INTEL_FAM6_SKYLAKE_X); pr_cont("Skylake events, "); diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d45e06346f14..7874c980d569 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -975,10 +975,10 @@ static void uncore_pci_remove(struct pci_dev *pdev) int i, phys_id, pkg; phys_id = uncore_pcibus_to_physid(pdev->bus); - pkg = topology_phys_to_logical_pkg(phys_id); box = pci_get_drvdata(pdev); if (!box) { + pkg = topology_phys_to_logical_pkg(phys_id); for (i = 0; i < UNCORE_EXTRA_PCI_DEV_MAX; i++) { if (uncore_extra_pci_dev[pkg].dev[i] == pdev) { uncore_extra_pci_dev[pkg].dev[i] = NULL; @@ -994,7 +994,7 @@ static void uncore_pci_remove(struct pci_dev *pdev) return; pci_set_drvdata(pdev, NULL); - pmu->boxes[pkg] = NULL; + pmu->boxes[box->pkgid] = NULL; if (atomic_dec_return(&pmu->activeboxes) == 0) uncore_pmu_unregister(pmu); uncore_box_exit(box); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 4364191e7c6b..414dc7e7c950 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -100,7 +100,7 @@ struct intel_uncore_extra_reg { struct intel_uncore_box { int pci_phys_id; - int pkgid; + int pkgid; /* Logical package ID */ int n_active; /* number of active events */ int n_events; int cpu; /* cpu to collect events */ diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 95cb19f4e06f..6d8044ab1060 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1057,7 +1057,7 @@ static void snbep_qpi_enable_event(struct intel_uncore_box *box, struct perf_eve if (reg1->idx != EXTRA_REG_NONE) { int idx = box->pmu->pmu_idx + SNBEP_PCI_QPI_PORT0_FILTER; - int pkg = topology_phys_to_logical_pkg(box->pci_phys_id); + int pkg = box->pkgid; struct pci_dev *filter_pdev = uncore_extra_pci_dev[pkg].dev[idx]; if (filter_pdev) { @@ -3035,11 +3035,19 @@ static struct intel_uncore_type *bdx_msr_uncores[] = { NULL, }; +/* Bit 7 'Use Occupancy' is not available for counter 0 on BDX */ +static struct event_constraint bdx_uncore_pcu_constraints[] = { + EVENT_CONSTRAINT(0x80, 0xe, 0x80), + EVENT_CONSTRAINT_END +}; + void bdx_uncore_cpu_init(void) { if (bdx_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores) bdx_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores; uncore_msr_uncores = bdx_msr_uncores; + + hswep_uncore_pcu.constraints = bdx_uncore_pcu_constraints; } static struct intel_uncore_type bdx_uncore_ha = { diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index a600a6cda9ec..2cbd75dd2fd3 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -210,7 +210,6 @@ typedef struct compat_siginfo { } compat_siginfo_t; #define COMPAT_OFF_T_MAX 0x7fffffff -#define 
COMPAT_LOFF_T_MAX 0x7fffffffffffffffL struct compat_ipc64_perm { compat_key_t key; diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 3a091cea36c5..0d157d2a1e2a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -309,6 +309,7 @@ static inline int mmap_is_ia32(void) extern unsigned long task_size_32bit(void); extern unsigned long task_size_64bit(int full_addr_space); extern unsigned long get_mmap_base(int is_legacy); +extern bool mmap_address_hint_valid(unsigned long addr, unsigned long len); #ifdef CONFIG_X86_32 diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b80e46733909..2851077b6051 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -99,14 +99,6 @@ struct irq_alloc_info { void *dmar_data; }; #endif -#ifdef CONFIG_HT_IRQ - struct { - int ht_pos; - int ht_idx; - struct pci_dev *ht_dev; - void *ht_update; - }; -#endif #ifdef CONFIG_X86_UV struct { int uv_limit; diff --git a/arch/x86/include/asm/hypertransport.h b/arch/x86/include/asm/hypertransport.h deleted file mode 100644 index 5d55df352879..000000000000 --- a/arch/x86/include/asm/hypertransport.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_HYPERTRANSPORT_H -#define _ASM_X86_HYPERTRANSPORT_H - -/* - * Constants for x86 Hypertransport Interrupts. - */ - -#define HT_IRQ_LOW_BASE 0xf8000000 - -#define HT_IRQ_LOW_VECTOR_SHIFT 16 -#define HT_IRQ_LOW_VECTOR_MASK 0x00ff0000 -#define HT_IRQ_LOW_VECTOR(v) \ - (((v) << HT_IRQ_LOW_VECTOR_SHIFT) & HT_IRQ_LOW_VECTOR_MASK) - -#define HT_IRQ_LOW_DEST_ID_SHIFT 8 -#define HT_IRQ_LOW_DEST_ID_MASK 0x0000ff00 -#define HT_IRQ_LOW_DEST_ID(v) \ - (((v) << HT_IRQ_LOW_DEST_ID_SHIFT) & HT_IRQ_LOW_DEST_ID_MASK) - -#define HT_IRQ_LOW_DM_PHYSICAL 0x0000000 -#define HT_IRQ_LOW_DM_LOGICAL 0x0000040 - -#define HT_IRQ_LOW_RQEOI_EDGE 0x0000000 -#define HT_IRQ_LOW_RQEOI_LEVEL 0x0000020 - - -#define HT_IRQ_LOW_MT_FIXED 0x0000000 -#define HT_IRQ_LOW_MT_ARBITRATED 0x0000004 -#define HT_IRQ_LOW_MT_SMI 0x0000008 -#define HT_IRQ_LOW_MT_NMI 0x000000c -#define HT_IRQ_LOW_MT_INIT 0x0000010 -#define HT_IRQ_LOW_MT_STARTUP 0x0000014 -#define HT_IRQ_LOW_MT_EXTINT 0x0000018 -#define HT_IRQ_LOW_MT_LINT1 0x000008c -#define HT_IRQ_LOW_MT_LINT0 0x0000098 - -#define HT_IRQ_LOW_IRQ_MASKED 0x0000001 - - -#define HT_IRQ_HIGH_DEST_ID_SHIFT 0 -#define HT_IRQ_HIGH_DEST_ID_MASK 0x00ffffff -#define HT_IRQ_HIGH_DEST_ID(v) \ - ((((v) >> 8) << HT_IRQ_HIGH_DEST_ID_SHIFT) & HT_IRQ_HIGH_DEST_ID_MASK) - -#endif /* _ASM_X86_HYPERTRANSPORT_H */ diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index e1d3b4ce8a92..2b6ccf2c49f1 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -18,6 +18,6 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs); int insn_get_modrm_rm_off(struct insn *insn, struct pt_regs *regs); unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); -char insn_get_code_seg_params(struct pt_regs *regs); +int insn_get_code_seg_params(struct pt_regs *regs); #endif /* _ASM_X86_INSN_EVAL_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 93ae8aee1780..95e948627fd0 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -111,6 +111,10 @@ build_mmio_write(__writeq, "q", unsigned long, "r", ) #endif +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern int valid_phys_addr_range(phys_addr_t addr, size_t size); +extern int 
valid_mmap_phys_addr_range(unsigned long pfn, size_t size); + /** * virt_to_phys - map virtual addresses to physical * @address: address to remap diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index f695cc6b8e1f..139feef467f7 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -56,10 +56,4 @@ extern void arch_init_msi_domain(struct irq_domain *domain); static inline void arch_init_msi_domain(struct irq_domain *domain) { } #endif -#ifdef CONFIG_HT_IRQ -extern void arch_init_htirq_domain(struct irq_domain *domain); -#else -static inline void arch_init_htirq_domain(struct irq_domain *domain) { } -#endif - #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bfb99770c34..977de5fb968b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1161,7 +1161,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2, static inline int emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type) { - return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0); + return x86_emulate_instruction(vcpu, 0, + emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0); } void kvm_enable_efer_bits(u64); diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 09f9e1e00e3b..95e2dfd75521 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1061,7 +1061,7 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); -#define __HAVE_ARCH_PMD_WRITE +#define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { return pmd_flags(pmd) & _PAGE_RW; @@ -1088,6 +1088,12 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); } +#define pud_write pud_write +static inline int pud_write(pud_t pud) +{ + return pud_flags(pud) & _PAGE_RW; +} + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2db7cf720b04..cc16fa882e3e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,6 +132,7 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; + unsigned initialized : 1; } __randomize_layout; struct cpuid_regs { diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ef9e02e614d0..f4c463df8b08 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -342,13 +342,12 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi); + static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) { - int ioapic; - int pin; - struct mpc_intsrc mp_irq; - /* * Check bus_irq boundary. */ @@ -358,14 +357,6 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, } /* - * Convert 'gsi' to 'ioapic.pin'. - */ - ioapic = mp_find_ioapic(gsi); - if (ioapic < 0) - return; - pin = mp_find_ioapic_pin(ioapic, gsi); - - /* * TBD: This check is for faulty timer entries, where the override * erroneously sets the trigger to level, resulting in a HUGE * increase of timer interrupts! 
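The '#define pmd_write pmd_write' change above replaces the __HAVE_ARCH_PMD_WRITE flag: defining the name to itself lets generic code detect an architecture override with a plain #ifndef. A toy sketch of the idiom, with made-up function names:

	#include <stdio.h>

	/* "Architecture" side: provide an override and mark its
	 * presence by defining the name to itself. */
	static int my_op(int x) { return x * 2; }
	#define my_op my_op

	/* "Generic" side: supply a fallback only when no override
	 * was defined. */
	#ifndef my_op
	static int my_op(int x) { return x; }
	#endif

	int main(void)
	{
		printf("%d\n", my_op(21));	/* 42: the override is used */
		return 0;
	}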
@@ -373,16 +364,8 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, if ((bus_irq == 0) && (trigger == 3)) trigger = 1; - mp_irq.type = MP_INTSRC; - mp_irq.irqtype = mp_INT; - mp_irq.irqflag = (trigger << 2) | polarity; - mp_irq.srcbus = MP_ISA_BUS; - mp_irq.srcbusirq = bus_irq; /* IRQ */ - mp_irq.dstapic = mpc_ioapic_id(ioapic); /* APIC ID */ - mp_irq.dstirq = pin; /* INTIN# */ - - mp_save_irq(&mp_irq); - + if (mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi) < 0) + return; /* * Reset default identity mapping if gsi is also an legacy IRQ, * otherwise there will be more than one entry with the same GSI @@ -429,6 +412,34 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, return 0; } +static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity, + u8 trigger, u32 gsi) +{ + struct mpc_intsrc mp_irq; + int ioapic, pin; + + /* Convert 'gsi' to 'ioapic.pin'(INTIN#) */ + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) { + pr_warn("Failed to find ioapic for gsi : %u\n", gsi); + return ioapic; + } + + pin = mp_find_ioapic_pin(ioapic, gsi); + + mp_irq.type = MP_INTSRC; + mp_irq.irqtype = mp_INT; + mp_irq.irqflag = (trigger << 2) | polarity; + mp_irq.srcbus = MP_ISA_BUS; + mp_irq.srcbusirq = bus_irq; + mp_irq.dstapic = mpc_ioapic_id(ioapic); + mp_irq.dstirq = pin; + + mp_save_irq(&mp_irq); + + return 0; +} + static int __init acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end) { @@ -473,7 +484,11 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, if (acpi_sci_flags & ACPI_MADT_POLARITY_MASK) polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; - mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + if (bus_irq < NR_IRQS_LEGACY) + mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + else + mp_register_ioapic_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index a9e08924927e..a6fcaf16cdbf 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile @@ -12,7 +12,6 @@ obj-y += hw_nmi.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_HT_IRQ) += htirq.o obj-$(CONFIG_SMP) += ipi.o ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c deleted file mode 100644 index b07075dce8b7..000000000000 --- a/arch/x86/kernel/apic/htirq.c +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Support Hypertransport IRQ - * - * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo - * Moved from arch/x86/kernel/apic/io_apic.c. - * Jiang Liu <jiang.liu@linux.intel.com> - * Add support of hierarchical irqdomain - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
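acpi_sci_ioapic_setup() above now sends SCIs with bus_irq >= NR_IRQS_LEGACY straight to mp_register_ioapic_irq() rather than through the legacy-override path. A schematic sketch of that dispatch, with print stubs in place of the real functions:

	#include <stdio.h>

	#define NR_IRQS_LEGACY	16	/* IRQ 0-15: the ISA legacy range on x86 */

	static void mp_override_legacy_irq(unsigned int bus_irq)
	{
		printf("legacy override for irq %u\n", bus_irq);
	}

	static void mp_register_ioapic_irq(unsigned int bus_irq)
	{
		printf("direct ioapic registration for irq %u\n", bus_irq);
	}

	/* Dispatch as the patched acpi_sci_ioapic_setup() does. */
	static void sci_setup(unsigned int bus_irq)
	{
		if (bus_irq < NR_IRQS_LEGACY)
			mp_override_legacy_irq(bus_irq);
		else
			mp_register_ioapic_irq(bus_irq);
	}

	int main(void)
	{
		sci_setup(9);	/* a typical ACPI SCI */
		sci_setup(20);	/* above the legacy range */
		return 0;
	}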
- */ -#include <linux/mm.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/device.h> -#include <linux/pci.h> -#include <linux/htirq.h> -#include <asm/irqdomain.h> -#include <asm/hw_irq.h> -#include <asm/apic.h> -#include <asm/hypertransport.h> - -static struct irq_domain *htirq_domain; - -/* - * Hypertransport interrupt support - */ -static int -ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force) -{ - struct irq_data *parent = data->parent_data; - int ret; - - ret = parent->chip->irq_set_affinity(parent, mask, force); - if (ret >= 0) { - struct ht_irq_msg msg; - struct irq_cfg *cfg = irqd_cfg(data); - - fetch_ht_irq_msg(data->irq, &msg); - msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | - HT_IRQ_LOW_DEST_ID_MASK); - msg.address_lo |= HT_IRQ_LOW_VECTOR(cfg->vector) | - HT_IRQ_LOW_DEST_ID(cfg->dest_apicid); - msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - msg.address_hi |= HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); - write_ht_irq_msg(data->irq, &msg); - } - - return ret; -} - -static struct irq_chip ht_irq_chip = { - .name = "PCI-HT", - .irq_mask = mask_ht_irq, - .irq_unmask = unmask_ht_irq, - .irq_ack = irq_chip_ack_parent, - .irq_set_affinity = ht_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE, -}; - -static int htirq_domain_alloc(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs, void *arg) -{ - struct ht_irq_cfg *ht_cfg; - struct irq_alloc_info *info = arg; - struct pci_dev *dev; - irq_hw_number_t hwirq; - int ret; - - if (nr_irqs > 1 || !info) - return -EINVAL; - - dev = info->ht_dev; - hwirq = (info->ht_idx & 0xFF) | - PCI_DEVID(dev->bus->number, dev->devfn) << 8 | - (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 24; - if (irq_find_mapping(domain, hwirq) > 0) - return -EEXIST; - - ht_cfg = kmalloc(sizeof(*ht_cfg), GFP_KERNEL); - if (!ht_cfg) - return -ENOMEM; - - ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, info); - if (ret < 0) { - kfree(ht_cfg); - return ret; - } - - /* Initialize msg to a value that will never match the first write. */ - ht_cfg->msg.address_lo = 0xffffffff; - ht_cfg->msg.address_hi = 0xffffffff; - ht_cfg->dev = info->ht_dev; - ht_cfg->update = info->ht_update; - ht_cfg->pos = info->ht_pos; - ht_cfg->idx = 0x10 + (info->ht_idx * 2); - irq_domain_set_info(domain, virq, hwirq, &ht_irq_chip, ht_cfg, - handle_edge_irq, ht_cfg, "edge"); - - return 0; -} - -static void htirq_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) -{ - struct irq_data *irq_data = irq_domain_get_irq_data(domain, virq); - - BUG_ON(nr_irqs != 1); - kfree(irq_data->chip_data); - irq_domain_free_irqs_top(domain, virq, nr_irqs); -} - -static int htirq_domain_activate(struct irq_domain *domain, - struct irq_data *irq_data, bool early) -{ - struct ht_irq_msg msg; - struct irq_cfg *cfg = irqd_cfg(irq_data); - - msg.address_hi = HT_IRQ_HIGH_DEST_ID(cfg->dest_apicid); - msg.address_lo = - HT_IRQ_LOW_BASE | - HT_IRQ_LOW_DEST_ID(cfg->dest_apicid) | - HT_IRQ_LOW_VECTOR(cfg->vector) | - ((apic->irq_dest_mode == 0) ? - HT_IRQ_LOW_DM_PHYSICAL : - HT_IRQ_LOW_DM_LOGICAL) | - HT_IRQ_LOW_RQEOI_EDGE | - ((apic->irq_delivery_mode != dest_LowestPrio) ? 
- HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED) | - HT_IRQ_LOW_IRQ_MASKED; - write_ht_irq_msg(irq_data->irq, &msg); - return 0; -} - -static void htirq_domain_deactivate(struct irq_domain *domain, - struct irq_data *irq_data) -{ - struct ht_irq_msg msg; - - memset(&msg, 0, sizeof(msg)); - write_ht_irq_msg(irq_data->irq, &msg); -} - -static const struct irq_domain_ops htirq_domain_ops = { - .alloc = htirq_domain_alloc, - .free = htirq_domain_free, - .activate = htirq_domain_activate, - .deactivate = htirq_domain_deactivate, -}; - -void __init arch_init_htirq_domain(struct irq_domain *parent) -{ - struct fwnode_handle *fn; - - if (disable_apic) - return; - - fn = irq_domain_alloc_named_fwnode("PCI-HT"); - if (!fn) - goto warn; - - htirq_domain = irq_domain_create_tree(fn, &htirq_domain_ops, NULL); - irq_domain_free_fwnode(fn); - if (!htirq_domain) - goto warn; - - htirq_domain->parent = parent; - return; - -warn: - pr_warn("Failed to initialize irqdomain for HTIRQ.\n"); -} - -int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, - ht_irq_update_t *update) -{ - struct irq_alloc_info info; - - if (!htirq_domain) - return -ENOSYS; - - init_irq_alloc_info(&info, NULL); - info.ht_idx = idx; - info.ht_pos = pos; - info.ht_dev = dev; - info.ht_update = update; - - return irq_domain_alloc_irqs(htirq_domain, 1, dev_to_node(&dev->dev), - &info); -} - -void arch_teardown_ht_irq(unsigned int irq) -{ - irq_domain_free_irqs(irq, 1); -} diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 05c85e693a5d..6a823a25eaff 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -1,5 +1,5 @@ /* - * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc. + * Local APIC related interfaces to support IOAPIC, MSI, etc. * * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo * Moved from arch/x86/kernel/apic/io_apic.c. 
@@ -601,7 +601,7 @@ int __init arch_probe_nr_irqs(void) nr_irqs = NR_VECTORS * nr_cpu_ids; nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) +#if defined(CONFIG_PCI_MSI) /* * for MSI and HT dyn irq */ @@ -663,7 +663,6 @@ int __init arch_early_irq_init(void) irq_set_default_host(x86_vector_domain); arch_init_msi_domain(x86_vector_domain); - arch_init_htirq_domain(x86_vector_domain); BUG_ON(!alloc_cpumask_var(&vector_searchmask, GFP_KERNEL)); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 90cb82dbba57..570e8bb1f386 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -22,7 +22,7 @@ obj-y += common.o obj-y += rdrand.o obj-y += match.o obj-y += bugs.o -obj-$(CONFIG_CPU_FREQ) += aperfmperf.o +obj-y += aperfmperf.o obj-y += cpuid-deps.o obj-$(CONFIG_PROC_FS) += proc.o diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 957813e0180d..7eba34df54c3 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -14,6 +14,8 @@ #include <linux/percpu.h> #include <linux/smp.h> +#include "cpu.h" + struct aperfmperf_sample { unsigned int khz; ktime_t time; @@ -24,7 +26,7 @@ struct aperfmperf_sample { static DEFINE_PER_CPU(struct aperfmperf_sample, samples); #define APERFMPERF_CACHE_THRESHOLD_MS 10 -#define APERFMPERF_REFRESH_DELAY_MS 20 +#define APERFMPERF_REFRESH_DELAY_MS 10 #define APERFMPERF_STALE_THRESHOLD_MS 1000 /* @@ -38,8 +40,6 @@ static void aperfmperf_snapshot_khz(void *dummy) u64 aperf, aperf_delta; u64 mperf, mperf_delta; struct aperfmperf_sample *s = this_cpu_ptr(&samples); - ktime_t now = ktime_get(); - s64 time_delta = ktime_ms_delta(now, s->time); unsigned long flags; local_irq_save(flags); @@ -57,38 +57,68 @@ static void aperfmperf_snapshot_khz(void *dummy) if (mperf_delta == 0) return; - s->time = now; + s->time = ktime_get(); s->aperf = aperf; s->mperf = mperf; - - /* If the previous iteration was too long ago, discard it. */ - if (time_delta > APERFMPERF_STALE_THRESHOLD_MS) - s->khz = 0; - else - s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); + s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta); } -unsigned int arch_freq_get_on_cpu(int cpu) +static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait) { - s64 time_delta; - unsigned int khz; + s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu)); + + /* Don't bother re-computing within the cache threshold time. */ + if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS) + return true; + + smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait); + + /* Return false if the previous iteration was too long ago. */ + return time_delta <= APERFMPERF_STALE_THRESHOLD_MS; +} +unsigned int aperfmperf_get_khz(int cpu) +{ if (!cpu_khz) return 0; if (!static_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - /* Don't bother re-computing within the cache threshold time. 
*/ - time_delta = ktime_ms_delta(ktime_get(), per_cpu(samples.time, cpu)); - khz = per_cpu(samples.khz, cpu); - if (khz && time_delta < APERFMPERF_CACHE_THRESHOLD_MS) - return khz; + aperfmperf_snapshot_cpu(cpu, ktime_get(), true); + return per_cpu(samples.khz, cpu); +} - smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); - khz = per_cpu(samples.khz, cpu); - if (khz) - return khz; +void arch_freq_prepare_all(void) +{ + ktime_t now = ktime_get(); + bool wait = false; + int cpu; + + if (!cpu_khz) + return; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return; + + for_each_online_cpu(cpu) + if (!aperfmperf_snapshot_cpu(cpu, now, false)) + wait = true; + + if (wait) + msleep(APERFMPERF_REFRESH_DELAY_MS); +} + +unsigned int arch_freq_get_on_cpu(int cpu) +{ + if (!cpu_khz) + return 0; + + if (!static_cpu_has(X86_FEATURE_APERFMPERF)) + return 0; + + if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) + return per_cpu(samples.khz, cpu); msleep(APERFMPERF_REFRESH_DELAY_MS); smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 13ae9e5eec2f..fa998ca8aa5a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -341,6 +341,8 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) cr4_set_bits(X86_CR4_UMIP); + pr_info("x86/cpu: Activated the Intel User Mode Instruction Prevention (UMIP) CPU feature\n"); + return; out: diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index f52a370b6c00..e806b11a99af 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -47,4 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); + +unsigned int aperfmperf_get_khz(int cpu); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 6b7e17bf0b71..e7ecedafa1c8 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c @@ -5,6 +5,8 @@ #include <linux/seq_file.h> #include <linux/cpufreq.h> +#include "cpu.h" + /* * Get CPU information for use by the procfs. 
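aperfmperf_snapshot_cpu() above reuses a cached sample younger than APERFMPERF_CACHE_THRESHOLD_MS and reports failure when the previous sample is more than a second old. A self-contained sketch of that freshness policy, with caller-supplied millisecond timestamps standing in for ktime and the thresholds copied from the patch:

	#include <stdbool.h>
	#include <stdio.h>

	#define CACHE_THRESHOLD_MS	10
	#define STALE_THRESHOLD_MS	1000

	struct sample { long long time_ms; };

	/* Returns true when the (re)freshed sample is usable. */
	static bool snapshot_cpu(struct sample *s, long long now_ms)
	{
		long long delta = now_ms - s->time_ms;

		if (delta < CACHE_THRESHOLD_MS)
			return true;	/* fresh enough: skip the IPI */

		/* here the kernel IPIs the CPU to re-read APERF/MPERF */
		s->time_ms = now_ms;

		/* a window older than 1 s measured nothing useful */
		return delta <= STALE_THRESHOLD_MS;
	}

	int main(void)
	{
		struct sample s = { .time_ms = 0 };

		printf("%d\n", snapshot_cpu(&s, 5));	/* 1: cached */
		printf("%d\n", snapshot_cpu(&s, 50));	/* 1: refreshed */
		printf("%d\n", snapshot_cpu(&s, 5000));	/* 0: was stale */
		return 0;
	}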
*/ @@ -78,9 +80,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "microcode\t: 0x%x\n", c->microcode); if (cpu_has(c, X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get(cpu); + unsigned int freq = aperfmperf_get_khz(cpu); if (!freq) + freq = cpufreq_quick_get(cpu); + if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", freq / 1000, (freq % 1000)); diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 410c5dadcee3..3a4b12809ab5 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -431,6 +431,7 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) } static unsigned long mpf_base; +static bool mpf_found; static unsigned long __init get_mpc_size(unsigned long physptr) { @@ -504,7 +505,7 @@ void __init default_get_smp_config(unsigned int early) if (!smp_found_config) return; - if (!mpf_base) + if (!mpf_found) return; if (acpi_lapic && early) @@ -593,6 +594,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) smp_found_config = 1; #endif mpf_base = base; + mpf_found = true; pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", base, base + sizeof(*mpf) - 1, mpf); @@ -858,7 +860,7 @@ static int __init update_mp_table(void) if (!enable_update_mptable) return 0; - if (!mpf_base) + if (!mpf_found) return 0; mpf = early_memremap(mpf_base, sizeof(*mpf)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 5f59e6bee123..3d01df7d7cf6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -101,9 +101,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info); EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ -static int *physical_to_logical_pkg __read_mostly; -static unsigned long *physical_package_map __read_mostly;; -static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); static unsigned int logical_packages __read_mostly; @@ -281,108 +278,48 @@ static void notrace start_secondary(void *unused) } /** + * topology_phys_to_logical_pkg - Map a physical package id to a logical + * + * Returns logical package id or -1 if not found + */ +int topology_phys_to_logical_pkg(unsigned int phys_pkg) +{ + int cpu; + + for_each_possible_cpu(cpu) { + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->initialized && c->phys_proc_id == phys_pkg) + return c->logical_proc_id; + } + return -1; +} +EXPORT_SYMBOL(topology_phys_to_logical_pkg); + +/** * topology_update_package_map - Update the physical to logical package map * @pkg: The physical package id as retrieved via CPUID * @cpu: The cpu for which this is updated */ int topology_update_package_map(unsigned int pkg, unsigned int cpu) { - unsigned int new; + int new; - /* Called from early boot ? */ - if (!physical_package_map) - return 0; - - if (pkg >= max_physical_pkg_id) - return -EINVAL; - - /* Set the logical package id */ - if (test_and_set_bit(pkg, physical_package_map)) + /* Already available somewhere? 
*/ + new = topology_phys_to_logical_pkg(pkg); + if (new >= 0) goto found; - if (logical_packages >= __max_logical_packages) { - pr_warn("Package %u of CPU %u exceeds BIOS package data %u.\n", - logical_packages, cpu, __max_logical_packages); - return -ENOSPC; - } - new = logical_packages++; if (new != pkg) { pr_info("CPU %u Converting physical %u to logical package %u\n", cpu, pkg, new); } - physical_to_logical_pkg[pkg] = new; - found: - cpu_data(cpu).logical_proc_id = physical_to_logical_pkg[pkg]; + cpu_data(cpu).logical_proc_id = new; return 0; } -/** - * topology_phys_to_logical_pkg - Map a physical package id to a logical - * - * Returns logical package id or -1 if not found - */ -int topology_phys_to_logical_pkg(unsigned int phys_pkg) -{ - if (phys_pkg >= max_physical_pkg_id) - return -1; - return physical_to_logical_pkg[phys_pkg]; -} -EXPORT_SYMBOL(topology_phys_to_logical_pkg); - -static void __init smp_init_package_map(struct cpuinfo_x86 *c, unsigned int cpu) -{ - unsigned int ncpus; - size_t size; - - /* - * Today neither Intel nor AMD support heterogenous systems. That - * might change in the future.... - * - * While ideally we'd want '* smp_num_siblings' in the below @ncpus - * computation, this won't actually work since some Intel BIOSes - * report inconsistent HT data when they disable HT. - * - * In particular, they reduce the APIC-IDs to only include the cores, - * but leave the CPUID topology to say there are (2) siblings. - * This means we don't know how many threads there will be until - * after the APIC enumeration. - * - * By not including this we'll sometimes over-estimate the number of - * logical packages by the amount of !present siblings, but this is - * still better than MAX_LOCAL_APIC. - * - * We use total_cpus not nr_cpu_ids because nr_cpu_ids can be limited - * on the command line leading to a similar issue as the HT disable - * problem because the hyperthreads are usually enumerated after the - * primary cores. - */ - ncpus = boot_cpu_data.x86_max_cores; - if (!ncpus) { - pr_warn("x86_max_cores == zero !?!?"); - ncpus = 1; - } - - __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); - logical_packages = 0; - - /* - * Possibly larger than what we need as the number of apic ids per - * package can be smaller than the actual used apic ids. - */ - max_physical_pkg_id = DIV_ROUND_UP(MAX_LOCAL_APIC, ncpus); - size = max_physical_pkg_id * sizeof(unsigned int); - physical_to_logical_pkg = kmalloc(size, GFP_KERNEL); - memset(physical_to_logical_pkg, 0xff, size); - size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); - physical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); - - topology_update_package_map(c->phys_proc_id, cpu); -} - void __init smp_store_boot_cpu_info(void) { int id = 0; /* CPU 0 */ @@ -390,7 +327,8 @@ void __init smp_store_boot_cpu_info(void) *c = boot_cpu_data; c->cpu_index = id; - smp_init_package_map(c, id); + topology_update_package_map(c->phys_proc_id, id); + c->initialized = true; } /* @@ -401,13 +339,16 @@ void smp_store_cpu_info(int id) { struct cpuinfo_x86 *c = &cpu_data(id); - *c = boot_cpu_data; + /* Copy boot_cpu_data only on the first bringup */ + if (!c->initialized) + *c = boot_cpu_data; c->cpu_index = id; /* * During boot time, CPU0 has this setup already. Save the info when * bringing up AP or offlined CPU0. 
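The rewritten topology_phys_to_logical_pkg() above trades the static physical-to-logical array for a linear scan over CPUs whose cpuinfo is marked initialized. A compact sketch of the lookup over a toy fixed-size array in place of per-cpu data:

	#include <stdio.h>

	struct cpuinfo {
		int initialized;
		unsigned int phys_pkg;
		unsigned int logical_pkg;
	};

	/* Made-up stand-in for per-cpu cpu_data. */
	static struct cpuinfo cpu_data[4] = {
		{ 1, 0, 0 }, { 1, 0, 0 }, { 1, 3, 1 }, { 0, 0, 0 },
	};

	/* Map a physical package id to a logical one, -1 if unknown. */
	static int phys_to_logical_pkg(unsigned int phys_pkg)
	{
		int cpu;

		for (cpu = 0; cpu < 4; cpu++) {
			struct cpuinfo *c = &cpu_data[cpu];

			if (c->initialized && c->phys_pkg == phys_pkg)
				return c->logical_pkg;
		}
		return -1;
	}

	int main(void)
	{
		printf("%d %d %d\n", phys_to_logical_pkg(0),
		       phys_to_logical_pkg(3),
		       phys_to_logical_pkg(7));	/* 0 1 -1 */
		return 0;
	}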
*/ identify_secondary_cpu(c); + c->initialized = true; } static bool @@ -1356,7 +1297,16 @@ void __init native_smp_prepare_boot_cpu(void) void __init native_smp_cpus_done(unsigned int max_cpus) { + int ncpus; + pr_debug("Boot done\n"); + /* + * Today neither Intel nor AMD support heterogenous systems so + * extrapolate the boot cpu's data to all packages. + */ + ncpus = cpu_data(0).booted_cores * smp_num_siblings; + __max_logical_packages = DIV_ROUND_UP(nr_cpu_ids, ncpus); + pr_info("Max logical packages: %u\n", __max_logical_packages); if (x86_has_numa_in_package) set_sched_topology(x86_numa_in_package_topology); diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a63fe77b3217..676774b9bb8d 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -188,6 +188,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, if (len > TASK_SIZE) return -ENOMEM; + /* No address checking. See comment at mmap_address_hint_valid() */ if (flags & MAP_FIXED) return addr; @@ -197,12 +198,15 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, /* requesting a specific address */ if (addr) { - addr = PAGE_ALIGN(addr); + addr &= PAGE_MASK; + if (!mmap_address_hint_valid(addr, len)) + goto get_unmapped_area; + vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && - (!vma || addr + len <= vm_start_gap(vma))) + if (!vma || addr + len <= vm_start_gap(vma)) return addr; } +get_unmapped_area: info.flags = VM_UNMAPPED_AREA_TOPDOWN; info.length = len; diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index 6ba82be68cff..f44ce0fb3583 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -78,7 +78,60 @@ #define UMIP_INST_SGDT 0 /* 0F 01 /0 */ #define UMIP_INST_SIDT 1 /* 0F 01 /1 */ -#define UMIP_INST_SMSW 3 /* 0F 01 /4 */ +#define UMIP_INST_SMSW 2 /* 0F 01 /4 */ +#define UMIP_INST_SLDT 3 /* 0F 00 /0 */ +#define UMIP_INST_STR 4 /* 0F 00 /1 */ + +const char * const umip_insns[5] = { + [UMIP_INST_SGDT] = "SGDT", + [UMIP_INST_SIDT] = "SIDT", + [UMIP_INST_SMSW] = "SMSW", + [UMIP_INST_SLDT] = "SLDT", + [UMIP_INST_STR] = "STR", +}; + +#define umip_pr_err(regs, fmt, ...) \ + umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__) +#define umip_pr_warning(regs, fmt, ...) \ + umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__) + +/** + * umip_printk() - Print a rate-limited message + * @regs: Register set with the context in which the warning is printed + * @log_level: Kernel log level to print the message + * @fmt: The text string to print + * + * Print the text contained in @fmt. The print rate is limited to bursts of 5 + * messages every two minutes. The purpose of this customized version of + * printk() is to print messages when user space processes use any of the + * UMIP-protected instructions. Thus, the printed text is prepended with the + * task name and process ID number of the current task as well as the + * instruction and stack pointers in @regs as seen when entering kernel mode. + * + * Returns: + * + * None. + */ +static __printf(3, 4) +void umip_printk(const struct pt_regs *regs, const char *log_level, + const char *fmt, ...) 
+{ + /* Bursts of 5 messages every two minutes */ + static DEFINE_RATELIMIT_STATE(ratelimit, 2 * 60 * HZ, 5); + struct task_struct *tsk = current; + struct va_format vaf; + va_list args; + + if (!__ratelimit(&ratelimit)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk("%s" pr_fmt("%s[%d] ip:%lx sp:%lx: %pV"), log_level, tsk->comm, + task_pid_nr(tsk), regs->ip, regs->sp, &vaf); + va_end(args); +} /** * identify_insn() - Identify a UMIP-protected instruction @@ -118,10 +171,16 @@ static int identify_insn(struct insn *insn) default: return -EINVAL; } + } else if (insn->opcode.bytes[1] == 0x0) { + if (X86_MODRM_REG(insn->modrm.value) == 0) + return UMIP_INST_SLDT; + else if (X86_MODRM_REG(insn->modrm.value) == 1) + return UMIP_INST_STR; + else + return -EINVAL; + } else { + return -EINVAL; } - - /* SLDT AND STR are not emulated */ - return -EINVAL; } /** @@ -228,10 +287,8 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) if (!(show_unhandled_signals && unhandled_signal(tsk, SIGSEGV))) return; - pr_err_ratelimited("%s[%d] umip emulation segfault ip:%lx sp:%lx error:%x in %lx\n", - tsk->comm, task_pid_nr(tsk), regs->ip, - regs->sp, X86_PF_USER | X86_PF_WRITE, - regs->ip); + umip_pr_err(regs, "segfault in emulation. error%x\n", + X86_PF_USER | X86_PF_WRITE); } /** @@ -262,15 +319,11 @@ bool fixup_umip_exception(struct pt_regs *regs) unsigned char buf[MAX_INSN_SIZE]; void __user *uaddr; struct insn insn; - char seg_defs; + int seg_defs; if (!regs) return false; - /* Do not emulate 64-bit processes. */ - if (user_64bit_mode(regs)) - return false; - /* * If not in user-space long mode, a custom code segment could be in * use. This is true in protected mode (if the process defined a local @@ -322,6 +375,15 @@ bool fixup_umip_exception(struct pt_regs *regs) if (umip_inst < 0) return false; + umip_pr_warning(regs, "%s instruction cannot be used by applications.\n", + umip_insns[umip_inst]); + + /* Do not emulate SLDT, STR or user long mode processes. */ + if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT || user_64bit_mode(regs)) + return false; + + umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n"); + if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size)) return false; diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index cdc70a3a6583..c2cea6651279 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -44,7 +44,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_8086_0001_EDX] = {0x80860001, 0, CPUID_EDX}, [CPUID_1_ECX] = { 1, 0, CPUID_ECX}, [CPUID_C000_0001_EDX] = {0xc0000001, 0, CPUID_EDX}, - [CPUID_8000_0001_ECX] = {0xc0000001, 0, CPUID_ECX}, + [CPUID_8000_0001_ECX] = {0x80000001, 0, CPUID_ECX}, [CPUID_7_0_EBX] = { 7, 0, CPUID_EBX}, [CPUID_D_1_EAX] = { 0xd, 1, CPUID_EAX}, [CPUID_F_0_EDX] = { 0xf, 0, CPUID_EDX}, diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 8079d141792a..e7d04d0c8008 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -4014,6 +4014,26 @@ static int em_fxsave(struct x86_emulate_ctxt *ctxt) fxstate_size(ctxt)); } +/* + * FXRSTOR might restore XMM registers not provided by the guest. Fill + * in the host registers (via FXSAVE) instead, so they won't be modified. + * (preemption has to stay disabled until FXRSTOR). + * + * Use noinline to keep the stack for other functions called by callers small. 
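umip_printk() above limits output to bursts of five messages every two minutes via DEFINE_RATELIMIT_STATE. A rough user-space analogue of that window-and-burst policy (seconds instead of jiffies, and our own struct rather than the kernel's):

	#include <stdbool.h>
	#include <stdio.h>

	struct ratelimit {
		long interval_s, begin_s;
		int burst, printed;
	};

	/* Allow up to 'burst' events per 'interval_s' window. */
	static bool ratelimit_ok(struct ratelimit *rs, long now_s)
	{
		if (now_s - rs->begin_s >= rs->interval_s) {
			rs->begin_s = now_s;	/* open a new window */
			rs->printed = 0;
		}
		if (rs->printed >= rs->burst)
			return false;		/* suppressed */
		rs->printed++;
		return true;
	}

	int main(void)
	{
		struct ratelimit rs = { .interval_s = 120, .burst = 5 };
		long t;

		for (t = 0; t < 8; t++)
			printf("t=%ld %s\n", t,
			       ratelimit_ok(&rs, t) ? "printed" : "suppressed");
		return 0;
	}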
+ */ +static noinline int fxregs_fixup(struct fxregs_state *fx_state, + const size_t used_size) +{ + struct fxregs_state fx_tmp; + int rc; + + rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_tmp)); + memcpy((void *)fx_state + used_size, (void *)&fx_tmp + used_size, + __fxstate_size(16) - used_size); + + return rc; +} + static int em_fxrstor(struct x86_emulate_ctxt *ctxt) { struct fxregs_state fx_state; @@ -4024,19 +4044,19 @@ static int em_fxrstor(struct x86_emulate_ctxt *ctxt) if (rc != X86EMUL_CONTINUE) return rc; + size = fxstate_size(ctxt); + rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); + if (rc != X86EMUL_CONTINUE) + return rc; + ctxt->ops->get_fpu(ctxt); - size = fxstate_size(ctxt); if (size < __fxstate_size(16)) { - rc = asm_safe("fxsave %[fx]", , [fx] "+m"(fx_state)); + rc = fxregs_fixup(&fx_state, size); if (rc != X86EMUL_CONTINUE) goto out; } - rc = segmented_read_std(ctxt, ctxt->memop.addr.mem, &fx_state, size); - if (rc != X86EMUL_CONTINUE) - goto out; - if (fx_state.mxcsr >> 16) { rc = emulate_gp(ctxt, 0); goto out; @@ -5000,6 +5020,8 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) bool op_prefix = false; bool has_seg_override = false; struct opcode opcode; + u16 dummy; + struct desc_struct desc; ctxt->memop.type = OP_NONE; ctxt->memopp = NULL; @@ -5018,6 +5040,11 @@ int x86_decode_insn(struct x86_emulate_ctxt *ctxt, void *insn, int insn_len) switch (mode) { case X86EMUL_MODE_REAL: case X86EMUL_MODE_VM86: + def_op_bytes = def_ad_bytes = 2; + ctxt->ops->get_segment(ctxt, &dummy, &desc, NULL, VCPU_SREG_CS); + if (desc.d) + def_op_bytes = def_ad_bytes = 4; + break; case X86EMUL_MODE_PROT16: def_op_bytes = def_ad_bytes = 2; break; diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index bdff437acbcb..4e822ad363f3 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -209,12 +209,12 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; - if (edge) + if (edge) { ioapic->irr_delivered &= ~mask; - if ((edge && old_irr == ioapic->irr) || - (!edge && entry.fields.remote_irr)) { - ret = 0; - goto out; + if (old_irr == ioapic->irr) { + ret = 0; + goto out; + } } ret = ioapic_service(ioapic, irq, line_status); @@ -257,8 +257,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) index == RTC_GSI) { if (kvm_apic_match_dest(vcpu, NULL, 0, e->fields.dest_id, e->fields.dest_mode) || - (e->fields.trig_mode == IOAPIC_EDGE_TRIG && - kvm_apic_pending_eoi(vcpu, e->fields.vector))) + kvm_apic_pending_eoi(vcpu, e->fields.vector)) __set_bit(e->fields.vector, ioapic_handled_vectors); } @@ -277,6 +276,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) { unsigned index; bool mask_before, mask_after; + int old_remote_irr, old_delivery_status; union kvm_ioapic_redirect_entry *e; switch (ioapic->ioregsel) { @@ -299,14 +299,28 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val) return; e = &ioapic->redirtbl[index]; mask_before = e->fields.mask; + /* Preserve read-only fields */ + old_remote_irr = e->fields.remote_irr; + old_delivery_status = e->fields.delivery_status; if (ioapic->ioregsel & 1) { e->bits &= 0xffffffff; e->bits |= (u64) val << 32; } else { e->bits &= ~0xffffffffULL; e->bits |= (u32) val; - e->fields.remote_irr = 0; } + e->fields.remote_irr = old_remote_irr; + e->fields.delivery_status = old_delivery_status; + + /* + * Some OSes (Linux, Xen) assume that Remote IRR bit will + * be 
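fxregs_fixup() above keeps the guest-supplied prefix of the FXSAVE image and fills the remainder from the host's own state, so a later FXRSTOR cannot pick up stale or unintended data. A sketch of that partial-overlay copy on plain byte buffers standing in for struct fxregs_state:

	#include <stdio.h>
	#include <string.h>

	#define STATE_SIZE 16	/* stand-in for __fxstate_size(16) */

	/* Keep 'used' guest bytes; top up the tail from a trusted image. */
	static void fixup(char *state, size_t used, const char *host)
	{
		memcpy(state + used, host + used, STATE_SIZE - used);
	}

	int main(void)
	{
		char state[STATE_SIZE] = "GUESTGUESTGUEST";
		char host[STATE_SIZE]  = "hosthosthosthos";

		fixup(state, 5, host);	/* only 5 guest bytes are kept */
		printf("%s\n", state);	/* "GUEST" + host tail */
		return 0;
	}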
cleared by IOAPIC hardware when the entry is configured + * as edge-triggered. This behavior is used to simulate an + * explicit EOI on IOAPICs that don't have the EOI register. + */ + if (e->fields.trig_mode == IOAPIC_EDGE_TRIG) + e->fields.remote_irr = 0; + mask_after = e->fields.mask; if (mask_before != mask_after) kvm_fire_mask_notifiers(ioapic->kvm, KVM_IRQCHIP_IOAPIC, index, mask_after); @@ -324,7 +338,9 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) struct kvm_lapic_irq irqe; int ret; - if (entry->fields.mask) + if (entry->fields.mask || + (entry->fields.trig_mode == IOAPIC_LEVEL_TRIG && + entry->fields.remote_irr)) return -1; ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x " diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 943acbf00c69..e2c1fb8d35ce 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -266,9 +266,14 @@ static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id) recalculate_apic_map(apic->vcpu->kvm); } +static inline u32 kvm_apic_calc_x2apic_ldr(u32 id) +{ + return ((id >> 4) << 16) | (1 << (id & 0xf)); +} + static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { - u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + u32 ldr = kvm_apic_calc_x2apic_ldr(id); WARN_ON_ONCE(id != apic->vcpu->vcpu_id); @@ -2245,6 +2250,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, { if (apic_x2apic_mode(vcpu->arch.apic)) { u32 *id = (u32 *)(s->regs + APIC_ID); + u32 *ldr = (u32 *)(s->regs + APIC_LDR); if (vcpu->kvm->arch.x2apic_format) { if (*id != vcpu->vcpu_id) @@ -2255,6 +2261,10 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, else *id <<= 24; } + + /* In x2APIC mode, the LDR is fixed and based on the id */ + if (set) + *ldr = kvm_apic_calc_x2apic_ldr(*id); } return 0; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b71daed3cca2..eb714f1cdf7e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -361,6 +361,7 @@ static void recalc_intercepts(struct vcpu_svm *svm) { struct vmcb_control_area *c, *h; struct nested_state *g; + u32 h_intercept_exceptions; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -371,9 +372,14 @@ static void recalc_intercepts(struct vcpu_svm *svm) h = &svm->nested.hsave->control; g = &svm->nested; + /* No need to intercept #UD if L1 doesn't intercept it */ + h_intercept_exceptions = + h->intercept_exceptions & ~(1U << UD_VECTOR); + c->intercept_cr = h->intercept_cr | g->intercept_cr; c->intercept_dr = h->intercept_dr | g->intercept_dr; - c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions; + c->intercept_exceptions = + h_intercept_exceptions | g->intercept_exceptions; c->intercept = h->intercept | g->intercept; } @@ -2196,7 +2202,10 @@ static int ud_interception(struct vcpu_svm *svm) { int er; + WARN_ON_ONCE(is_guest_mode(&svm->vcpu)); er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -3671,6 +3680,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) u32 ecx = msr->index; u64 data = msr->data; switch (ecx) { + case MSR_IA32_CR_PAT: + if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) + return 1; + vcpu->arch.pat = data; + svm->vmcb->save.g_pat = data; + mark_dirty(svm->vmcb, VMCB_NPT); + break; case MSR_IA32_TSC: kvm_write_tsc(vcpu, msr); break; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c3522a989d0..4704aaf6d19e 100644 --- a/arch/x86/kvm/vmx.c +++ 
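ioapic_write_indirect() above preserves the read-only Remote IRR and Delivery Status bits across a guest write, then clears Remote IRR for edge-triggered entries to mimic real IOAPIC hardware. A bitfield sketch of that preserve-and-restore step (simplified entry layout with made-up field widths):

	#include <stdio.h>

	/* Simplified redirection entry; real ones are 64-bit. */
	struct redir_entry {
		unsigned int mask : 1;
		unsigned int trig_mode : 1;		/* 0 = edge, 1 = level */
		unsigned int remote_irr : 1;		/* read-only to the guest */
		unsigned int delivery_status : 1;	/* read-only to the guest */
		unsigned int vector : 8;
	};

	static void write_entry(struct redir_entry *e, struct redir_entry new_e)
	{
		unsigned int old_irr = e->remote_irr;
		unsigned int old_ds  = e->delivery_status;

		*e = new_e;			/* guest-visible fields updated */
		e->remote_irr = old_irr;	/* read-only bits survive */
		e->delivery_status = old_ds;

		/* Edge-triggered entries act as if hardware cleared IRR. */
		if (e->trig_mode == 0)
			e->remote_irr = 0;
	}

	int main(void)
	{
		struct redir_entry e = { .trig_mode = 1, .remote_irr = 1 };
		struct redir_entry guest = { .trig_mode = 1, .vector = 0x30 };

		write_entry(&e, guest);
		printf("irr=%u vector=%#x\n", e.remote_irr, e.vector); /* irr=1 */
		return 0;
	}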
b/arch/x86/kvm/vmx.c @@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id); static bool __read_mostly enable_vpid = 1; module_param_named(vpid, enable_vpid, bool, 0444); +static bool __read_mostly enable_vnmi = 1; +module_param_named(vnmi, enable_vnmi, bool, S_IRUGO); + static bool __read_mostly flexpriority_enabled = 1; module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); @@ -202,6 +205,10 @@ struct loaded_vmcs { bool nmi_known_unmasked; unsigned long vmcs_host_cr3; /* May not match real cr3 */ unsigned long vmcs_host_cr4; /* May not match real cr4 */ + /* Support for vnmi-less CPUs */ + int soft_vnmi_blocked; + ktime_t entry_time; + s64 vnmi_blocked_time; struct list_head loaded_vmcss_on_cpu_link; }; @@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void) SECONDARY_EXEC_ENABLE_INVPCID; } +static inline bool cpu_has_virtual_nmis(void) +{ + return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; +} + static inline bool cpu_has_vmx_wbinvd_exit(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit) (vmcs12->secondary_vm_exec_control & bit); } -static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) -{ - return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; -} - static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) { return vmcs12->pin_based_vm_exec_control & @@ -1880,7 +1887,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) { u32 eb; - eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | + eb = (1u << PF_VECTOR) | (1u << MC_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == @@ -1898,6 +1905,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else + eb |= 1u << UD_VECTOR; vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -3712,9 +3721,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) &_vmexit_control) < 0) return -EIO; - min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING | - PIN_BASED_VIRTUAL_NMIS; - opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER; + min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; + opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR | + PIN_BASED_VMX_PREEMPTION_TIMER; if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, &_pin_based_exec_control) < 0) return -EIO; @@ -5232,6 +5241,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; + + if (!enable_vnmi) + pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS; + /* Enable the preemption timer dynamically */ pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER; return pin_based_exec_ctrl; @@ -5589,7 +5602,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write64(GUEST_IA32_DEBUGCTL, 0); } - vmcs_writel(GUEST_RFLAGS, 0x02); + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); kvm_rip_write(vcpu, 0xfff0); vmcs_writel(GUEST_GDTR_BASE, 0); @@ -5666,7 +5679,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) static void enable_nmi_window(struct kvm_vcpu *vcpu) { - if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { + if (!enable_vnmi || + vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { enable_irq_window(vcpu); return; } @@ -5706,6 +5720,19 @@ static void vmx_inject_nmi(struct 
kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (!enable_vnmi) { + /* + * Tracking the NMI-blocked state in software is built upon + * finding the next open IRQ window. This, in turn, depends on + * well-behaving guests: They have to keep IRQs disabled at + * least as long as the NMI handler runs. Otherwise we may + * cause NMI nesting, maybe breaking the guest. But as this is + * highly unlikely, we can live with the residual risk. + */ + vmx->loaded_vmcs->soft_vnmi_blocked = 1; + vmx->loaded_vmcs->vnmi_blocked_time = 0; + } + ++vcpu->stat.nmi_injections; vmx->loaded_vmcs->nmi_known_unmasked = false; @@ -5724,6 +5751,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); bool masked; + if (!enable_vnmi) + return vmx->loaded_vmcs->soft_vnmi_blocked; if (vmx->loaded_vmcs->nmi_known_unmasked) return false; masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; @@ -5735,13 +5764,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx->loaded_vmcs->nmi_known_unmasked = !masked; - if (masked) - vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); - else - vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, - GUEST_INTR_STATE_NMI); + if (!enable_vnmi) { + if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) { + vmx->loaded_vmcs->soft_vnmi_blocked = masked; + vmx->loaded_vmcs->vnmi_blocked_time = 0; + } + } else { + vmx->loaded_vmcs->nmi_known_unmasked = !masked; + if (masked) + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + else + vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, + GUEST_INTR_STATE_NMI); + } } static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) @@ -5749,6 +5785,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) if (to_vmx(vcpu)->nested.nested_run_pending) return 0; + if (!enable_vnmi && + to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) + return 0; + return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | GUEST_INTR_STATE_NMI)); @@ -5877,11 +5917,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) return 1; /* already handled by vmx_vcpu_run() */ if (is_invalid_opcode(intr_info)) { - if (is_guest_mode(vcpu)) { - kvm_queue_exception(vcpu, UD_VECTOR); - return 1; - } + WARN_ON_ONCE(is_guest_mode(vcpu)); er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); + if (er == EMULATE_USER_EXIT) + return 0; if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); return 1; @@ -6476,6 +6515,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * AAK134, BY25. 
*/ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + enable_vnmi && (exit_qualification & INTR_INFO_UNBLOCK_NMI)) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -6535,6 +6575,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { + WARN_ON_ONCE(!enable_vnmi); vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_VIRTUAL_NMI_PENDING); ++vcpu->stat.nmi_window_exits; @@ -6562,7 +6603,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_EVENT, vcpu)) return 1; - err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE); + err = emulate_instruction(vcpu, 0); if (err == EMULATE_USER_EXIT) { ++vcpu->stat.mmio_exits; @@ -6758,6 +6799,9 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_flexpriority()) flexpriority_enabled = 0; + if (!cpu_has_virtual_nmis()) + enable_vnmi = 0; + /* * set_apic_access_page_addr() is used to reload apic access * page upon invalidation. No need to do anything if not @@ -6962,7 +7006,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) } /* Create a new VMCS */ - item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL); + item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL); if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); @@ -7371,10 +7415,11 @@ static inline void nested_release_vmcs12(struct vcpu_vmx *vmx) */ static void free_nested(struct vcpu_vmx *vmx) { - if (!vmx->nested.vmxon) + if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon) return; vmx->nested.vmxon = false; + vmx->nested.smm.vmxon = false; free_vpid(vmx->nested.vpid02); vmx->nested.posted_intr_nv = -1; vmx->nested.current_vmptr = -1ull; @@ -7979,6 +8024,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) * "blocked by NMI" bit has to be set before next VM entry. */ if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && + enable_vnmi && (exit_qualification & INTR_INFO_UNBLOCK_NMI)) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -8823,6 +8869,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) return 0; } + if (unlikely(!enable_vnmi && + vmx->loaded_vmcs->soft_vnmi_blocked)) { + if (vmx_interrupt_allowed(vcpu)) { + vmx->loaded_vmcs->soft_vnmi_blocked = 0; + } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && + vcpu->arch.nmi_pending) { + /* + * This CPU don't support us in finding the end of an + * NMI-blocked window if the guest runs with IRQs + * disabled. So we pull the trigger after 1 s of + * futile waiting, but inform the user about this. + */ + printk(KERN_WARNING "%s: Breaking out of NMI-blocked " + "state on VCPU %d after 1 s timeout\n", + __func__, vcpu->vcpu_id); + vmx->loaded_vmcs->soft_vnmi_blocked = 0; + } + } + if (exit_reason < kvm_vmx_max_exit_handlers && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); @@ -9105,33 +9170,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - if (vmx->loaded_vmcs->nmi_known_unmasked) - return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; - vector = exit_intr_info & INTR_INFO_VECTOR_MASK; - /* - * SDM 3: 27.7.1.2 (September 2008) - * Re-set bit "block by NMI" before VM entry if vmexit caused by - * a guest IRET fault. 
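With enable_vnmi off, the patch tracks NMI blocking in software and, as the vmx_handle_exit() hunk above shows, force-clears it after one second of accumulated guest time. A sketch of that accounting, with nanosecond timestamps passed in by the caller and the threshold copied from the patch:

	#include <stdbool.h>
	#include <stdio.h>

	#define VNMI_TIMEOUT_NS	1000000000LL	/* 1 s, as in the patch */

	struct soft_vnmi {
		bool blocked;
		long long blocked_ns;
		long long entry_ns;
	};

	/* Stamp guest entry, as vmx_vcpu_run() does with entry_time. */
	static void vmentry(struct soft_vnmi *s, long long now_ns)
	{
		if (s->blocked)
			s->entry_ns = now_ns;
	}

	/* Accumulate blocked time on exit; force-clear past the timeout. */
	static void vmexit(struct soft_vnmi *s, long long now_ns)
	{
		if (!s->blocked)
			return;

		s->blocked_ns += now_ns - s->entry_ns;
		if (s->blocked_ns > VNMI_TIMEOUT_NS) {
			s->blocked = false;
			puts("breaking out of NMI-blocked state");
		}
	}

	int main(void)
	{
		struct soft_vnmi s = { .blocked = true };

		vmentry(&s, 0);
		vmexit(&s, 600000000LL);
		vmentry(&s, 700000000LL);
		vmexit(&s, 1400000000LL);
		printf("blocked=%d\n", s.blocked);	/* 0 */
		return 0;
	}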
-	 * SDM 3: 23.2.2 (September 2008)
-	 * Bit 12 is undefined in any of the following cases:
-	 *  If the VM exit sets the valid bit in the IDT-vectoring
-	 *   information field.
-	 *  If the VM exit is due to a double fault.
-	 */
-	if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
-	    vector != DF_VECTOR && !idtv_info_valid)
-		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-			      GUEST_INTR_STATE_NMI);
-	else
-		vmx->loaded_vmcs->nmi_known_unmasked =
-			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
-			  & GUEST_INTR_STATE_NMI);
+	if (enable_vnmi) {
+		if (vmx->loaded_vmcs->nmi_known_unmasked)
+			return;
+		/*
+		 * Can't use vmx->exit_intr_info since we're not sure what
+		 * the exit reason is.
+		 */
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+		/*
+		 * SDM 3: 27.7.1.2 (September 2008)
+		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
+		 * a guest IRET fault.
+		 * SDM 3: 23.2.2 (September 2008)
+		 * Bit 12 is undefined in any of the following cases:
+		 *  If the VM exit sets the valid bit in the IDT-vectoring
+		 *   information field.
+		 *  If the VM exit is due to a double fault.
+		 */
+		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+		    vector != DF_VECTOR && !idtv_info_valid)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmx->loaded_vmcs->nmi_known_unmasked =
+				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+				  & GUEST_INTR_STATE_NMI);
+	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(),
+					      vmx->loaded_vmcs->entry_time));
 }
 
 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -9248,6 +9318,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr, cr3, cr4;
 
+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->entry_time = ktime_get();
+
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
 	if (vmx->emulation_required)
@@ -9727,8 +9802,7 @@ static void nested_vmx_cr_fixed1_bits_update(struct kvm_vcpu *vcpu)
 	cr4_fixed1_update(X86_CR4_SMEP,       ebx, bit(X86_FEATURE_SMEP));
 	cr4_fixed1_update(X86_CR4_SMAP,       ebx, bit(X86_FEATURE_SMAP));
 	cr4_fixed1_update(X86_CR4_PKE,        ecx, bit(X86_FEATURE_PKU));
-	/* TODO: Use X86_CR4_UMIP and X86_FEATURE_UMIP macros */
-	cr4_fixed1_update(bit(11),            ecx, bit(2));
+	cr4_fixed1_update(X86_CR4_UMIP,       ecx, bit(X86_FEATURE_UMIP));
 
 #undef cr4_fixed1_update
 }
@@ -10802,6 +10876,11 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		return 1;
 	}
 
+	if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
+	    (is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu) ||
+	     (vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD)))
+		return 1;
+
 	return 0;
 }
@@ -11026,13 +11105,12 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long exit_qual;
-
-	if (kvm_event_needs_reinjection(vcpu))
-		return -EBUSY;
+	bool block_nested_events =
+	    vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
 
 	if (vcpu->arch.exception.pending &&
 		nested_vmx_check_exception(vcpu, &exit_qual)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
 		vcpu->arch.exception.pending = false;
@@ -11041,14 +11119,14 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 
 	if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
 	    vmx->nested.preemption_timer_expired) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
 		return 0;
 	}
 
 	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
 				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
@@ -11064,7 +11142,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
 
 	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
 	    nested_exit_on_intr(vcpu)) {
-		if (vmx->nested.nested_run_pending)
+		if (block_nested_events)
 			return -EBUSY;
 		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
 		return 0;
@@ -11251,6 +11329,24 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	kvm_clear_interrupt_queue(vcpu);
 }
 
+static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu,
+			struct vmcs12 *vmcs12)
+{
+	u32 entry_failure_code;
+
+	nested_ept_uninit_mmu_context(vcpu);
+
+	/*
+	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
+	 * couldn't have changed.
+	 */
+	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
+		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
+
+	if (!enable_ept)
+		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+}
+
 /*
  * A part of what we need to do when the nested L2 guest exits and we want to
  * run its L1 parent, is to reset L1's guest state to the host state specified
@@ -11264,7 +11360,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 				   struct vmcs12 *vmcs12)
 {
 	struct kvm_segment seg;
-	u32 entry_failure_code;
 
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
 		vcpu->arch.efer = vmcs12->host_ia32_efer;
@@ -11291,17 +11386,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
 	vmx_set_cr4(vcpu, vmcs12->host_cr4);
 
-	nested_ept_uninit_mmu_context(vcpu);
-
-	/*
-	 * Only PDPTE load can fail as the value of cr3 was checked on entry and
-	 * couldn't have changed.
-	 */
-	if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code))
-		nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
-
-	if (!enable_ept)
-		vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
+	load_vmcs12_mmu_host_state(vcpu, vmcs12);
 
 	if (enable_vpid) {
 		/*
@@ -11531,6 +11616,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	 * accordingly.
 	 */
 	nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
+
+	load_vmcs12_mmu_host_state(vcpu, vmcs12);
+
 	/*
 	 * The emulated instruction was already skipped in
 	 * nested_vmx_run, but the updated RIP was never
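The BNDCFGS check added to check_vmentry_postreqs() above rejects a vmcs12 whose guest
BNDCFGS sets reserved bits or carries a non-canonical bound-directory base. A minimal
standalone sketch of the same predicate, assuming 48-bit canonical addresses, 4K pages
and the usual MSR_IA32_BNDCFGS_RSVD mask (bits 11:2); the helper names here are
illustrative, not kernel API:

    #include <stdbool.h>
    #include <stdint.h>

    #define PAGE_MASK_4K      (~0xfffULL)
    #define BNDCFGS_RSVD_BITS 0xffcULL		/* bits 11:2 must be zero */

    /*
     * A 48-bit virtual address is canonical iff bits 63:48 are a sign
     * extension of bit 47, i.e. sign-extending the low 48 bits
     * reproduces the original value.
     */
    static bool is_canonical_48(uint64_t addr)
    {
            return (uint64_t)((int64_t)(addr << 16) >> 16) == addr;
    }

    /* Mirrors the new vmcs12 guest-BNDCFGS validation above. */
    static bool bndcfgs_valid(uint64_t bndcfgs)
    {
            if (bndcfgs & BNDCFGS_RSVD_BITS)
                    return false;
            return is_canonical_48(bndcfgs & PAGE_MASK_4K);
    }

For example, bndcfgs_valid(0xffff800000000001ULL) holds (canonical base, EN bit set),
while any value with bits 11:2 set fails, matching the VM-entry rejection above.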
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 34c85aa2e2d1..eee8e7faf1af 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -107,6 +107,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool __read_mostly ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+static bool __read_mostly report_ignored_msrs = true;
+module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR);
+
 unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
@@ -1795,10 +1798,13 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 	/* both __this_cpu_read() and rdtsc() should be on the same cpu */
 	get_cpu();
 
-	kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
-			   &hv_clock.tsc_shift,
-			   &hv_clock.tsc_to_system_mul);
-	ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+	if (__this_cpu_read(cpu_tsc_khz)) {
+		kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+				   &hv_clock.tsc_shift,
+				   &hv_clock.tsc_to_system_mul);
+		ret = __pvclock_read_cycles(&hv_clock, rdtsc());
+	} else
+		ret = ktime_get_boot_ns() + ka->kvmclock_offset;
 
 	put_cpu();
@@ -1830,6 +1836,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
 	 */
 	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);
 
+	if (guest_hv_clock.version & 1)
+		++guest_hv_clock.version;  /* first time write, random junk */
+
 	vcpu->hv_clock.version = guest_hv_clock.version + 1;
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
@@ -2322,7 +2331,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		/* Drop writes to this legacy MSR -- see rdmsr
 		 * counterpart for further detail.
 		 */
-		vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data);
+		if (report_ignored_msrs)
+			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
+				msr, data);
 		break;
 	case MSR_AMD64_OSVW_ID_LENGTH:
 		if (!guest_cpuid_has(vcpu, X86_FEATURE_OSVW))
@@ -2359,8 +2370,10 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				    msr, data);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n",
-				    msr, data);
+			if (report_ignored_msrs)
+				vcpu_unimpl(vcpu,
+					"ignored wrmsr: 0x%x data 0x%llx\n",
+					msr, data);
 			break;
 		}
 	}
@@ -2578,7 +2591,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 					       msr_info->index);
 			return 1;
 		} else {
-			vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", msr_info->index);
+			if (report_ignored_msrs)
+				vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n",
+					msr_info->index);
 			msr_info->data = 0;
 		}
 		break;
@@ -5430,7 +5445,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
 		vcpu->run->internal.ndata = 0;
-		r = EMULATE_FAIL;
+		r = EMULATE_USER_EXIT;
 	}
 	kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5722,6 +5737,8 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 		if (reexecute_instruction(vcpu, cr2, write_fault_to_spt,
 					emulation_type))
 			return EMULATE_DONE;
+		if (ctxt->have_exception && inject_emulated_exception(vcpu))
+			return EMULATE_DONE;
 		if (emulation_type & EMULTYPE_SKIP)
 			return EMULATE_FAIL;
 		return handle_emulation_failure(vcpu);
@@ -7250,12 +7267,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	struct fpu *fpu = &current->thread.fpu;
 	int r;
-	sigset_t sigsaved;
 
 	fpu__initialize(fpu);
 
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+	kvm_sigset_activate(vcpu);
 
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
 		if (kvm_run->immediate_exit) {
@@ -7298,8 +7313,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 
 out:
 	post_kvm_run_save(vcpu);
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+	kvm_sigset_deactivate(vcpu);
 
 	return r;
 }
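For context on the version fix in kvm_setup_pvclock_page() above: pvclock publishes time
through a seqcount-style protocol in which an odd version means an update is in progress,
so stale junk with the low bit set left in guest memory would make a careful guest reader
spin forever; forcing the version even before the first write avoids that. A rough sketch
of the guest-side read loop that relies on this invariant, assuming a layout like the
pvclock_vcpu_time_info ABI; this illustrates the protocol, it is not the kernel's
pvclock_read_begin()/pvclock_read_retry() code:

    #include <stdint.h>

    struct pvclock_time_info {		/* assumed pvclock-like layout */
            uint32_t version;		/* odd while the host is updating */
            uint32_t pad0;
            uint64_t tsc_timestamp;
            uint64_t system_time;
            uint32_t tsc_to_system_mul;
            int8_t   tsc_shift;
            uint8_t  flags;
            uint8_t  pad[2];
    };

    /* Take a consistent snapshot: retry while version is odd or changes. */
    static struct pvclock_time_info
    pvclock_read(const volatile struct pvclock_time_info *src)
    {
            struct pvclock_time_info snap;
            uint32_t v;

            do {
                    while ((v = src->version) & 1)
                            ;				/* update in progress */
                    __atomic_thread_fence(__ATOMIC_ACQUIRE);
                    snap = *(const struct pvclock_time_info *)src;
                    __atomic_thread_fence(__ATOMIC_ACQUIRE);
            } while (src->version != v);		/* torn read, retry */

            return snap;
    }

A version field that starts out odd (uninitialized guest memory) would never satisfy the
inner loop, which is exactly the situation the "first time write" fix above repairs.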
diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c
index 35625d279458..9119d8e41f1f 100644
--- a/arch/x86/lib/insn-eval.c
+++ b/arch/x86/lib/insn-eval.c
@@ -733,11 +733,11 @@ static unsigned long get_seg_limit(struct pt_regs *regs, int seg_reg_idx)
  *
  * Returns:
  *
- * A signed 8-bit value containing the default parameters on success.
+ * An int containing ORed-in default parameters on success.
  *
  * -EINVAL on error.
  */
-char insn_get_code_seg_params(struct pt_regs *regs)
+int insn_get_code_seg_params(struct pt_regs *regs)
 {
 	struct desc_struct *desc;
 	short sel;
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 12e377184ee4..c4d55919fac1 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -896,7 +896,7 @@ EndTable
 
 GrpTable: Grp3_1
 0: TEST Eb,Ib
-1:
+1: TEST Eb,Ib
 2: NOT Eb
 3: NEG Eb
 4: MUL AL,Eb
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index 8ae0000cbdb3..00b296617ca4 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -158,6 +158,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	if (len > TASK_SIZE)
 		return -ENOMEM;
 
+	/* No address checking. See comment at mmap_address_hint_valid() */
 	if (flags & MAP_FIXED) {
 		if (prepare_hugepage_range(file, addr, len))
 			return -EINVAL;
@@ -165,12 +166,16 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	}
 
 	if (addr) {
-		addr = ALIGN(addr, huge_page_size(h));
+		addr &= huge_page_mask(h);
+		if (!mmap_address_hint_valid(addr, len))
+			goto get_unmapped_area;
+
 		vma = find_vma(mm, addr);
-		if (TASK_SIZE - len >= addr &&
-		    (!vma || addr + len <= vm_start_gap(vma)))
+		if (!vma || addr + len <= vm_start_gap(vma))
 			return addr;
 	}
+
+get_unmapped_area:
 	if (mm->get_unmapped_area == arch_get_unmapped_area)
 		return hugetlb_get_unmapped_area_bottomup(file, addr, len,
 				pgoff, flags);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index a99679826846..155ecbac9e28 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -33,6 +33,8 @@
 #include <linux/compat.h>
 #include <asm/elf.h>
 
+#include "physaddr.h"
+
 struct va_alignment __read_mostly va_align = {
 	.flags = -1,
 };
@@ -174,3 +176,63 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 		return "[mpx]";
 	return NULL;
 }
+
+/**
+ * mmap_address_hint_valid - Validate the address hint of mmap
+ * @addr:	Address hint
+ * @len:	Mapping length
+ *
+ * Check whether @addr and @addr + @len result in a valid mapping.
+ *
+ * On 32bit this only checks whether @addr + @len is <= TASK_SIZE.
+ *
+ * On 64bit with 5-level page tables another sanity check is required
+ * because mappings requested by mmap(@addr, 0) which cross the 47-bit
+ * virtual address boundary can cause the following theoretical issue:
+ *
+ *  An application calls mmap(addr, 0), i.e. without MAP_FIXED, where @addr
+ *  is below the border of the 47-bit address space and @addr + @len is
+ *  above the border.
+ *
+ *  With 4-level paging this request succeeds, but the resulting mapping
+ *  address will always be within the 47-bit virtual address space, because
+ *  the hint address does not result in a valid mapping and is
+ *  ignored. Hence applications which are not prepared to handle virtual
+ *  addresses above 47-bit work correctly.
+ *
+ *  With 5-level paging this request would be granted and result in a
+ *  mapping which crosses the border of the 47-bit virtual address
+ *  space. If the application cannot handle addresses above 47-bit this
+ *  will lead to misbehaviour and hard to diagnose failures.
+ *
+ * Therefore ignore address hints which would result in a mapping crossing
+ * the 47-bit virtual address boundary.
+ *
+ * Note, that in the same scenario with MAP_FIXED the behaviour is
+ * different. The request with @addr < 47-bit and @addr + @len > 47-bit
+ * fails on a 4-level paging machine but succeeds on a 5-level paging
+ * machine. It is reasonable to expect that an application does not rely on
+ * the failure of such a fixed mapping request, so the restriction is not
+ * applied.
+ */
+bool mmap_address_hint_valid(unsigned long addr, unsigned long len)
+{
+	if (TASK_SIZE - len < addr)
+		return false;
+
+	return (addr > DEFAULT_MAP_WINDOW) == (addr + len > DEFAULT_MAP_WINDOW);
+}
+
+/* Can we access it for direct reading/writing? Must be RAM: */
+int valid_phys_addr_range(phys_addr_t addr, size_t count)
+{
+	return addr + count <= __pa(high_memory);
+}
+
+/* Can we access it through mmap? Must be a valid physical address: */
+int valid_mmap_phys_addr_range(unsigned long pfn, size_t count)
+{
+	phys_addr_t addr = (phys_addr_t)pfn << PAGE_SHIFT;
+
+	return phys_addr_valid(addr + count - 1);
+}
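mmap_address_hint_valid() above accepts a hint only when the start and the end of the
requested range lie on the same side of DEFAULT_MAP_WINDOW. A small self-contained sketch
of that predicate, assuming the x86-64 4-level value DEFAULT_MAP_WINDOW =
(1UL << 47) - PAGE_SIZE; the constant and helper name are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    #define PAGE_SIZE          4096UL
    #define DEFAULT_MAP_WINDOW ((1UL << 47) - PAGE_SIZE)	/* x86-64, 4-level */

    /* Same boundary test as the kernel's mmap_address_hint_valid(),
     * without the TASK_SIZE length check. */
    static bool hint_valid(unsigned long addr, unsigned long len)
    {
            return (addr > DEFAULT_MAP_WINDOW) ==
                   (addr + len > DEFAULT_MAP_WINDOW);
    }

    int main(void)
    {
            /* Crosses the 47-bit border: the kernel ignores this hint. */
            printf("%d\n", hint_valid(DEFAULT_MAP_WINDOW - PAGE_SIZE,
                                      4 * PAGE_SIZE));		/* prints 0 */
            /* Entirely below the border: the hint may be honoured. */
            printf("%d\n", hint_valid(DEFAULT_MAP_WINDOW - 4 * PAGE_SIZE,
                                      PAGE_SIZE));		/* prints 1 */
            return 0;
    }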
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 2bfe590694fc..3e9d01ada81f 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -36,6 +36,7 @@
  */
 #define MAP_SHARED	0x001		/* Share changes */
 #define MAP_PRIVATE	0x002		/* Changes are private */
+#define MAP_SHARED_VALIDATE	0x003	/* share + validate extension flags */
 #define MAP_TYPE	0x00f		/* Mask for type of mapping */
 #define MAP_FIXED	0x010		/* Interpret addr exactly */
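The MAP_SHARED_VALIDATE definition added for xtensa above mirrors the generic value
(MAP_SHARED | MAP_PRIVATE, i.e. 0x03). Unlike plain MAP_SHARED, it makes the kernel
reject mmap() flag bits it does not recognize (with EOPNOTSUPP) instead of silently
ignoring them, which is what lets userspace probe reliably for new flags such as
MAP_SYNC. A small usage sketch, assuming a kernel from this cycle (4.15+); the fallback
define covers C libraries whose headers do not expose the flag yet:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    #ifndef MAP_SHARED_VALIDATE
    #define MAP_SHARED_VALIDATE 0x03	/* matches the value added above */
    #endif

    int main(void)
    {
            int fd = open("/tmp/map-validate-test", O_RDWR | O_CREAT, 0600);
            if (fd < 0 || ftruncate(fd, 4096) < 0)
                    return 1;

            /*
             * Behaves like MAP_SHARED, but every flag bit is validated;
             * an unsupported bit fails the call instead of being dropped.
             */
            void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                           MAP_SHARED_VALIDATE, fd, 0);
            if (p == MAP_FAILED)
                    perror("mmap");

            close(fd);
            return p == MAP_FAILED;
    }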