diff options
Diffstat (limited to 'arch/s390/kernel')
33 files changed, 607 insertions, 647 deletions
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 5e6a23299790..8983837b3565 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -58,6 +58,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KPROBES) += kprobes_insn_page.o obj-$(CONFIG_KPROBES) += mcount.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o @@ -69,7 +70,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o -obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o +obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index fb92e8ed0525..f9efc54ec4b7 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -3,12 +3,7 @@ #include <linux/pgtable.h> #include <asm/abs_lowcore.h> -#define ABS_LOWCORE_UNMAPPED 1 -#define ABS_LOWCORE_LAP_ON 2 -#define ABS_LOWCORE_IRQS_ON 4 - unsigned long __bootdata_preserved(__abs_lowcore); -bool __ro_after_init abs_lowcore_mapped; int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { @@ -49,47 +44,3 @@ void abs_lowcore_unmap(int cpu) addr += PAGE_SIZE; } } - -struct lowcore *get_abs_lowcore(unsigned long *flags) -{ - unsigned long irq_flags; - union ctlreg0 cr0; - int cpu; - - *flags = 0; - cpu = get_cpu(); - if (abs_lowcore_mapped) { - return ((struct lowcore *)__abs_lowcore) + cpu; - } else { - if (cpu != 0) - panic("Invalid unmapped absolute lowcore access\n"); - local_irq_save(irq_flags); - if (!irqs_disabled_flags(irq_flags)) - *flags |= ABS_LOWCORE_IRQS_ON; - __ctl_store(cr0.val, 0, 0); - if (cr0.lap) { - *flags |= 
ABS_LOWCORE_LAP_ON; - __ctl_clear_bit(0, 28); - } - *flags |= ABS_LOWCORE_UNMAPPED; - return lowcore_ptr[0]; - } -} - -void put_abs_lowcore(struct lowcore *lc, unsigned long flags) -{ - if (abs_lowcore_mapped) { - if (flags) - panic("Invalid mapped absolute lowcore release\n"); - } else { - if (smp_processor_id() != 0) - panic("Invalid mapped absolute lowcore access\n"); - if (!(flags & ABS_LOWCORE_UNMAPPED)) - panic("Invalid unmapped absolute lowcore release\n"); - if (flags & ABS_LOWCORE_LAP_ON) - __ctl_set_bit(0, 28); - if (flags & ABS_LOWCORE_IRQS_ON) - local_irq_enable(); - } - put_cpu(); -} diff --git a/arch/s390/kernel/cache.c b/arch/s390/kernel/cache.c index 7ee3651d00ab..56254fa06f99 100644 --- a/arch/s390/kernel/cache.c +++ b/arch/s390/kernel/cache.c @@ -46,7 +46,7 @@ struct cache_info { #define CACHE_MAX_LEVEL 8 union cache_topology { struct cache_info ci[CACHE_MAX_LEVEL]; - unsigned long long raw; + unsigned long raw; }; static const char * const cache_type_string[] = { diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index eee1ad3e1b29..cecedd01d4ec 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -139,7 +139,7 @@ static int save_sigregs_ext32(struct pt_regs *regs, /* Save vector registers to signal stack */ if (MACHINE_HAS_VX) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); + vxrs[i] = current->thread.fpu.vxrs[i].low; if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, @@ -173,7 +173,7 @@ static int restore_sigregs_ext32(struct pt_regs *regs, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i]; + current->thread.fpu.vxrs[i].low = vxrs[i]; } return 0; } diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index c13b1455ec8c..8a617be28bb4 100644 --- 
a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -110,7 +110,7 @@ void __init save_area_add_vxrs(struct save_area *sa, __vector128 *vxrs) /* Copy lower halves of vector registers 0-15 */ for (i = 0; i < 16; i++) - memcpy(&sa->vxrs_low[i], &vxrs[i].u[2], 8); + sa->vxrs_low[i] = vxrs[i].low; /* Copy vector registers 16-31 */ memcpy(sa->vxrs_high, vxrs + 16, 16 * sizeof(__vector128)); } diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index a778714e4d8b..82079f2d8583 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -35,6 +35,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X014] = { .code = 0x014, .name = "Spool File Services" }, [DIAG_STAT_X044] = { .code = 0x044, .name = "Voluntary Timeslice End" }, [DIAG_STAT_X064] = { .code = 0x064, .name = "NSS Manipulation" }, + [DIAG_STAT_X08C] = { .code = 0x08c, .name = "Access 3270 Display Device Information" }, [DIAG_STAT_X09C] = { .code = 0x09c, .name = "Relinquish Timeslice" }, [DIAG_STAT_X0DC] = { .code = 0x0dc, .name = "Appldata Control" }, [DIAG_STAT_X204] = { .code = 0x204, .name = "Logical-CPU Utilization" }, @@ -57,12 +58,16 @@ struct diag_ops __amode31_ref diag_amode31_ops = { .diag26c = _diag26c_amode31, .diag14 = _diag14_amode31, .diag0c = _diag0c_amode31, + .diag8c = _diag8c_amode31, .diag308_reset = _diag308_reset_amode31 }; static struct diag210 _diag210_tmp_amode31 __section(".amode31.data"); struct diag210 __amode31_ref *__diag210_tmp_amode31 = &_diag210_tmp_amode31; +static struct diag8c _diag8c_tmp_amode31 __section(".amode31.data"); +static struct diag8c __amode31_ref *__diag8c_tmp_amode31 = &_diag8c_tmp_amode31; + static int show_diag_stat(struct seq_file *m, void *v) { struct diag_stat *stat; @@ -194,6 +199,27 @@ int diag210(struct diag210 *addr) } EXPORT_SYMBOL(diag210); +/* + * Diagnose 8C: Access 3270 Display Device Information + */ +int diag8c(struct diag8c *addr, struct ccw_dev_id *devno) +{ + static 
DEFINE_SPINLOCK(diag8c_lock); + unsigned long flags; + int ccode; + + spin_lock_irqsave(&diag8c_lock, flags); + + diag_stat_inc(DIAG_STAT_X08C); + ccode = diag_amode31_ops.diag8c(__diag8c_tmp_amode31, devno, sizeof(*addr)); + + *addr = *__diag8c_tmp_amode31; + spin_unlock_irqrestore(&diag8c_lock, flags); + + return ccode; +} +EXPORT_SYMBOL(diag8c); + int diag224(void *ptr) { int rc = -EOPNOTSUPP; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 6030fdd6997b..59eba19ae0f2 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -18,6 +18,7 @@ #include <linux/uaccess.h> #include <linux/kernel.h> #include <asm/asm-extable.h> +#include <linux/memblock.h> #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/ipl.h> @@ -160,9 +161,7 @@ static noinline __init void setup_lowcore_early(void) psw_t psw; psw.addr = (unsigned long)early_pgm_check_handler; - psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA; - if (IS_ENABLED(CONFIG_KASAN)) - psw.mask |= PSW_MASK_DAT; + psw.mask = PSW_KERNEL_BITS; S390_lowcore.program_new_psw = psw; S390_lowcore.preempt_count = INIT_PREEMPT_COUNT; } @@ -227,6 +226,8 @@ static __init void detect_machine_facilities(void) S390_lowcore.machine_flags |= MACHINE_FLAG_PCI_MIO; /* the control bit is set during PCI initialization */ } + if (test_facility(194)) + S390_lowcore.machine_flags |= MACHINE_FLAG_RDP; } static inline void save_vector_registers(void) @@ -288,7 +289,6 @@ static void __init sort_amode31_extable(void) void __init startup_init(void) { - sclp_early_adjust_va(); reset_tod_clock(); check_image_bootable(); time_early_init(); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 0f423e9df095..c8d8c9960936 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -137,19 +137,13 @@ _LPP_OFFSET = __LC_LPP lgr %r14,\reg larl %r13,\start slgr %r14,%r13 -#ifdef CONFIG_AS_IS_LLVM clgfrl %r14,.Lrange_size\@ -#else - clgfi %r14,\end - \start -#endif jhe 
\outside_label -#ifdef CONFIG_AS_IS_LLVM .section .rodata, "a" .align 4 .Lrange_size\@: .long \end - \start .previous -#endif .endm .macro SIEEXIT diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 995ec7449feb..34674e38826b 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -73,6 +73,5 @@ extern struct exception_table_entry _stop_amode31_ex_table[]; #define __amode31_data __section(".amode31.data") #define __amode31_ref __section(".amode31.refs") extern long _start_amode31_refs[], _end_amode31_refs[]; -extern unsigned long __amode31_base; #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index d7b8b6ad574d..3b3bf8329e6c 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -25,6 +25,7 @@ ENTRY(startup_continue) larl %r14,init_task stg %r14,__LC_CURRENT larl %r15,init_thread_union+THREAD_SIZE-STACK_FRAME_OVERHEAD-__PT_SIZE + brasl %r14,sclp_early_adjust_va # allow sclp_early_printk #ifdef CONFIG_KASAN brasl %r14,kasan_early_init #endif diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 4bf1ee293f2b..38e267c7bff7 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,9 +12,9 @@ #include <linux/notifier.h> #include <linux/init.h> #include <linux/cpu.h> -#include <linux/sched/cputime.h> #include <trace/events/power.h> #include <asm/cpu_mf.h> +#include <asm/cputime.h> #include <asm/nmi.h> #include <asm/smp.h> #include "entry.h" @@ -24,117 +24,61 @@ static DEFINE_PER_CPU(struct s390_idle_data, s390_idle); void account_idle_time_irq(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); + unsigned long idle_time; u64 cycles_new[8]; int i; - clear_cpu_flag(CIF_ENABLED_WAIT); if (smp_cpu_mtid) { stcctm(MT_DIAG, smp_cpu_mtid, cycles_new); for (i = 0; i < smp_cpu_mtid; i++) this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]); } - idle->clock_idle_exit = S390_lowcore.int_clock; - idle->timer_idle_exit = 
S390_lowcore.sys_enter_timer; + idle_time = S390_lowcore.int_clock - idle->clock_idle_enter; S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock; - S390_lowcore.last_update_clock = idle->clock_idle_exit; + S390_lowcore.last_update_clock = S390_lowcore.int_clock; S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter; - S390_lowcore.last_update_timer = idle->timer_idle_exit; + S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer; + + /* Account time spent with enabled wait psw loaded as idle time. */ + WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time); + WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1); + account_idle_time(cputime_to_nsecs(idle_time)); } -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { struct s390_idle_data *idle = this_cpu_ptr(&s390_idle); - unsigned long idle_time; unsigned long psw_mask; /* Wait for external, I/O or machine check interrupt. */ - psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; + psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | + PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK; clear_cpu_flag(CIF_NOHZ_DELAY); /* psw_idle() returns with interrupts disabled. */ psw_idle(idle, psw_mask); - - /* Account time spent with enabled wait psw loaded as idle time. 
*/ - raw_write_seqcount_begin(&idle->seqcount); - idle_time = idle->clock_idle_exit - idle->clock_idle_enter; - idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; - idle->idle_time += idle_time; - idle->idle_count++; - account_idle_time(cputime_to_nsecs(idle_time)); - raw_write_seqcount_end(&idle->seqcount); - raw_local_irq_enable(); } static ssize_t show_idle_count(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned long idle_count; - unsigned int seq; - - do { - seq = read_seqcount_begin(&idle->seqcount); - idle_count = READ_ONCE(idle->idle_count); - if (READ_ONCE(idle->clock_idle_enter)) - idle_count++; - } while (read_seqcount_retry(&idle->seqcount, seq)); - return sprintf(buf, "%lu\n", idle_count); + + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count)); } DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); static ssize_t show_idle_time(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, char *buf) { - unsigned long now, idle_time, idle_enter, idle_exit, in_idle; struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); - unsigned int seq; - - do { - seq = read_seqcount_begin(&idle->seqcount); - idle_time = READ_ONCE(idle->idle_time); - idle_enter = READ_ONCE(idle->clock_idle_enter); - idle_exit = READ_ONCE(idle->clock_idle_exit); - } while (read_seqcount_retry(&idle->seqcount, seq)); - in_idle = 0; - now = get_tod_clock(); - if (idle_enter) { - if (idle_exit) { - in_idle = idle_exit - idle_enter; - } else if (now > idle_enter) { - in_idle = now - idle_enter; - } - } - idle_time += in_idle; - return sprintf(buf, "%lu\n", idle_time >> 12); -} -DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); -u64 arch_cpu_idle_time(int cpu) -{ - struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); - unsigned long now, idle_enter, idle_exit, in_idle; - unsigned int seq; - - do { - 
seq = read_seqcount_begin(&idle->seqcount); - idle_enter = READ_ONCE(idle->clock_idle_enter); - idle_exit = READ_ONCE(idle->clock_idle_exit); - } while (read_seqcount_retry(&idle->seqcount, seq)); - in_idle = 0; - now = get_tod_clock(); - if (idle_enter) { - if (idle_exit) { - in_idle = idle_exit - idle_enter; - } else if (now > idle_enter) { - in_idle = now - idle_enter; - } - } - return cputime_to_nsecs(in_idle); + return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12); } +DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); void arch_cpu_idle_enter(void) { diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index fbd646dbf440..5f0f5c86963a 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -593,6 +593,7 @@ static struct attribute *ipl_eckd_attrs[] = { &sys_ipl_type_attr.attr, &sys_ipl_eckd_bootprog_attr.attr, &sys_ipl_eckd_br_chr_attr.attr, + &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_secure_attr.attr, &sys_ipl_has_secure_attr.attr, @@ -888,23 +889,27 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb, return len; } -/* FCP wrapper */ -static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) -{ - return reipl_generic_loadparm_show(reipl_block_fcp, page); -} - -static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) -{ - return reipl_generic_loadparm_store(reipl_block_fcp, buf, len); -} - -static struct kobj_attribute sys_reipl_fcp_loadparm_attr = - __ATTR(loadparm, 0644, reipl_fcp_loadparm_show, - reipl_fcp_loadparm_store); +#define DEFINE_GENERIC_LOADPARM(name) \ +static ssize_t reipl_##name##_loadparm_show(struct kobject *kobj, \ + struct kobj_attribute *attr, char *page) \ +{ \ + return reipl_generic_loadparm_show(reipl_block_##name, page); \ +} \ +static ssize_t reipl_##name##_loadparm_store(struct kobject *kobj, \ + struct kobj_attribute *attr, \ + 
const char *buf, size_t len) \ +{ \ + return reipl_generic_loadparm_store(reipl_block_##name, buf, len); \ +} \ +static struct kobj_attribute sys_reipl_##name##_loadparm_attr = \ + __ATTR(loadparm, 0644, reipl_##name##_loadparm_show, \ + reipl_##name##_loadparm_store) + +DEFINE_GENERIC_LOADPARM(fcp); +DEFINE_GENERIC_LOADPARM(nvme); +DEFINE_GENERIC_LOADPARM(ccw); +DEFINE_GENERIC_LOADPARM(nss); +DEFINE_GENERIC_LOADPARM(eckd); static ssize_t reipl_fcp_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) @@ -994,24 +999,6 @@ DEFINE_IPL_ATTR_RW(reipl_nvme, bootprog, "%lld\n", "%lld\n", DEFINE_IPL_ATTR_RW(reipl_nvme, br_lba, "%lld\n", "%lld\n", reipl_block_nvme->nvme.br_lba); -/* nvme wrapper */ -static ssize_t reipl_nvme_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) -{ - return reipl_generic_loadparm_show(reipl_block_nvme, page); -} - -static ssize_t reipl_nvme_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) -{ - return reipl_generic_loadparm_store(reipl_block_nvme, buf, len); -} - -static struct kobj_attribute sys_reipl_nvme_loadparm_attr = - __ATTR(loadparm, 0644, reipl_nvme_loadparm_show, - reipl_nvme_loadparm_store); - static struct attribute *reipl_nvme_attrs[] = { &sys_reipl_nvme_fid_attr.attr, &sys_reipl_nvme_nsid_attr.attr, @@ -1047,38 +1034,6 @@ static struct kobj_attribute sys_reipl_nvme_clear_attr = /* CCW reipl device attributes */ DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw); -/* NSS wrapper */ -static ssize_t reipl_nss_loadparm_show(struct kobject *kobj, - struct kobj_attribute *attr, char *page) -{ - return reipl_generic_loadparm_show(reipl_block_nss, page); -} - -static ssize_t reipl_nss_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) -{ - return reipl_generic_loadparm_store(reipl_block_nss, buf, len); -} - -/* CCW wrapper */ -static ssize_t reipl_ccw_loadparm_show(struct kobject 
*kobj, - struct kobj_attribute *attr, char *page) -{ - return reipl_generic_loadparm_show(reipl_block_ccw, page); -} - -static ssize_t reipl_ccw_loadparm_store(struct kobject *kobj, - struct kobj_attribute *attr, - const char *buf, size_t len) -{ - return reipl_generic_loadparm_store(reipl_block_ccw, buf, len); -} - -static struct kobj_attribute sys_reipl_ccw_loadparm_attr = - __ATTR(loadparm, 0644, reipl_ccw_loadparm_show, - reipl_ccw_loadparm_store); - static ssize_t reipl_ccw_clear_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { @@ -1176,6 +1131,7 @@ static struct attribute *reipl_eckd_attrs[] = { &sys_reipl_eckd_device_attr.attr, &sys_reipl_eckd_bootprog_attr.attr, &sys_reipl_eckd_br_chr_attr.attr, + &sys_reipl_eckd_loadparm_attr.attr, NULL, }; @@ -1194,7 +1150,7 @@ static ssize_t reipl_eckd_clear_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t len) { - if (strtobool(buf, &reipl_eckd_clear) < 0) + if (kstrtobool(buf, &reipl_eckd_clear) < 0) return -EINVAL; return len; } @@ -1251,10 +1207,6 @@ static struct kobj_attribute sys_reipl_nss_name_attr = __ATTR(name, 0644, reipl_nss_name_show, reipl_nss_name_store); -static struct kobj_attribute sys_reipl_nss_loadparm_attr = - __ATTR(loadparm, 0644, reipl_nss_loadparm_show, - reipl_nss_loadparm_store); - static struct attribute *reipl_nss_attrs[] = { &sys_reipl_nss_name_attr.attr, &sys_reipl_nss_loadparm_attr.attr, @@ -1986,15 +1938,14 @@ static void dump_reipl_run(struct shutdown_trigger *trigger) { unsigned long ipib = (unsigned long) reipl_block_actual; struct lowcore *abs_lc; - unsigned long flags; unsigned int csum; csum = (__force unsigned int) csum_partial(reipl_block_actual, reipl_block_actual->hdr.len, 0); - abs_lc = get_abs_lowcore(&flags); + abs_lc = get_abs_lowcore(); abs_lc->ipib = ipib; abs_lc->ipib_checksum = csum; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(abs_lc); dump_run(trigger); } diff --git a/arch/s390/kernel/irq.c 
b/arch/s390/kernel/irq.c index 45393919fe61..b020ff17d206 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -136,7 +136,7 @@ void noinstr do_io_irq(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); struct pt_regs *old_regs = set_irq_regs(regs); - int from_idle; + bool from_idle; irq_enter_rcu(); @@ -146,7 +146,7 @@ void noinstr do_io_irq(struct pt_regs *regs) current->thread.last_break = regs->last_break; } - from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq(); @@ -171,7 +171,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) { irqentry_state_t state = irqentry_enter(regs); struct pt_regs *old_regs = set_irq_regs(regs); - int from_idle; + bool from_idle; irq_enter_rcu(); @@ -185,7 +185,7 @@ void noinstr do_ext_irq(struct pt_regs *regs) regs->int_parm = S390_lowcore.ext_params; regs->int_parm_long = S390_lowcore.ext_params2; - from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit; + from_idle = test_and_clear_cpu_flag(CIF_ENABLED_WAIT); if (from_idle) account_idle_time_irq(); diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 401f9c933ff9..5e713f318de3 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -281,16 +281,6 @@ static void pop_kprobe(struct kprobe_ctlblk *kcb) } NOKPROBE_SYMBOL(pop_kprobe); -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->gprs[14]; - ri->fp = (void *)regs->gprs[15]; - - /* Replace the return addr with trampoline addr */ - regs->gprs[14] = (unsigned long)&__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static void kprobe_reenter_check(struct kprobe_ctlblk *kcb, struct kprobe *p) { switch (kcb->kprobe_status) { @@ -371,26 +361,6 @@ static int kprobe_handler(struct pt_regs *regs) } 
NOKPROBE_SYMBOL(kprobe_handler); -void arch_kretprobe_fixup_return(struct pt_regs *regs, - kprobe_opcode_t *correct_ret_addr) -{ - /* Replace fake return address with real one. */ - regs->gprs[14] = (unsigned long)correct_ret_addr; -} -NOKPROBE_SYMBOL(arch_kretprobe_fixup_return); - -/* - * Called from __kretprobe_trampoline - */ -void trampoline_probe_handler(struct pt_regs *regs) -{ - kretprobe_trampoline_handler(regs, (void *)regs->gprs[15]); -} -NOKPROBE_SYMBOL(trampoline_probe_handler); - -/* assembler function that handles the kretprobes must not be probed itself */ -NOKPROBE_SYMBOL(__kretprobe_trampoline); - /* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "breakpoint" diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 4579b42286d5..2a8e73266428 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -224,7 +224,6 @@ void machine_kexec_cleanup(struct kimage *image) void arch_crash_save_vmcoreinfo(void) { struct lowcore *abs_lc; - unsigned long flags; VMCOREINFO_SYMBOL(lowcore_ptr); VMCOREINFO_SYMBOL(high_memory); @@ -232,9 +231,9 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("SAMODE31=%lx\n", __samode31); vmcoreinfo_append_str("EAMODE31=%lx\n", __eamode31); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); - abs_lc = get_abs_lowcore(&flags); + abs_lc = get_abs_lowcore(); abs_lc->vmcore_info = paddr_vmcoreinfo_note(); - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(abs_lc); } void machine_shutdown(void) diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 4786bfe02144..43ff91073d2a 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -135,9 +135,9 @@ SYM_FUNC_END(return_to_handler) #endif #endif /* CONFIG_FUNCTION_TRACER */ -#ifdef CONFIG_KPROBES +#ifdef CONFIG_RETHOOK -SYM_FUNC_START(__kretprobe_trampoline) +SYM_FUNC_START(arch_rethook_trampoline) 
stg %r14,(__SF_GPRS+8*8)(%r15) lay %r15,-STACK_FRAME_SIZE(%r15) @@ -152,16 +152,16 @@ SYM_FUNC_START(__kretprobe_trampoline) epsw %r2,%r3 risbg %r3,%r2,0,31,32 stg %r3,STACK_PTREGS_PSW(%r15) - larl %r1,__kretprobe_trampoline + larl %r1,arch_rethook_trampoline stg %r1,STACK_PTREGS_PSW+8(%r15) lay %r2,STACK_PTREGS(%r15) - brasl %r14,trampoline_probe_handler + brasl %r14,arch_rethook_trampoline_callback mvc __SF_EMPTY(16,%r7),STACK_PTREGS_PSW(%r15) lmg %r0,%r15,STACK_PTREGS_GPRS(%r15) lpswe __SF_EMPTY(%r15) -SYM_FUNC_END(__kretprobe_trampoline) +SYM_FUNC_END(arch_rethook_trampoline) -#endif /* CONFIG_KPROBES */ +#endif /* CONFIG_RETHOOK */ diff --git a/arch/s390/kernel/os_info.c b/arch/s390/kernel/os_info.c index ec0bd9457e90..6e1824141b29 100644 --- a/arch/s390/kernel/os_info.c +++ b/arch/s390/kernel/os_info.c @@ -59,15 +59,14 @@ void os_info_entry_add(int nr, void *ptr, u64 size) void __init os_info_init(void) { struct lowcore *abs_lc; - unsigned long flags; os_info.version_major = OS_INFO_VERSION_MAJOR; os_info.version_minor = OS_INFO_VERSION_MINOR; os_info.magic = OS_INFO_MAGIC; os_info.csum = os_info_csum(&os_info); - abs_lc = get_abs_lowcore(&flags); + abs_lc = get_abs_lowcore(); abs_lc->os_info = __pa(&os_info); - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(abs_lc); } #ifdef CONFIG_CRASH_DUMP diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index f043a7ff220b..c9ab971498d6 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -2,7 +2,7 @@ /* * Performance event support for s390x - CPU-measurement Counter Facility * - * Copyright IBM Corp. 2012, 2021 + * Copyright IBM Corp. 
2012, 2023 * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> * Thomas Richter <tmricht@linux.ibm.com> */ @@ -16,11 +16,82 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/miscdevice.h> +#include <linux/perf_event.h> -#include <asm/cpu_mcf.h> +#include <asm/cpu_mf.h> #include <asm/hwctrset.h> #include <asm/debug.h> +enum cpumf_ctr_set { + CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */ + CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */ + CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */ + CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */ + CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */ + + /* Maximum number of counter sets */ + CPUMF_CTR_SET_MAX, +}; + +#define CPUMF_LCCTL_ENABLE_SHIFT 16 +#define CPUMF_LCCTL_ACTCTL_SHIFT 0 + +static inline void ctr_set_enable(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT; +} + +static inline void ctr_set_disable(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT); +} + +static inline void ctr_set_start(u64 *state, u64 ctrsets) +{ + *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT; +} + +static inline void ctr_set_stop(u64 *state, u64 ctrsets) +{ + *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT); +} + +static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest) +{ + switch (set) { + case CPUMF_CTR_SET_BASIC: + return stcctm(BASIC, range, dest); + case CPUMF_CTR_SET_USER: + return stcctm(PROBLEM_STATE, range, dest); + case CPUMF_CTR_SET_CRYPTO: + return stcctm(CRYPTO_ACTIVITY, range, dest); + case CPUMF_CTR_SET_EXT: + return stcctm(EXTENDED, range, dest); + case CPUMF_CTR_SET_MT_DIAG: + return stcctm(MT_DIAG_CLEARING, range, dest); + case CPUMF_CTR_SET_MAX: + return 3; + } + return 3; +} + +struct cpu_cf_events { + struct cpumf_ctr_info info; + atomic_t ctr_set[CPUMF_CTR_SET_MAX]; + u64 state; /* For perf_event_open SVC */ + u64 dev_state; /* For /dev/hwctr */ + unsigned int flags; + size_t used; /* Bytes 
used in data */ + size_t usedss; /* Bytes used in start/stop */ + unsigned char start[PAGE_SIZE]; /* Counter set at event add */ + unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */ + unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */ + unsigned int sets; /* # Counter set saved in memory */ +}; + +/* Per-CPU event structure for the counter facility */ +static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events); + static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */ static debug_info_t *cf_dbg; @@ -112,6 +183,53 @@ static void cfdiag_trailer(struct cf_trailer_entry *te) te->timestamp = get_tod_clock_fast(); } +/* + * Return the maximum possible counter set size (in number of 8 byte counters) + * depending on type and model number. + */ +static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, + struct cpumf_ctr_info *info) +{ + size_t ctrset_size = 0; + + switch (ctrset) { + case CPUMF_CTR_SET_BASIC: + if (info->cfvn >= 1) + ctrset_size = 6; + break; + case CPUMF_CTR_SET_USER: + if (info->cfvn == 1) + ctrset_size = 6; + else if (info->cfvn >= 3) + ctrset_size = 2; + break; + case CPUMF_CTR_SET_CRYPTO: + if (info->csvn >= 1 && info->csvn <= 5) + ctrset_size = 16; + else if (info->csvn == 6 || info->csvn == 7) + ctrset_size = 20; + break; + case CPUMF_CTR_SET_EXT: + if (info->csvn == 1) + ctrset_size = 32; + else if (info->csvn == 2) + ctrset_size = 48; + else if (info->csvn >= 3 && info->csvn <= 5) + ctrset_size = 128; + else if (info->csvn == 6 || info->csvn == 7) + ctrset_size = 160; + break; + case CPUMF_CTR_SET_MT_DIAG: + if (info->csvn > 3) + ctrset_size = 48; + break; + case CPUMF_CTR_SET_MAX: + break; + } + + return ctrset_size; +} + /* Read a counter set. The counter set number determines the counter set and * the CPUM-CF first and second version number determine the number of * available counters in each counter set. 
@@ -388,6 +506,47 @@ static void cpumf_pmu_disable(struct pmu *pmu) cpuhw->flags &= ~PMU_F_ENABLED; } +#define PMC_INIT 0UL +#define PMC_RELEASE 1UL + +static void cpum_cf_setup_cpu(void *flags) +{ + struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + + switch ((unsigned long)flags) { + case PMC_INIT: + memset(&cpuhw->info, 0, sizeof(cpuhw->info)); + qctri(&cpuhw->info); + cpuhw->flags |= PMU_F_RESERVED; + break; + + case PMC_RELEASE: + cpuhw->flags &= ~PMU_F_RESERVED; + break; + } + + /* Disable CPU counter sets */ + lcctl(0); + debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n", + __func__, *(int *)flags, cpuhw->flags, + cpuhw->state); +} + +/* Initialize the CPU-measurement counter facility */ +static int __kernel_cpumcf_begin(void) +{ + on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1); + irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); + + return 0; +} + +/* Release the CPU-measurement counter facility */ +static void __kernel_cpumcf_end(void) +{ + on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1); + irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); +} /* Number of perf events counting hardware events */ static atomic_t num_events = ATOMIC_INIT(0); @@ -397,12 +556,10 @@ static DEFINE_MUTEX(pmc_reserve_mutex); /* Release the PMU if event is the last perf event */ static void hw_perf_event_destroy(struct perf_event *event) { - if (!atomic_add_unless(&num_events, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_events) == 0) - __kernel_cpumcf_end(); - mutex_unlock(&pmc_reserve_mutex); - } + mutex_lock(&pmc_reserve_mutex); + if (atomic_dec_return(&num_events) == 0) + __kernel_cpumcf_end(); + mutex_unlock(&pmc_reserve_mutex); } /* CPUMF <-> perf event mappings for kernel+userspace (basic set) */ @@ -434,6 +591,12 @@ static void cpumf_hw_inuse(void) mutex_unlock(&pmc_reserve_mutex); } +static int is_userspace_event(u64 ev) +{ + return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev || 
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev; +} + static int __hw_perf_event_init(struct perf_event *event, unsigned int type) { struct perf_event_attr *attr = &event->attr; @@ -456,19 +619,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type) if (is_sampling_event(event)) /* No sampling support */ return -ENOENT; ev = attr->config; - /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { - if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) - return -EOPNOTSUPP; - ev = cpumf_generic_events_user[ev]; - - /* No support for kernel space counters only */ + /* + * Count user space (problem-state) only + * Handle events 32 and 33 as 0:u and 1:u + */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_user)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_user[ev]; + } } else if (!attr->exclude_kernel && attr->exclude_user) { + /* No support for kernel space counters only */ return -EOPNOTSUPP; - } else { /* Count user and kernel space */ - if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) - return -EOPNOTSUPP; - ev = cpumf_generic_events_basic[ev]; + } else { + /* Count user and kernel space, incl. 
events 32 + 33 */ + if (!is_userspace_event(ev)) { + if (ev >= ARRAY_SIZE(cpumf_generic_events_basic)) + return -EOPNOTSUPP; + ev = cpumf_generic_events_basic[ev]; + } } break; @@ -662,9 +832,7 @@ static int cfdiag_push_sample(struct perf_event *event, if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = cpuhw->usedss; raw.frag.data = cpuhw->stop; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, &regs); @@ -763,31 +931,120 @@ static struct pmu cpumf_pmu = { .read = cpumf_pmu_read, }; +static int cpum_cf_setup(unsigned int cpu, unsigned long flags) +{ + local_irq_disable(); + cpum_cf_setup_cpu((void *)flags); + local_irq_enable(); + return 0; +} + +static int cfset_online_cpu(unsigned int cpu); +static int cpum_cf_online_cpu(unsigned int cpu) +{ + debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__, + cpu, in_interrupt()); + cpum_cf_setup(cpu, PMC_INIT); + return cfset_online_cpu(cpu); +} + +static int cfset_offline_cpu(unsigned int cpu); +static int cpum_cf_offline_cpu(unsigned int cpu) +{ + debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu); + cfset_offline_cpu(cpu); + return cpum_cf_setup(cpu, PMC_RELEASE); +} + +/* Return true if store counter set multiple instruction is available */ +static inline int stccm_avail(void) +{ + return test_facility(142); +} + +/* CPU-measurement alerts for the counter facility */ +static void cpumf_measurement_alert(struct ext_code ext_code, + unsigned int alert, unsigned long unused) +{ + struct cpu_cf_events *cpuhw; + + if (!(alert & CPU_MF_INT_CF_MASK)) + return; + + inc_irq_stat(IRQEXT_CMC); + cpuhw = this_cpu_ptr(&cpu_cf_events); + + /* + * Measurement alerts are shared and might happen when the PMU + * is not reserved. Ignore these alerts in this case. 
+ */ + if (!(cpuhw->flags & PMU_F_RESERVED)) + return; + + /* counter authorization change alert */ + if (alert & CPU_MF_INT_CF_CACA) + qctri(&cpuhw->info); + + /* loss of counter data alert */ + if (alert & CPU_MF_INT_CF_LCDA) + pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); + + /* loss of MT counter data alert */ + if (alert & CPU_MF_INT_CF_MTDA) + pr_warn("CPU[%i] MT counter data was lost\n", + smp_processor_id()); +} + static int cfset_init(void); static int __init cpumf_pmu_init(void) { int rc; - if (!kernel_cpumcf_avail()) + if (!cpum_cf_avail()) return -ENODEV; + /* + * Clear bit 15 of cr0 to unauthorize problem-state to + * extract measurement counters + */ + ctl_clear_bit(0, 48); + + /* register handler for measurement-alert interruptions */ + rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, + cpumf_measurement_alert); + if (rc) { + pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc); + return rc; + } + /* Setup s390dbf facility */ cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128); if (!cf_dbg) { pr_err("Registration of s390dbf(cpum_cf) failed\n"); - return -ENOMEM; + rc = -ENOMEM; + goto out1; } debug_register_view(cf_dbg, &debug_sprintf_view); cpumf_pmu.attr_groups = cpumf_cf_event_group(); rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1); if (rc) { - debug_unregister_view(cf_dbg, &debug_sprintf_view); - debug_unregister(cf_dbg); pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc); + goto out2; } else if (stccm_avail()) { /* Setup counter set device */ cfset_init(); } + + rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, + "perf/s390/cf:online", + cpum_cf_online_cpu, cpum_cf_offline_cpu); + return rc; + +out2: + debug_unregister_view(cf_dbg, &debug_sprintf_view); + debug_unregister(cf_dbg); +out1: + unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert); return rc; } @@ -1005,7 +1262,6 @@ static int cfset_all_start(struct cfset_request *req) return rc; } - /* Return the maximum 
required space for all possible CPUs in case one * CPU will be onlined during the START, READ, STOP cycles. * To find out the size of the counter sets, any one CPU will do. They @@ -1268,7 +1524,7 @@ static struct miscdevice cfset_dev = { /* Hotplug add of a CPU. Scan through all active processes and add * that CPU to the list of CPUs supplied with ioctl(..., START, ...). */ -int cfset_online_cpu(unsigned int cpu) +static int cfset_online_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; struct cfset_request *rp; @@ -1288,7 +1544,7 @@ int cfset_online_cpu(unsigned int cpu) /* Hotplug remove of a CPU. Scan through all active processes and clear * that CPU from the list of CPUs supplied with ioctl(..., START, ...). */ -int cfset_offline_cpu(unsigned int cpu) +static int cfset_offline_cpu(unsigned int cpu) { struct cfset_call_on_cpu_parm p; struct cfset_request *rp; diff --git a/arch/s390/kernel/perf_cpum_cf_common.c b/arch/s390/kernel/perf_cpum_cf_common.c deleted file mode 100644 index 8ee48672233f..000000000000 --- a/arch/s390/kernel/perf_cpum_cf_common.c +++ /dev/null @@ -1,233 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * CPU-Measurement Counter Facility Support - Common Layer - * - * Copyright IBM Corp. 
2019 - * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com> - */ -#define KMSG_COMPONENT "cpum_cf_common" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/percpu.h> -#include <linux/notifier.h> -#include <linux/init.h> -#include <linux/export.h> -#include <asm/ctl_reg.h> -#include <asm/irq.h> -#include <asm/cpu_mcf.h> - -/* Per-CPU event structure for the counter facility */ -DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events) = { - .ctr_set = { - [CPUMF_CTR_SET_BASIC] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_USER] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_CRYPTO] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_EXT] = ATOMIC_INIT(0), - [CPUMF_CTR_SET_MT_DIAG] = ATOMIC_INIT(0), - }, - .alert = ATOMIC64_INIT(0), - .state = 0, - .dev_state = 0, - .flags = 0, - .used = 0, - .usedss = 0, - .sets = 0 -}; -/* Indicator whether the CPU-Measurement Counter Facility Support is ready */ -static bool cpum_cf_initalized; - -/* CPU-measurement alerts for the counter facility */ -static void cpumf_measurement_alert(struct ext_code ext_code, - unsigned int alert, unsigned long unused) -{ - struct cpu_cf_events *cpuhw; - - if (!(alert & CPU_MF_INT_CF_MASK)) - return; - - inc_irq_stat(IRQEXT_CMC); - cpuhw = this_cpu_ptr(&cpu_cf_events); - - /* Measurement alerts are shared and might happen when the PMU - * is not reserved. Ignore these alerts in this case. 
*/ - if (!(cpuhw->flags & PMU_F_RESERVED)) - return; - - /* counter authorization change alert */ - if (alert & CPU_MF_INT_CF_CACA) - qctri(&cpuhw->info); - - /* loss of counter data alert */ - if (alert & CPU_MF_INT_CF_LCDA) - pr_err("CPU[%i] Counter data was lost\n", smp_processor_id()); - - /* loss of MT counter data alert */ - if (alert & CPU_MF_INT_CF_MTDA) - pr_warn("CPU[%i] MT counter data was lost\n", - smp_processor_id()); - - /* store alert for special handling by in-kernel users */ - atomic64_or(alert, &cpuhw->alert); -} - -#define PMC_INIT 0 -#define PMC_RELEASE 1 -static void cpum_cf_setup_cpu(void *flags) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - - switch (*((int *) flags)) { - case PMC_INIT: - memset(&cpuhw->info, 0, sizeof(cpuhw->info)); - qctri(&cpuhw->info); - cpuhw->flags |= PMU_F_RESERVED; - break; - - case PMC_RELEASE: - cpuhw->flags &= ~PMU_F_RESERVED; - break; - } - - /* Disable CPU counter sets */ - lcctl(0); -} - -bool kernel_cpumcf_avail(void) -{ - return cpum_cf_initalized; -} -EXPORT_SYMBOL(kernel_cpumcf_avail); - -/* Initialize the CPU-measurement counter facility */ -int __kernel_cpumcf_begin(void) -{ - int flags = PMC_INIT; - - on_each_cpu(cpum_cf_setup_cpu, &flags, 1); - irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT); - - return 0; -} -EXPORT_SYMBOL(__kernel_cpumcf_begin); - -/* Obtain the CPU-measurement alerts for the counter facility */ -unsigned long kernel_cpumcf_alert(int clear) -{ - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); - unsigned long alert; - - alert = atomic64_read(&cpuhw->alert); - if (clear) - atomic64_set(&cpuhw->alert, 0); - - return alert; -} -EXPORT_SYMBOL(kernel_cpumcf_alert); - -/* Release the CPU-measurement counter facility */ -void __kernel_cpumcf_end(void) -{ - int flags = PMC_RELEASE; - - on_each_cpu(cpum_cf_setup_cpu, &flags, 1); - irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT); -} -EXPORT_SYMBOL(__kernel_cpumcf_end); - -static int 
cpum_cf_setup(unsigned int cpu, int flags) -{ - local_irq_disable(); - cpum_cf_setup_cpu(&flags); - local_irq_enable(); - return 0; -} - -static int cpum_cf_online_cpu(unsigned int cpu) -{ - cpum_cf_setup(cpu, PMC_INIT); - return cfset_online_cpu(cpu); -} - -static int cpum_cf_offline_cpu(unsigned int cpu) -{ - cfset_offline_cpu(cpu); - return cpum_cf_setup(cpu, PMC_RELEASE); -} - -/* Return the maximum possible counter set size (in number of 8 byte counters) - * depending on type and model number. - */ -size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset, - struct cpumf_ctr_info *info) -{ - size_t ctrset_size = 0; - - switch (ctrset) { - case CPUMF_CTR_SET_BASIC: - if (info->cfvn >= 1) - ctrset_size = 6; - break; - case CPUMF_CTR_SET_USER: - if (info->cfvn == 1) - ctrset_size = 6; - else if (info->cfvn >= 3) - ctrset_size = 2; - break; - case CPUMF_CTR_SET_CRYPTO: - if (info->csvn >= 1 && info->csvn <= 5) - ctrset_size = 16; - else if (info->csvn == 6 || info->csvn == 7) - ctrset_size = 20; - break; - case CPUMF_CTR_SET_EXT: - if (info->csvn == 1) - ctrset_size = 32; - else if (info->csvn == 2) - ctrset_size = 48; - else if (info->csvn >= 3 && info->csvn <= 5) - ctrset_size = 128; - else if (info->csvn == 6 || info->csvn == 7) - ctrset_size = 160; - break; - case CPUMF_CTR_SET_MT_DIAG: - if (info->csvn > 3) - ctrset_size = 48; - break; - case CPUMF_CTR_SET_MAX: - break; - } - - return ctrset_size; -} - -static int __init cpum_cf_init(void) -{ - int rc; - - if (!cpum_cf_avail()) - return -ENODEV; - - /* clear bit 15 of cr0 to unauthorize problem-state to - * extract measurement counters */ - ctl_clear_bit(0, 48); - - /* register handler for measurement-alert interruptions */ - rc = register_external_irq(EXT_IRQ_MEASURE_ALERT, - cpumf_measurement_alert); - if (rc) { - pr_err("Registering for CPU-measurement alerts " - "failed with rc=%i\n", rc); - return rc; - } - - rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE, - "perf/s390/cf:online", - cpum_cf_online_cpu, 
cpum_cf_offline_cpu); - if (!rc) - cpum_cf_initalized = true; - - return rc; -} -early_initcall(cpum_cf_init); diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index ce886a03545a..79904a839fb9 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -22,6 +22,7 @@ #include <asm/irq.h> #include <asm/debug.h> #include <asm/timex.h> +#include <asm-generic/io.h> /* Minimum number of sample-data-block-tables: * At least one table is required for the sampling buffer structure. @@ -99,6 +100,57 @@ static DEFINE_PER_CPU(struct cpu_hw_sf, cpu_hw_sf); /* Debug feature */ static debug_info_t *sfdbg; +/* Sampling control helper functions */ +static inline unsigned long freq_to_sample_rate(struct hws_qsi_info_block *qsi, + unsigned long freq) +{ + return (USEC_PER_SEC / freq) * qsi->cpu_speed; +} + +static inline unsigned long sample_rate_to_freq(struct hws_qsi_info_block *qsi, + unsigned long rate) +{ + return USEC_PER_SEC * qsi->cpu_speed / rate; +} + +/* Return TOD timestamp contained in an trailer entry */ +static inline unsigned long long trailer_timestamp(struct hws_trailer_entry *te) +{ + /* TOD in STCKE format */ + if (te->header.t) + return *((unsigned long long *)&te->timestamp[1]); + + /* TOD in STCK format */ + return *((unsigned long long *)&te->timestamp[0]); +} + +/* Return pointer to trailer entry of an sample data block */ +static inline struct hws_trailer_entry *trailer_entry_ptr(unsigned long v) +{ + void *ret; + + ret = (void *)v; + ret += PAGE_SIZE; + ret -= sizeof(struct hws_trailer_entry); + + return ret; +} + +/* + * Return true if the entry in the sample data block table (sdbt) + * is a link to the next sdbt + */ +static inline int is_link_entry(unsigned long *s) +{ + return *s & 0x1UL ? 
1 : 0; +} + +/* Return pointer to the linked sdbt */ +static inline unsigned long *get_next_sdbt(unsigned long *s) +{ + return phys_to_virt(*s & ~0x1UL); +} + /* * sf_disable() - Switch off sampling facility */ @@ -150,7 +202,7 @@ static void free_sampling_buffer(struct sf_buffer *sfb) } else { /* Process SDB pointer */ if (*curr) { - free_page(*curr); + free_page((unsigned long)phys_to_virt(*curr)); curr++; } } @@ -170,11 +222,11 @@ static int alloc_sample_data_block(unsigned long *sdbt, gfp_t gfp_flags) sdb = get_zeroed_page(gfp_flags); if (!sdb) return -ENOMEM; - te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te = trailer_entry_ptr(sdb); te->header.a = 1; /* Link SDB into the sample-data-block-table */ - *sdbt = sdb; + *sdbt = virt_to_phys((void *)sdb); return 0; } @@ -233,7 +285,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, } sfb->num_sdbt++; /* Link current page to tail of chain */ - *tail = (unsigned long)(void *) new + 1; + *tail = virt_to_phys((void *)new) + 1; tail_prev = tail; tail = new; } @@ -263,7 +315,7 @@ static int realloc_sampling_buffer(struct sf_buffer *sfb, } /* Link sampling buffer to its origin */ - *tail = (unsigned long) sfb->sdbt + 1; + *tail = virt_to_phys(sfb->sdbt) + 1; sfb->tail = tail; debug_sprintf_event(sfdbg, 4, "%s: new buffer" @@ -301,7 +353,7 @@ static int alloc_sampling_buffer(struct sf_buffer *sfb, unsigned long num_sdb) * realloc_sampling_buffer() invocation. 
*/ sfb->tail = sfb->sdbt; - *sfb->tail = (unsigned long)(void *) sfb->sdbt + 1; + *sfb->tail = virt_to_phys((void *)sfb->sdbt) + 1; /* Allocate requested number of sample-data-blocks */ rc = realloc_sampling_buffer(sfb, num_sdb, GFP_KERNEL); @@ -557,9 +609,6 @@ static void setup_pmc_cpu(void *flags) if (err) pr_err("Switching off the sampling facility failed " "with rc %i\n", err); - debug_sprintf_event(sfdbg, 5, - "%s: initialized: cpuhw %p\n", __func__, - cpusf); break; case PMC_RELEASE: cpusf->flags &= ~PMU_F_RESERVED; @@ -569,9 +618,6 @@ static void setup_pmc_cpu(void *flags) "with rc %i\n", err); } else deallocate_buffers(cpusf); - debug_sprintf_event(sfdbg, 5, - "%s: released: cpuhw %p\n", __func__, - cpusf); break; } if (err) @@ -672,7 +718,8 @@ static void cpumsf_output_event_pid(struct perf_event *event, /* Protect callchain buffers, tasks */ rcu_read_lock(); - perf_prepare_sample(&header, data, event, regs); + perf_prepare_sample(data, event, regs); + perf_prepare_header(&header, data, event, regs); if (perf_output_begin(&handle, data, event, header.size)) goto out; @@ -1176,8 +1223,8 @@ static void hw_collect_samples(struct perf_event *event, unsigned long *sdbt, struct hws_trailer_entry *te; struct hws_basic_entry *sample; - te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); - sample = (struct hws_basic_entry *) *sdbt; + te = trailer_entry_ptr((unsigned long)sdbt); + sample = (struct hws_basic_entry *)sdbt; while ((unsigned long *) sample < (unsigned long *) te) { /* Check for an empty sample */ if (!sample->def || sample->LS) @@ -1258,7 +1305,7 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) union hws_trailer_header old, prev, new; struct hw_perf_event *hwc = &event->hw; struct hws_trailer_entry *te; - unsigned long *sdbt; + unsigned long *sdbt, sdb; int done; /* @@ -1275,7 +1322,8 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) done = event_overflow = sampl_overflow = num_sdb = 0; 
while (!done) { /* Get the trailer entry of the sample-data-block */ - te = (struct hws_trailer_entry *) trailer_entry_ptr(*sdbt); + sdb = (unsigned long)phys_to_virt(*sdbt); + te = trailer_entry_ptr(sdb); /* Leave loop if no more work to do (block full indicator) */ if (!te->header.f) { @@ -1293,16 +1341,17 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) sampl_overflow += te->header.overflow; /* Timestamps are valid for full sample-data-blocks only */ - debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx " + debug_sprintf_event(sfdbg, 6, "%s: sdbt %#lx/%#lx " "overflow %llu timestamp %#llx\n", - __func__, (unsigned long)sdbt, te->header.overflow, + __func__, sdb, (unsigned long)sdbt, + te->header.overflow, (te->header.f) ? trailer_timestamp(te) : 0ULL); /* Collect all samples from a single sample-data-block and * flag if an (perf) event overflow happened. If so, the PMU * is stopped and remaining samples will be discarded. */ - hw_collect_samples(event, sdbt, &event_overflow); + hw_collect_samples(event, (unsigned long *)sdb, &event_overflow); num_sdb++; /* Reset trailer (using compare-double-and-swap) */ @@ -1360,10 +1409,26 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all) OVERFLOW_REG(hwc), num_sdb); } -#define AUX_SDB_INDEX(aux, i) ((i) % aux->sfb.num_sdb) -#define AUX_SDB_NUM(aux, start, end) (end >= start ? end - start + 1 : 0) -#define AUX_SDB_NUM_ALERT(aux) AUX_SDB_NUM(aux, aux->head, aux->alert_mark) -#define AUX_SDB_NUM_EMPTY(aux) AUX_SDB_NUM(aux, aux->head, aux->empty_mark) +static inline unsigned long aux_sdb_index(struct aux_buffer *aux, + unsigned long i) +{ + return i % aux->sfb.num_sdb; +} + +static inline unsigned long aux_sdb_num(unsigned long start, unsigned long end) +{ + return end >= start ? 
end - start + 1 : 0; +} + +static inline unsigned long aux_sdb_num_alert(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->alert_mark); +} + +static inline unsigned long aux_sdb_num_empty(struct aux_buffer *aux) +{ + return aux_sdb_num(aux->head, aux->empty_mark); +} /* * Get trailer entry by index of SDB. @@ -1373,9 +1438,9 @@ static struct hws_trailer_entry *aux_sdb_trailer(struct aux_buffer *aux, { unsigned long sdb; - index = AUX_SDB_INDEX(aux, index); + index = aux_sdb_index(aux, index); sdb = aux->sdb_index[index]; - return (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + return trailer_entry_ptr(sdb); } /* @@ -1397,7 +1462,7 @@ static void aux_output_end(struct perf_output_handle *handle) if (!aux) return; - range_scan = AUX_SDB_NUM_ALERT(aux); + range_scan = aux_sdb_num_alert(aux); for (i = 0, idx = aux->head; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); if (!te->header.f) @@ -1427,9 +1492,7 @@ static int aux_output_begin(struct perf_output_handle *handle, struct aux_buffer *aux, struct cpu_hw_sf *cpuhw) { - unsigned long range; - unsigned long i, range_scan, idx; - unsigned long head, base, offset; + unsigned long range, i, range_scan, idx, head, base, offset; struct hws_trailer_entry *te; if (WARN_ON_ONCE(handle->head & ~PAGE_MASK)) @@ -1448,8 +1511,8 @@ static int aux_output_begin(struct perf_output_handle *handle, "%s: range %ld head %ld alert %ld empty %ld\n", __func__, range, aux->head, aux->alert_mark, aux->empty_mark); - if (range > AUX_SDB_NUM_EMPTY(aux)) { - range_scan = range - AUX_SDB_NUM_EMPTY(aux); + if (range > aux_sdb_num_empty(aux)) { + range_scan = range - aux_sdb_num_empty(aux); idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); @@ -1467,11 +1530,11 @@ static int aux_output_begin(struct perf_output_handle *handle, te->header.a = 1; /* Reset hardware buffer head */ - head = AUX_SDB_INDEX(aux, aux->head); + head = aux_sdb_index(aux, aux->head); base = 
aux->sdbt_index[head / CPUM_SF_SDB_PER_TABLE]; offset = head % CPUM_SF_SDB_PER_TABLE; - cpuhw->lsctl.tear = base + offset * sizeof(unsigned long); - cpuhw->lsctl.dear = aux->sdb_index[head]; + cpuhw->lsctl.tear = virt_to_phys((void *)base) + offset * sizeof(unsigned long); + cpuhw->lsctl.dear = virt_to_phys((void *)aux->sdb_index[head]); debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld empty %ld " "index %ld tear %#lx dear %#lx\n", __func__, @@ -1549,7 +1612,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, debug_sprintf_event(sfdbg, 6, "%s: range %ld head %ld alert %ld " "empty %ld\n", __func__, range, aux->head, aux->alert_mark, aux->empty_mark); - if (range <= AUX_SDB_NUM_EMPTY(aux)) + if (range <= aux_sdb_num_empty(aux)) /* * No need to scan. All SDBs in range are marked as empty. * Just set alert indicator. Should check race with hardware @@ -1570,7 +1633,7 @@ static bool aux_reset_buffer(struct aux_buffer *aux, unsigned long range, * Start scanning from one SDB behind empty_mark. If the new alert * indicator fall into this range, set it. 
*/ - range_scan = range - AUX_SDB_NUM_EMPTY(aux); + range_scan = range - aux_sdb_num_empty(aux); idx_old = idx = aux->empty_mark + 1; for (i = 0; i < range_scan; i++, idx++) { te = aux_sdb_trailer(aux, idx); @@ -1617,7 +1680,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) return; /* Inform user space new data arrived */ - size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; debug_sprintf_event(sfdbg, 6, "%s: #alert %ld\n", __func__, size >> PAGE_SHIFT); perf_aux_output_end(handle, size); @@ -1659,7 +1722,7 @@ static void hw_collect_aux(struct cpu_hw_sf *cpuhw) "overflow %lld\n", __func__, aux->head, range, overflow); } else { - size = AUX_SDB_NUM_ALERT(aux) << PAGE_SHIFT; + size = aux_sdb_num_alert(aux) << PAGE_SHIFT; perf_aux_output_end(&cpuhw->handle, size); debug_sprintf_event(sfdbg, 6, "%s: head %ld alert %ld " "already full, try another\n", @@ -1701,7 +1764,7 @@ static void aux_sdb_init(unsigned long sdb) { struct hws_trailer_entry *te; - te = (struct hws_trailer_entry *)trailer_entry_ptr(sdb); + te = trailer_entry_ptr(sdb); /* Save clock base */ te->clock_base = 1; @@ -1781,18 +1844,18 @@ static void *aux_buffer_setup(struct perf_event *event, void **pages, goto no_sdbt; aux->sdbt_index[sfb->num_sdbt++] = (unsigned long)new; /* Link current page to tail of chain */ - *tail = (unsigned long)(void *) new + 1; + *tail = virt_to_phys(new) + 1; tail = new; } /* Tail is the entry in a SDBT */ - *tail = (unsigned long)pages[i]; + *tail = virt_to_phys(pages[i]); aux->sdb_index[i] = (unsigned long)pages[i]; aux_sdb_init((unsigned long)pages[i]); } sfb->num_sdb = nr_pages; /* Link the last entry in the SDBT to the first SDBT */ - *tail = (unsigned long) sfb->sdbt + 1; + *tail = virt_to_phys(sfb->sdbt) + 1; sfb->tail = tail; /* @@ -1932,7 +1995,7 @@ static int cpumsf_pmu_add(struct perf_event *event, int flags) cpuhw->lsctl.h = 1; cpuhw->lsctl.interval = SAMPL_RATE(&event->hw); if (!SAMPL_DIAG_MODE(&event->hw)) { - 
cpuhw->lsctl.tear = (unsigned long) cpuhw->sfb.sdbt; + cpuhw->lsctl.tear = virt_to_phys(cpuhw->sfb.sdbt); cpuhw->lsctl.dear = *(unsigned long *) cpuhw->sfb.sdbt; TEAR_REG(&event->hw) = (unsigned long) cpuhw->sfb.sdbt; } diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 985e243a2ed8..a7b339c4fd7c 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -362,9 +362,7 @@ static int paicrypt_push_sample(void) if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, &regs); diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 1138f57baae3..fcea307d7529 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -16,8 +16,8 @@ #include <linux/init.h> #include <linux/export.h> #include <linux/io.h> +#include <linux/perf_event.h> -#include <asm/cpu_mcf.h> #include <asm/ctl_reg.h> #include <asm/pai.h> #include <asm/debug.h> @@ -451,9 +451,7 @@ static int paiext_push_sample(void) if (event->attr.sample_type & PERF_SAMPLE_RAW) { raw.frag.size = rawsize; raw.frag.data = cpump->save; - raw.size = raw.frag.size; - data.raw = &raw; - data.sample_flags |= PERF_SAMPLE_RAW; + perf_sample_save_raw_data(&data, &raw); } overflow = perf_event_overflow(event, &data, &regs); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 3f5d2db0b854..67df64ef4839 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -147,8 +147,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* kernel thread */ memset(&frame->childregs, 0, sizeof(struct pt_regs)); - frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT | - PSW_MASK_IO | PSW_MASK_EXT | 
PSW_MASK_MCHECK; + frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO | + PSW_MASK_EXT | PSW_MASK_MCHECK; frame->childregs.psw.addr = (unsigned long)__ret_from_fork; frame->childregs.gprs[9] = (unsigned long)args->fn; diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 53e0209229f8..cf9659e13f03 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -990,7 +990,7 @@ static int s390_vxrs_low_get(struct task_struct *target, if (target == current) save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); + vxrs[i] = target->thread.fpu.vxrs[i].low; return membuf_write(&to, vxrs, sizeof(vxrs)); } @@ -1008,12 +1008,12 @@ static int s390_vxrs_low_set(struct task_struct *target, save_fpu_regs(); for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(target->thread.fpu.vxrs + i) + 1); + vxrs[i] = target->thread.fpu.vxrs[i].low; rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1); if (rc == 0) for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(target->thread.fpu.vxrs + i) + 1) = vxrs[i]; + target->thread.fpu.vxrs[i].low = vxrs[i]; return rc; } diff --git a/arch/s390/kernel/rethook.c b/arch/s390/kernel/rethook.c new file mode 100644 index 000000000000..af10e6bdd34e --- /dev/null +++ b/arch/s390/kernel/rethook.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +#include <linux/rethook.h> +#include <linux/kprobes.h> +#include "rethook.h" + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + rh->ret_addr = regs->gprs[14]; + rh->frame = regs->gprs[15]; + + /* Replace the return addr with trampoline addr */ + regs->gprs[14] = (unsigned long)&arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); + +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + /* Replace fake return address with real one. 
*/ + regs->gprs[14] = correct_ret_addr; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +/* + * Called from arch_rethook_trampoline + */ +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->gprs[15]); +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* assembler function that handles the rethook must not be probed itself */ +NOKPROBE_SYMBOL(arch_rethook_trampoline); diff --git a/arch/s390/kernel/rethook.h b/arch/s390/kernel/rethook.h new file mode 100644 index 000000000000..32f069eed3f3 --- /dev/null +++ b/arch/s390/kernel/rethook.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __S390_RETHOOK_H +#define __S390_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); + +#endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 696c9e007a36..8ec5cdf9dadc 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -149,6 +149,9 @@ int __bootdata(noexec_disabled); unsigned long __bootdata(ident_map_size); struct mem_detect_info __bootdata(mem_detect); struct initrd_data __bootdata(initrd_data); +unsigned long __bootdata(pgalloc_pos); +unsigned long __bootdata(pgalloc_end); +unsigned long __bootdata(pgalloc_low); unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata(__amode31_base); @@ -411,15 +414,10 @@ void __init arch_call_rest_init(void) call_on_stack_noreturn(rest_init, stack); } -static void __init setup_lowcore_dat_off(void) +static void __init setup_lowcore(void) { - unsigned long int_psw_mask = PSW_KERNEL_BITS; - struct lowcore *abs_lc, *lc; + struct lowcore *lc, *abs_lc; unsigned long mcck_stack; - unsigned long flags; - - if (IS_ENABLED(CONFIG_KASAN)) - int_psw_mask |= PSW_MASK_DAT; /* * Setup lowcore for boot cpu @@ -430,17 +428,17 @@ static void __init setup_lowcore_dat_off(void) panic("%s: Failed to allocate %zu bytes align=%zx\n", __func__, sizeof(*lc), sizeof(*lc)); - 
lc->restart_psw.mask = PSW_KERNEL_BITS; - lc->restart_psw.addr = (unsigned long) restart_int_handler; - lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; + lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; + lc->restart_psw.addr = __pa(restart_int_handler); + lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->external_new_psw.addr = (unsigned long) ext_int_handler; - lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; + lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->svc_new_psw.addr = (unsigned long) system_call; - lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; + lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->program_new_psw.addr = (unsigned long) pgm_check_handler; - lc->mcck_new_psw.mask = int_psw_mask; + lc->mcck_new_psw.mask = PSW_KERNEL_BITS; lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler; - lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK; + lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK; lc->io_new_psw.addr = (unsigned long) io_int_handler; lc->clock_comparator = clock_comparator_max; lc->nodat_stack = ((unsigned long) &init_thread_union) @@ -477,15 +475,7 @@ static void __init setup_lowcore_dat_off(void) lc->restart_fn = (unsigned long) do_restart; lc->restart_data = 0; lc->restart_source = -1U; - - abs_lc = get_abs_lowcore(&flags); - abs_lc->restart_stack = lc->restart_stack; - abs_lc->restart_fn = lc->restart_fn; - abs_lc->restart_data = lc->restart_data; - abs_lc->restart_source = lc->restart_source; - abs_lc->restart_psw = lc->restart_psw; - abs_lc->mcesad = lc->mcesad; - put_abs_lowcore(abs_lc, flags); + __ctl_store(lc->cregs_save_area, 0, 15); mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE); if (!mcck_stack) @@ -499,34 +489,25 @@ static void __init setup_lowcore_dat_off(void) lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW); lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW); lc->preempt_count = PREEMPT_DISABLED; + 
lc->kernel_asce = S390_lowcore.kernel_asce; + lc->user_asce = S390_lowcore.user_asce; + + abs_lc = get_abs_lowcore(); + abs_lc->restart_stack = lc->restart_stack; + abs_lc->restart_fn = lc->restart_fn; + abs_lc->restart_data = lc->restart_data; + abs_lc->restart_source = lc->restart_source; + abs_lc->restart_psw = lc->restart_psw; + abs_lc->restart_flags = RESTART_FLAG_CTLREGS; + memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area)); + abs_lc->program_new_psw = lc->program_new_psw; + abs_lc->mcesad = lc->mcesad; + put_abs_lowcore(abs_lc); set_prefix(__pa(lc)); lowcore_ptr[0] = lc; -} - -static void __init setup_lowcore_dat_on(void) -{ - struct lowcore *abs_lc; - unsigned long flags; - int i; - - __ctl_clear_bit(0, 28); - S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT; - S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT; - S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT; - S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT; - S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT; - __ctl_set_bit(0, 28); - __ctl_store(S390_lowcore.cregs_save_area, 0, 15); - if (abs_lowcore_map(0, lowcore_ptr[0], true)) + if (abs_lowcore_map(0, lowcore_ptr[0], false)) panic("Couldn't setup absolute lowcore"); - abs_lowcore_mapped = true; - abs_lc = get_abs_lowcore(&flags); - abs_lc->restart_flags = RESTART_FLAG_CTLREGS; - abs_lc->program_new_psw = S390_lowcore.program_new_psw; - for (i = 0; i < 16; i++) - abs_lc->cregs_save_area[i] = S390_lowcore.cregs_save_area[i]; - put_abs_lowcore(abs_lc, flags); } static struct resource code_resource = { @@ -619,7 +600,6 @@ static void __init setup_resources(void) static void __init setup_memory_end(void) { - memblock_remove(ident_map_size, PHYS_ADDR_MAX - ident_map_size); max_pfn = max_low_pfn = PFN_DOWN(ident_map_size); pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20); } @@ -651,6 +631,14 @@ static struct notifier_block kdump_mem_nb = { #endif /* + * Reserve page tables created by decompressor + */ 
+static void __init reserve_pgtables(void) +{ + memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos); +} + +/* * Reserve memory for kdump kernel to be loaded with kexec */ static void __init reserve_crashkernel(void) @@ -784,10 +772,10 @@ static void __init memblock_add_mem_detect_info(void) get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); - for_each_mem_detect_block(i, &start, &end) { + for_each_mem_detect_usable_block(i, &start, &end) memblock_add(start, end - start); + for_each_mem_detect_block(i, &start, &end) memblock_physmem_add(start, end - start); - } memblock_set_bottom_up(false); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); } @@ -1005,6 +993,7 @@ void __init setup_arch(char **cmdline_p) setup_control_program_code(); /* Do some memory reservations *before* memory is added to memblock */ + reserve_pgtables(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); @@ -1039,7 +1028,7 @@ void __init setup_arch(char **cmdline_p) #endif setup_resources(); - setup_lowcore_dat_off(); + setup_lowcore(); smp_fill_possible_mask(); cpu_detect_mhz_feature(); cpu_init(); @@ -1051,15 +1040,14 @@ void __init setup_arch(char **cmdline_p) static_branch_enable(&cpu_has_bear); /* - * Create kernel page tables and switch to virtual addressing. + * Create kernel page tables. */ paging_init(); - memcpy_real_init(); + /* * After paging_init created the kernel page table, the new PSWs * in lowcore can now run with DAT enabled. 
*/ - setup_lowcore_dat_on(); #ifdef CONFIG_CRASH_DUMP smp_save_dump_ipl_cpu(); #endif diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 38258f817048..d63557d3868c 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -184,7 +184,7 @@ static int save_sigregs_ext(struct pt_regs *regs, /* Save vector registers to signal stack */ if (MACHINE_HAS_VX) { for (i = 0; i < __NUM_VXRS_LOW; i++) - vxrs[i] = *((__u64 *)(current->thread.fpu.vxrs + i) + 1); + vxrs[i] = current->thread.fpu.vxrs[i].low; if (__copy_to_user(&sregs_ext->vxrs_low, vxrs, sizeof(sregs_ext->vxrs_low)) || __copy_to_user(&sregs_ext->vxrs_high, @@ -210,7 +210,7 @@ static int restore_sigregs_ext(struct pt_regs *regs, sizeof(sregs_ext->vxrs_high))) return -EFAULT; for (i = 0; i < __NUM_VXRS_LOW; i++) - *((__u64 *)(current->thread.fpu.vxrs + i) + 1) = vxrs[i]; + current->thread.fpu.vxrs[i].low = vxrs[i]; } return 0; } diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 0031325ce4bc..23c427284773 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -323,11 +323,10 @@ static void pcpu_delegate(struct pcpu *pcpu, { struct lowcore *lc, *abs_lc; unsigned int source_cpu; - unsigned long flags; lc = lowcore_ptr[pcpu - pcpu_devices]; source_cpu = stap(); - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + if (pcpu->address == source_cpu) { call_on_stack(2, stack, void, __pcpu_delegate, pcpu_delegate_fn *, func, void *, data); @@ -341,12 +340,12 @@ static void pcpu_delegate(struct pcpu *pcpu, lc->restart_data = (unsigned long)data; lc->restart_source = source_cpu; } else { - abs_lc = get_abs_lowcore(&flags); + abs_lc = get_abs_lowcore(); abs_lc->restart_stack = stack; abs_lc->restart_fn = (unsigned long)func; abs_lc->restart_data = (unsigned long)data; abs_lc->restart_source = source_cpu; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(abs_lc); } __bpon(); asm volatile( @@ -488,7 +487,7 @@ void smp_send_stop(void) int cpu; /* Disable all 
interrupts/machine checks */ - __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT); + __load_psw_mask(PSW_KERNEL_BITS); trace_hardirqs_off(); debug_set_critical(); @@ -593,7 +592,6 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) { struct ec_creg_mask_parms parms = { .cr = cr, }; struct lowcore *abs_lc; - unsigned long flags; u64 ctlreg; if (set) { @@ -604,11 +602,11 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) parms.andval = ~(1UL << bit); } spin_lock(&ctl_lock); - abs_lc = get_abs_lowcore(&flags); + abs_lc = get_abs_lowcore(); ctlreg = abs_lc->cregs_save_area[cr]; ctlreg = (ctlreg & parms.andval) | parms.orval; abs_lc->cregs_save_area[cr] = ctlreg; - put_abs_lowcore(abs_lc, flags); + put_abs_lowcore(abs_lc); spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); } diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 7ee455e8e3d5..0787010139f7 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -40,12 +40,12 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry, if (!addr) return -EINVAL; -#ifdef CONFIG_KPROBES +#ifdef CONFIG_RETHOOK /* - * Mark stacktraces with kretprobed functions on them + * Mark stacktraces with krethook functions on them * as unreliable. 
*/ - if (state.ip == (unsigned long)__kretprobe_trampoline) + if (state.ip == (unsigned long)arch_rethook_trampoline) return -EINVAL; #endif diff --git a/arch/s390/kernel/text_amode31.S b/arch/s390/kernel/text_amode31.S index 2c8b14cc5556..e0f01ce251f5 100644 --- a/arch/s390/kernel/text_amode31.S +++ b/arch/s390/kernel/text_amode31.S @@ -63,6 +63,19 @@ ENTRY(_diag210_amode31) ENDPROC(_diag210_amode31) /* + * int diag8c(struct diag8c *addr, struct ccw_dev_id *devno, size_t len) +*/ +ENTRY(_diag8c_amode31) + llgf %r3,0(%r3) + sam31 + diag %r2,%r4,0x8c +.Ldiag8c_ex: + sam64 + lgfr %r2,%r3 + BR_EX_AMODE31_r14 + EX_TABLE_AMODE31(.Ldiag8c_ex, .Ldiag8c_ex) +ENDPROC(_diag8c_amode31) +/* * int _diag26c_amode31(void *req, void *resp, enum diag26c_sc subcode) */ ENTRY(_diag26c_amode31) diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index cbf9c1b0beda..b653ba8d51e6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -44,7 +44,6 @@ SECTIONS HEAD_TEXT TEXT_TEXT SCHED_TEXT - CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT IRQENTRY_TEXT @@ -217,6 +216,9 @@ SECTIONS QUAD(__rela_dyn_start) /* rela_dyn_start */ QUAD(__rela_dyn_end) /* rela_dyn_end */ QUAD(_eamode31 - _samode31) /* amode31_size */ + QUAD(init_mm) + QUAD(swapper_pg_dir) + QUAD(invalid_pg_dir) } :NONE /* Debugging sections. */ @@ -228,5 +230,6 @@ SECTIONS DISCARDS /DISCARD/ : { *(.eh_frame) + *(.interp) } } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 9436f3053b88..e0a88dcaf5cb 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -7,13 +7,13 @@ */ #include <linux/kernel_stat.h> -#include <linux/sched/cputime.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> #include <asm/alternative.h> +#include <asm/cputime.h> #include <asm/vtimer.h> #include <asm/vtime.h> #include <asm/cpu_mf.h> |