summaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel/perf_cpum_cf.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 15:09:17 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-21 15:09:17 -0800
commitbcf5470eb4a13e5670fefb21525b43ef385c6fc6 (patch)
tree33395b005d3642873f385a628b7b7385608739d1 /arch/s390/kernel/perf_cpum_cf.c
parent877934769e5b91798d304d4641647900ee614ce8 (diff)
parent6472a2dcc4274452bb46fb5a0d968a1c1ed772ee (diff)
downloadlinux-stable-bcf5470eb4a13e5670fefb21525b43ef385c6fc6.tar.gz
linux-stable-bcf5470eb4a13e5670fefb21525b43ef385c6fc6.tar.bz2
linux-stable-bcf5470eb4a13e5670fefb21525b43ef385c6fc6.zip
Merge tag 's390-6.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux
Pull s390 updates from Heiko Carstens: - Large cleanup of the con3270/tty3270 driver. Among others this fixes: - Background Color Support - ASCII Line Character Support - VT100 Support - Geometries other than 80x24 - Cleanup and improve cmpxchg() code. Also add cmpxchg_user_key() to uaccess functions, which will be used by KVM to access KVM guest memory with a specific storage key - Add support for user space events counting to CPUMF - Cleanup the vfio/ccw code, which also allows now to properly support 2K Format-2 IDALs - Move kernel page table allocation and initialization to decompressor, which finally allows to enter the kernel with dynamic address translation enabled. This in turn allows to get rid of code with special handling in the kernel, which has to distinguish if DAT is on or off - Replace kretprobe with rethook - Various improvements to vfio/ap queue resets: - Use TAPQ to verify completion of a reset in progress rather than multiple invocations of ZAPQ. - Check TAPQ response codes when verifying successful completion of ZAPQ. - Fix erroneous handling of some error response codes. - Increase the maximum amount of time to wait for successful completion of ZAPQ - Rework system call wrappers to get rid of alias functions, which were only left on s390 - Cleanup diag288_wdt watchdog driver. It has been agreed on with Guenter Roeck that this goes upstream via the s390 tree - Add missing loadparm parameter handling for list-directed ECKD ipl/reipl - Various improvements to memory detection code - Remove arch_cpu_idle_time() since the current implementation is broken, and allows user space observable accounted idle times which can temporarily decrease - Add Reset DAT-Protection support: (only) allow to change PTEs from RO to RW with a new RDP instruction. Unlike the currently used IPTE instruction, this does not necessarily guarantee that TLBs of all CPUs are synchronously flushed; and that remote CPUs can see spurious protection faults. The overall improvement for not requiring an all CPU synchronization, like it is required with IPTE, should be beneficial - Fix KFENCE page fault reporting - Smaller cleanups and improvement all over the place * tag 's390-6.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (182 commits) s390/irq,idle: simplify idle check s390/processor: add test_and_set_cpu_flag() and test_and_clear_cpu_flag() s390/processor: let cpu helper functions return boolean values s390/kfence: fix page fault reporting s390/zcrypt: introduce ctfm field in struct CPRBX s390: remove confusing comment from uapi types header file vfio/ccw: remove WARN_ON during shutdown s390/entry: remove toolchain dependent micro-optimization s390/mem_detect: do not truncate online memory ranges info s390/vx: remove __uint128_t type from __vector128 struct again s390/mm: add support for RDP (Reset DAT-Protection) s390/mm: define private VM_FAULT_* reasons from top bits Documentation: s390: correct spelling s390/ap: fix status returned by ap_qact() s390/ap: fix status returned by ap_aqic() s390: vfio-ap: tighten the NIB validity check Revert "s390/mem_detect: do not update output parameters on failure" s390/idle: remove arch_cpu_idle_time() and corresponding code s390/vx: use simple assignments to access __vector128 members s390/vx: add 64 and 128 bit members to __vector128 struct ...
Diffstat (limited to 'arch/s390/kernel/perf_cpum_cf.c')
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c308
1 files changed, 283 insertions, 25 deletions
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index aa38649c7c27..c9ab971498d6 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,7 +2,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2021
+ * Copyright IBM Corp. 2012, 2023
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
* Thomas Richter <tmricht@linux.ibm.com>
*/
@@ -16,11 +16,82 @@
#include <linux/init.h>
#include <linux/export.h>
#include <linux/miscdevice.h>
+#include <linux/perf_event.h>
-#include <asm/cpu_mcf.h>
+#include <asm/cpu_mf.h>
#include <asm/hwctrset.h>
#include <asm/debug.h>
+enum cpumf_ctr_set {
+ CPUMF_CTR_SET_BASIC = 0, /* Basic Counter Set */
+ CPUMF_CTR_SET_USER = 1, /* Problem-State Counter Set */
+ CPUMF_CTR_SET_CRYPTO = 2, /* Crypto-Activity Counter Set */
+ CPUMF_CTR_SET_EXT = 3, /* Extended Counter Set */
+ CPUMF_CTR_SET_MT_DIAG = 4, /* MT-diagnostic Counter Set */
+
+ /* Maximum number of counter sets */
+ CPUMF_CTR_SET_MAX,
+};
+
+#define CPUMF_LCCTL_ENABLE_SHIFT 16
+#define CPUMF_LCCTL_ACTCTL_SHIFT 0
+
+static inline void ctr_set_enable(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ENABLE_SHIFT;
+}
+
+static inline void ctr_set_disable(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ENABLE_SHIFT);
+}
+
+static inline void ctr_set_start(u64 *state, u64 ctrsets)
+{
+ *state |= ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT;
+}
+
+static inline void ctr_set_stop(u64 *state, u64 ctrsets)
+{
+ *state &= ~(ctrsets << CPUMF_LCCTL_ACTCTL_SHIFT);
+}
+
+static inline int ctr_stcctm(enum cpumf_ctr_set set, u64 range, u64 *dest)
+{
+ switch (set) {
+ case CPUMF_CTR_SET_BASIC:
+ return stcctm(BASIC, range, dest);
+ case CPUMF_CTR_SET_USER:
+ return stcctm(PROBLEM_STATE, range, dest);
+ case CPUMF_CTR_SET_CRYPTO:
+ return stcctm(CRYPTO_ACTIVITY, range, dest);
+ case CPUMF_CTR_SET_EXT:
+ return stcctm(EXTENDED, range, dest);
+ case CPUMF_CTR_SET_MT_DIAG:
+ return stcctm(MT_DIAG_CLEARING, range, dest);
+ case CPUMF_CTR_SET_MAX:
+ return 3;
+ }
+ return 3;
+}
+
+struct cpu_cf_events {
+ struct cpumf_ctr_info info;
+ atomic_t ctr_set[CPUMF_CTR_SET_MAX];
+ u64 state; /* For perf_event_open SVC */
+ u64 dev_state; /* For /dev/hwctr */
+ unsigned int flags;
+ size_t used; /* Bytes used in data */
+ size_t usedss; /* Bytes used in start/stop */
+ unsigned char start[PAGE_SIZE]; /* Counter set at event add */
+ unsigned char stop[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned char data[PAGE_SIZE]; /* Counter set at /dev/hwctr */
+ unsigned int sets; /* # Counter set saved in memory */
+};
+
+/* Per-CPU event structure for the counter facility */
+static DEFINE_PER_CPU(struct cpu_cf_events, cpu_cf_events);
+
static unsigned int cfdiag_cpu_speed; /* CPU speed for CF_DIAG trailer */
static debug_info_t *cf_dbg;
@@ -112,6 +183,53 @@ static void cfdiag_trailer(struct cf_trailer_entry *te)
te->timestamp = get_tod_clock_fast();
}
+/*
+ * Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+static size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
+ struct cpumf_ctr_info *info)
+{
+ size_t ctrset_size = 0;
+
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (info->cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (info->cfvn == 1)
+ ctrset_size = 6;
+ else if (info->cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (info->csvn == 1)
+ ctrset_size = 32;
+ else if (info->csvn == 2)
+ ctrset_size = 48;
+ else if (info->csvn >= 3 && info->csvn <= 5)
+ ctrset_size = 128;
+ else if (info->csvn == 6 || info->csvn == 7)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (info->csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+
+ return ctrset_size;
+}
+
/* Read a counter set. The counter set number determines the counter set and
* the CPUM-CF first and second version number determine the number of
* available counters in each counter set.
@@ -388,6 +506,47 @@ static void cpumf_pmu_disable(struct pmu *pmu)
cpuhw->flags &= ~PMU_F_ENABLED;
}
+#define PMC_INIT 0UL
+#define PMC_RELEASE 1UL
+
+static void cpum_cf_setup_cpu(void *flags)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ switch ((unsigned long)flags) {
+ case PMC_INIT:
+ memset(&cpuhw->info, 0, sizeof(cpuhw->info));
+ qctri(&cpuhw->info);
+ cpuhw->flags |= PMU_F_RESERVED;
+ break;
+
+ case PMC_RELEASE:
+ cpuhw->flags &= ~PMU_F_RESERVED;
+ break;
+ }
+
+ /* Disable CPU counter sets */
+ lcctl(0);
+ debug_sprintf_event(cf_dbg, 5, "%s flags %#x flags %#x state %#llx\n",
+ __func__, *(int *)flags, cpuhw->flags,
+ cpuhw->state);
+}
+
+/* Initialize the CPU-measurement counter facility */
+static int __kernel_cpumcf_begin(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_INIT, 1);
+ irq_subclass_register(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+
+ return 0;
+}
+
+/* Release the CPU-measurement counter facility */
+static void __kernel_cpumcf_end(void)
+{
+ on_each_cpu(cpum_cf_setup_cpu, (void *)PMC_RELEASE, 1);
+ irq_subclass_unregister(IRQ_SUBCLASS_MEASUREMENT_ALERT);
+}
/* Number of perf events counting hardware events */
static atomic_t num_events = ATOMIC_INIT(0);
@@ -397,12 +556,10 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
/* Release the PMU if event is the last perf event */
static void hw_perf_event_destroy(struct perf_event *event)
{
- if (!atomic_add_unless(&num_events, -1, 1)) {
- mutex_lock(&pmc_reserve_mutex);
- if (atomic_dec_return(&num_events) == 0)
- __kernel_cpumcf_end();
- mutex_unlock(&pmc_reserve_mutex);
- }
+ mutex_lock(&pmc_reserve_mutex);
+ if (atomic_dec_return(&num_events) == 0)
+ __kernel_cpumcf_end();
+ mutex_unlock(&pmc_reserve_mutex);
}
/* CPUMF <-> perf event mappings for kernel+userspace (basic set) */
@@ -434,6 +591,12 @@ static void cpumf_hw_inuse(void)
mutex_unlock(&pmc_reserve_mutex);
}
+static int is_userspace_event(u64 ev)
+{
+ return cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev;
+}
+
static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
{
struct perf_event_attr *attr = &event->attr;
@@ -456,19 +619,26 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
if (is_sampling_event(event)) /* No sampling support */
return -ENOENT;
ev = attr->config;
- /* Count user space (problem-state) only */
if (!attr->exclude_user && attr->exclude_kernel) {
- if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_user[ev];
-
- /* No support for kernel space counters only */
+ /*
+ * Count user space (problem-state) only
+ * Handle events 32 and 33 as 0:u and 1:u
+ */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_user))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_user[ev];
+ }
} else if (!attr->exclude_kernel && attr->exclude_user) {
+ /* No support for kernel space counters only */
return -EOPNOTSUPP;
- } else { /* Count user and kernel space */
- if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
- return -EOPNOTSUPP;
- ev = cpumf_generic_events_basic[ev];
+ } else {
+ /* Count user and kernel space, incl. events 32 + 33 */
+ if (!is_userspace_event(ev)) {
+ if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
+ return -EOPNOTSUPP;
+ ev = cpumf_generic_events_basic[ev];
+ }
}
break;
@@ -761,31 +931,120 @@ static struct pmu cpumf_pmu = {
.read = cpumf_pmu_read,
};
+static int cpum_cf_setup(unsigned int cpu, unsigned long flags)
+{
+ local_irq_disable();
+ cpum_cf_setup_cpu((void *)flags);
+ local_irq_enable();
+ return 0;
+}
+
+static int cfset_online_cpu(unsigned int cpu);
+static int cpum_cf_online_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d in_irq %ld\n", __func__,
+ cpu, in_interrupt());
+ cpum_cf_setup(cpu, PMC_INIT);
+ return cfset_online_cpu(cpu);
+}
+
+static int cfset_offline_cpu(unsigned int cpu);
+static int cpum_cf_offline_cpu(unsigned int cpu)
+{
+ debug_sprintf_event(cf_dbg, 4, "%s cpu %d\n", __func__, cpu);
+ cfset_offline_cpu(cpu);
+ return cpum_cf_setup(cpu, PMC_RELEASE);
+}
+
+/* Return true if store counter set multiple instruction is available */
+static inline int stccm_avail(void)
+{
+ return test_facility(142);
+}
+
+/* CPU-measurement alerts for the counter facility */
+static void cpumf_measurement_alert(struct ext_code ext_code,
+ unsigned int alert, unsigned long unused)
+{
+ struct cpu_cf_events *cpuhw;
+
+ if (!(alert & CPU_MF_INT_CF_MASK))
+ return;
+
+ inc_irq_stat(IRQEXT_CMC);
+ cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ /*
+ * Measurement alerts are shared and might happen when the PMU
+ * is not reserved. Ignore these alerts in this case.
+ */
+ if (!(cpuhw->flags & PMU_F_RESERVED))
+ return;
+
+ /* counter authorization change alert */
+ if (alert & CPU_MF_INT_CF_CACA)
+ qctri(&cpuhw->info);
+
+ /* loss of counter data alert */
+ if (alert & CPU_MF_INT_CF_LCDA)
+ pr_err("CPU[%i] Counter data was lost\n", smp_processor_id());
+
+ /* loss of MT counter data alert */
+ if (alert & CPU_MF_INT_CF_MTDA)
+ pr_warn("CPU[%i] MT counter data was lost\n",
+ smp_processor_id());
+}
+
static int cfset_init(void);
static int __init cpumf_pmu_init(void)
{
int rc;
- if (!kernel_cpumcf_avail())
+ if (!cpum_cf_avail())
return -ENODEV;
+ /*
+ * Clear bit 15 of cr0 to unauthorize problem-state to
+ * extract measurement counters
+ */
+ ctl_clear_bit(0, 48);
+
+ /* register handler for measurement-alert interruptions */
+ rc = register_external_irq(EXT_IRQ_MEASURE_ALERT,
+ cpumf_measurement_alert);
+ if (rc) {
+ pr_err("Registering for CPU-measurement alerts failed with rc=%i\n", rc);
+ return rc;
+ }
+
/* Setup s390dbf facility */
cf_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
if (!cf_dbg) {
pr_err("Registration of s390dbf(cpum_cf) failed\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out1;
}
debug_register_view(cf_dbg, &debug_sprintf_view);
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", -1);
if (rc) {
- debug_unregister_view(cf_dbg, &debug_sprintf_view);
- debug_unregister(cf_dbg);
pr_err("Registering the cpum_cf PMU failed with rc=%i\n", rc);
+ goto out2;
} else if (stccm_avail()) { /* Setup counter set device */
cfset_init();
}
+
+ rc = cpuhp_setup_state(CPUHP_AP_PERF_S390_CF_ONLINE,
+ "perf/s390/cf:online",
+ cpum_cf_online_cpu, cpum_cf_offline_cpu);
+ return rc;
+
+out2:
+ debug_unregister_view(cf_dbg, &debug_sprintf_view);
+ debug_unregister(cf_dbg);
+out1:
+ unregister_external_irq(EXT_IRQ_MEASURE_ALERT, cpumf_measurement_alert);
return rc;
}
@@ -1003,7 +1262,6 @@ static int cfset_all_start(struct cfset_request *req)
return rc;
}
-
/* Return the maximum required space for all possible CPUs in case one
* CPU will be onlined during the START, READ, STOP cycles.
* To find out the size of the counter sets, any one CPU will do. They
@@ -1266,7 +1524,7 @@ static struct miscdevice cfset_dev = {
/* Hotplug add of a CPU. Scan through all active processes and add
* that CPU to the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_online_cpu(unsigned int cpu)
+static int cfset_online_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;
@@ -1286,7 +1544,7 @@ int cfset_online_cpu(unsigned int cpu)
/* Hotplug remove of a CPU. Scan through all active processes and clear
* that CPU from the list of CPUs supplied with ioctl(..., START, ...).
*/
-int cfset_offline_cpu(unsigned int cpu)
+static int cfset_offline_cpu(unsigned int cpu)
{
struct cfset_call_on_cpu_parm p;
struct cfset_request *rp;