From 5a6bbd1d18cabf5a680e726f0ef8f6dda0105fe8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 14:14:53 +0000 Subject: dt-bindings: arm-pmu: Document Apple PMU compatible strings As we are about to add support fur the Apple PMUs, document the compatible strings associated with the two micro-architectures present in the Apple M1. Acked-by: Rob Herring Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- Documentation/devicetree/bindings/arm/pmu.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/pmu.yaml b/Documentation/devicetree/bindings/arm/pmu.yaml index 981bac451698..7a04b8aaaec3 100644 --- a/Documentation/devicetree/bindings/arm/pmu.yaml +++ b/Documentation/devicetree/bindings/arm/pmu.yaml @@ -20,6 +20,8 @@ properties: items: - enum: - apm,potenza-pmu + - apple,firestorm-pmu + - apple,icestorm-pmu - arm,armv8-pmuv3 # Only for s/w models - arm,arm1136-pmu - arm,arm1176-pmu -- cgit v1.2.3 From 74703b13f9d2ef286ef588f29295a2fd30b5f295 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Nov 2021 19:58:42 +0000 Subject: dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts Advertise the two pseudo-interrupts that tied to the two PMU flavours present in the Apple M1 SoC. We choose the expose two different pseudo-interrupts to the OS as the e-core PMU is obviously different from the p-core one, effectively presenting two different devices. Acked-by: Rob Herring Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml | 2 ++ include/dt-bindings/interrupt-controller/apple-aic.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml index 97359024709a..c7577d401786 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -56,6 +56,8 @@ properties: - 1: virtual HV timer - 2: physical guest timer - 3: virtual guest timer + - 4: 'efficient' CPU PMU + - 5: 'performance' CPU PMU The 3rd cell contains the interrupt flags. This is normally IRQ_TYPE_LEVEL_HIGH (4). diff --git a/include/dt-bindings/interrupt-controller/apple-aic.h b/include/dt-bindings/interrupt-controller/apple-aic.h index 604f2bb30ac0..bf3aac0e5491 100644 --- a/include/dt-bindings/interrupt-controller/apple-aic.h +++ b/include/dt-bindings/interrupt-controller/apple-aic.h @@ -11,5 +11,7 @@ #define AIC_TMR_HV_VIRT 1 #define AIC_TMR_GUEST_PHYS 2 #define AIC_TMR_GUEST_VIRT 3 +#define AIC_CPU_PMU_E 4 +#define AIC_CPU_PMU_P 5 #endif -- cgit v1.2.3 From dba07ad11384d6a4ece4acda1fbe726222ca7ad0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Dec 2021 16:49:04 +0000 Subject: dt-bindings: apple,aic: Add affinity description for per-cpu pseudo-interrupts Some of the FIQ per-cpu pseudo-interrupts are better described with a specific affinity, the most obvious candidate being the CPU PMUs. Augment the AIC binding to be able to specify that affinity in the interrupt controller node. Reviewed-by: Rob Herring Signed-off-by: Marc Zyngier --- .../bindings/interrupt-controller/apple,aic.yaml | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml index c7577d401786..85c85b694217 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml @@ -70,6 +70,35 @@ properties: power-domains: maxItems: 1 + affinities: + type: object + additionalProperties: false + description: + FIQ affinity can be expressed as a single "affinities" node, + containing a set of sub-nodes, one per FIQ with a non-default + affinity. + patternProperties: + "^.+-affinity$": + type: object + additionalProperties: false + properties: + apple,fiq-index: + description: + The interrupt number specified as a FIQ, and for which + the affinity is not the default. + $ref: /schemas/types.yaml#/definitions/uint32 + maximum: 5 + + cpus: + $ref: /schemas/types.yaml#/definitions/phandle-array + description: + Should be a list of phandles to CPU nodes (as described in + Documentation/devicetree/bindings/arm/cpus.yaml). + + required: + - fiq-index + - cpus + required: - compatible - '#interrupt-cells' -- cgit v1.2.3 From a5e8801202b318622ea526aa5625e5f7eceb4d26 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 13:35:25 +0000 Subject: irqchip/apple-aic: Parse FIQ affinities from device-tree In order to be able to tell the core IRQ code about the affinity of the PMU interrupt in later patches, parse the affinities kindly provided in the device-tree. Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-apple-aic.c | 49 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 38091ebb9403..22d9b2058612 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -177,6 +177,9 @@ struct aic_irq_chip { void __iomem *base; struct irq_domain *hw_domain; struct irq_domain *ipi_domain; + struct { + cpumask_t aff; + } *fiq_aff[AIC_NR_FIQ]; int nr_hw; }; @@ -793,12 +796,50 @@ static struct gic_kvm_info vgic_info __initdata = { .no_hw_deactivation = true, }; +static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff) +{ + int i, n; + u32 fiq; + + if (of_property_read_u32(aff, "apple,fiq-index", &fiq) || + WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq]) + return; + + n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32)); + if (WARN_ON(n < 0)) + return; + + ic->fiq_aff[fiq] = kzalloc(sizeof(ic->fiq_aff[fiq]), GFP_KERNEL); + if (!ic->fiq_aff[fiq]) + return; + + for (i = 0; i < n; i++) { + struct device_node *cpu_node; + u32 cpu_phandle; + int cpu; + + if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle)) + continue; + + cpu_node = of_find_node_by_phandle(cpu_phandle); + if (WARN_ON(!cpu_node)) + continue; + + cpu = of_cpu_node_to_id(cpu_node); + if (WARN_ON(cpu < 0)) + continue; + + cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff); + } +} + static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent) { int i; void __iomem *regs; u32 info; struct aic_irq_chip *irqc; + struct device_node *affs; regs = of_iomap(node, 0); if (WARN_ON(!regs)) @@ -832,6 +873,14 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p return -ENODEV; } + affs = of_get_child_by_name(node, "affinities"); + if (affs) { + struct device_node *chld; + + for_each_child_of_node(affs, chld) + build_fiq_affinity(irqc, chld); + } + set_handle_irq(aic_handle_irq); set_handle_fiq(aic_handle_fiq); -- cgit v1.2.3 From c7708816c9442beb32488e07b0fb47b6f66577cb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 1 Nov 2021 19:59:20 +0000 Subject: irqchip/apple-aic: Wire PMU interrupts Add the necessary code to configure and P and E-core PMU interrupts with their respective affinities. When such an interrupt fires, map it onto the right pseudo-interrupt. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-apple-aic.c | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 22d9b2058612..873544e58676 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -155,7 +155,7 @@ #define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4) #define UPMSR_IACT BIT(0) -#define AIC_NR_FIQ 4 +#define AIC_NR_FIQ 6 #define AIC_NR_SWIPI 32 /* @@ -415,16 +415,15 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) aic_irqc->nr_hw + AIC_TMR_EL02_VIRT); } - if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == - (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { - /* - * Not supported yet, let's figure out how to handle this when - * we implement these proprietary performance counters. For now, - * just mask it and move on. - */ - pr_err_ratelimited("PMC FIQ fired. Masking.\n"); - sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT, - FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF)); + if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + int irq; + if (cpumask_test_cpu(smp_processor_id(), + &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) + irq = AIC_CPU_PMU_P; + else + irq = AIC_CPU_PMU_E; + generic_handle_domain_irq(aic_irqc->hw_domain, + aic_irqc->nr_hw + irq); } if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ && @@ -464,7 +463,18 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq, handle_fasteoi_irq, NULL, NULL); irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq))); } else { - irq_set_percpu_devid(irq); + int fiq = hw - ic->nr_hw; + + switch (fiq) { + case AIC_CPU_PMU_P: + case AIC_CPU_PMU_E: + irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff); + break; + default: + irq_set_percpu_devid(irq); + break; + } + irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data, handle_percpu_devid_irq, NULL, NULL); } -- cgit v1.2.3 From 1852e22b318b8d1c02b574da679b1b74f3686090 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 14 Dec 2021 15:56:55 +0000 Subject: arm64: dts: apple: Add t8103 PMU interrupt affinities The two PMU pseudo interrupts have specific affinities. One set is affine to the small cores, and the other set affine to the big ones. Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 19afbc91020a..a2e006538c56 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -213,6 +213,18 @@ interrupt-controller; reg = <0x2 0x3b100000 0x0 0x8000>; power-domains = <&ps_aic>; + + affinities { + e-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; + }; + + p-core-pmu-affinity { + apple,fiq-index = ; + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; + }; + }; }; pmgr: power-management@23b700000 { -- cgit v1.2.3 From 0f522efcd79634a6113195842ee763dc6ebacfbb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 2 Nov 2021 17:09:49 +0000 Subject: arm64: dts: apple: Add t8303 PMU nodes Advertise the two PMU nodes for the t8103 SoC. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/boot/dts/apple/t8103.dtsi | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index a2e006538c56..9f8f4145db88 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -97,6 +97,18 @@ ; }; + pmu-e { + compatible = "apple,icestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + + pmu-p { + compatible = "apple,firestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = ; + }; + clkref: clock-ref { compatible = "fixed-clock"; #clock-cells = <0>; -- cgit v1.2.3 From 11db7410cfcba2e5ffed7b8bb2a57d4dd5e22063 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 3 Nov 2021 13:55:19 +0000 Subject: irqchip/apple-aic: Move PMU-specific registers to their own include file As we are about to have a PMU driver, move the PMU bits from the AIC driver into a common include file. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/apple_m1_pmu.h | 19 +++++++++++++++++++ drivers/irqchip/irq-apple-aic.c | 11 +---------- 2 files changed, 20 insertions(+), 10 deletions(-) create mode 100644 arch/arm64/include/asm/apple_m1_pmu.h diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h new file mode 100644 index 000000000000..b848af7faadc --- /dev/null +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ASM_APPLE_M1_PMU_h +#define __ASM_APPLE_M1_PMU_h + +#include +#include + +/* Core PMC control register */ +#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_IMODE GENMASK(10, 8) +#define PMCR0_IMODE_OFF 0 +#define PMCR0_IMODE_PMI 1 +#define PMCR0_IMODE_AIC 2 +#define PMCR0_IMODE_HALT 3 +#define PMCR0_IMODE_FIQ 4 +#define PMCR0_IACT BIT(11) + +#endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 873544e58676..b40199c6625e 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include @@ -109,16 +110,6 @@ * Note: sysreg-based IPIs are not supported yet. */ -/* Core PMC control register */ -#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) -#define PMCR0_IMODE GENMASK(10, 8) -#define PMCR0_IMODE_OFF 0 -#define PMCR0_IMODE_PMI 1 -#define PMCR0_IMODE_AIC 2 -#define PMCR0_IMODE_HALT 3 -#define PMCR0_IMODE_FIQ 4 -#define PMCR0_IACT BIT(11) - /* IPI request registers */ #define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0) #define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1) -- cgit v1.2.3 From 1280f12f56a15abde23503ba876343e5f201c9c2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Feb 2022 18:56:03 +0000 Subject: drivers/perf: arm_pmu: Handle 47 bit counters The current ARM PMU framework can only deal with 32 or 64bit counters. Teach it about a 47bit flavour. Yes, this is odd. Reviewed-by: Hector Martin Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- drivers/perf/arm_pmu.c | 2 ++ include/linux/perf/arm_pmu.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 295cc7952d0e..0a9ed1a061ac 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event) { if (event->hw.flags & ARMPMU_EVT_64BIT) return GENMASK_ULL(63, 0); + else if (event->hw.flags & ARMPMU_EVT_47BIT) + return GENMASK_ULL(46, 0); else return GENMASK_ULL(31, 0); } diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 2512e2f9cd4e..0407a38b470a 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -26,6 +26,8 @@ */ /* Event uses a 64bit counter */ #define ARMPMU_EVT_64BIT 1 +/* Event uses a 47bit counter */ +#define ARMPMU_EVT_47BIT 2 #define HW_OP_UNSUPPORTED 0xFFFF #define C(_x) PERF_COUNT_HW_CACHE_##_x -- cgit v1.2.3 From a639027a1be1d68437e1c2cac6ed16306c84ab3c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 8 Feb 2022 18:56:04 +0000 Subject: drivers/perf: Add Apple icestorm/firestorm CPU PMU driver Add a new, weird and wonderful driver for the equally weird Apple PMU HW. Although the PMU itself is functional, we don't know much about the events yet, so this can be considered as yet another random number generator... Nonetheless, it can reliably count at least cycles and instructions in the usually wonky big-little way. For anything else, it of course supports raw event numbers. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/apple_m1_pmu.h | 45 +++ drivers/perf/Kconfig | 7 + drivers/perf/Makefile | 1 + drivers/perf/apple_m1_cpu_pmu.c | 584 ++++++++++++++++++++++++++++++++++ 4 files changed, 637 insertions(+) create mode 100644 drivers/perf/apple_m1_cpu_pmu.c diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h index b848af7faadc..99483b19b99f 100644 --- a/arch/arm64/include/asm/apple_m1_pmu.h +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -6,8 +6,21 @@ #include #include +/* Counters */ +#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0) +#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0) +#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0) +#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0) +#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0) +#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0) +#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0) +#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0) +#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0) +#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0) + /* Core PMC control register */ #define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0) #define PMCR0_IMODE GENMASK(10, 8) #define PMCR0_IMODE_OFF 0 #define PMCR0_IMODE_PMI 1 @@ -15,5 +28,37 @@ #define PMCR0_IMODE_HALT 3 #define PMCR0_IMODE_FIQ 4 #define PMCR0_IACT BIT(11) +#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12) +#define PMCR0_STOP_CNT_ON_PMI BIT(20) +#define PMCR0_CNT_GLOB_L2C_EVT BIT(21) +#define PMCR0_DEFER_PMI_TO_ERET BIT(22) +#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30) +#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32) +#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) + +#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) +#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) +#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) +#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) +#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48) + +#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0) +#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0) +#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0) + +#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0) +#define PMESR0_EVT_CNT_2 GENMASK(7, 0) +#define PMESR0_EVT_CNT_3 GENMASK(15, 8) +#define PMESR0_EVT_CNT_4 GENMASK(23, 16) +#define PMESR0_EVT_CNT_5 GENMASK(31, 24) + +#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0) +#define PMESR1_EVT_CNT_6 GENMASK(7, 0) +#define PMESR1_EVT_CNT_7 GENMASK(15, 8) +#define PMESR1_EVT_CNT_8 GENMASK(23, 16) +#define PMESR1_EVT_CNT_9 GENMASK(31, 24) + +#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0) +#define PMSR_OVERFLOW GENMASK(9, 0) #endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index e1a0c44bc686..d4fa0dabb05f 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -146,6 +146,13 @@ config MARVELL_CN10K_TAD_PMU Provides support for Last-Level cache Tag-and-data Units (LLC-TAD) performance monitors on CN10K family silicons. +config APPLE_M1_CPU_PMU + bool "Apple M1 CPU PMU support" + depends on ARM_PMU && ARCH_APPLE + help + Provides support for the non-architectural CPU PMUs present on + the Apple M1 SoCs and derivatives. + source "drivers/perf/hisilicon/Kconfig" endmenu diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile index 2db5418d5b0a..21ad0832e3d4 100644 --- a/drivers/perf/Makefile +++ b/drivers/perf/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o +obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o diff --git a/drivers/perf/apple_m1_cpu_pmu.c b/drivers/perf/apple_m1_cpu_pmu.c new file mode 100644 index 000000000000..979a7c2b4f56 --- /dev/null +++ b/drivers/perf/apple_m1_cpu_pmu.c @@ -0,0 +1,584 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CPU PMU driver for the Apple M1 and derivatives + * + * Copyright (C) 2021 Google LLC + * + * Author: Marc Zyngier + * + * Most of the information used in this driver was provided by the + * Asahi Linux project. The rest was experimentally discovered. + */ + +#include +#include +#include + +#include +#include +#include + +#define M1_PMU_NR_COUNTERS 10 + +#define M1_PMU_CFG_EVENT GENMASK(7, 0) + +#define ANY_BUT_0_1 GENMASK(9, 2) +#define ONLY_2_TO_7 GENMASK(7, 2) +#define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6)) +#define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7)) + +/* + * Description of the events we actually know about, as well as those with + * a specific counter affinity. Yes, this is a grand total of two known + * counters, and the rest is anybody's guess. + * + * Not all counters can count all events. Counters #0 and #1 are wired to + * count cycles and instructions respectively, and some events have + * bizarre mappings (every other counter, or even *one* counter). These + * restrictions equally apply to both P and E cores. + * + * It is worth noting that the PMUs attached to P and E cores are likely + * to be different because the underlying uarches are different. At the + * moment, we don't really need to distinguish between the two because we + * know next to nothing about the events themselves, and we already have + * per cpu-type PMU abstractions. + * + * If we eventually find out that the events are different across + * implementations, we'll have to introduce per cpu-type tables. + */ +enum m1_pmu_events { + M1_PMU_PERFCTR_UNKNOWN_01 = 0x01, + M1_PMU_PERFCTR_CPU_CYCLES = 0x02, + M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c, + M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d, + M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e, + M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f, + M1_PMU_PERFCTR_UNKNOWN_90 = 0x90, + M1_PMU_PERFCTR_UNKNOWN_93 = 0x93, + M1_PMU_PERFCTR_UNKNOWN_94 = 0x94, + M1_PMU_PERFCTR_UNKNOWN_95 = 0x95, + M1_PMU_PERFCTR_UNKNOWN_96 = 0x96, + M1_PMU_PERFCTR_UNKNOWN_97 = 0x97, + M1_PMU_PERFCTR_UNKNOWN_98 = 0x98, + M1_PMU_PERFCTR_UNKNOWN_99 = 0x99, + M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a, + M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b, + M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c, + M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f, + M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf, + M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0, + M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1, + M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4, + M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5, + M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6, + M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8, + M1_PMU_PERFCTR_UNKNOWN_ca = 0xca, + M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb, + M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5, + M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6, + M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7, + M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8, + M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd, + M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT, + + /* + * From this point onwards, these are not actual HW events, + * but attributes that get stored in hw->config_base. + */ + M1_PMU_CFG_COUNT_USER = BIT(8), + M1_PMU_CFG_COUNT_KERNEL = BIT(9), +}; + +/* + * Per-event affinity table. Most events can be installed on counter + * 2-9, but there are a number of exceptions. Note that this table + * has been created experimentally, and I wouldn't be surprised if more + * counters had strange affinities. + */ +static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = { + [0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1, + [M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7), + [M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0), + [M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1), + [M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7), + [M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7, + [M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6, + [M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7, + [M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6, +}; + +static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = { + PERF_MAP_ALL_UNSUPPORTED, + [PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS, + /* No idea about the rest yet */ +}; + +/* sysfs definitions */ +static ssize_t m1_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, + char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%04llx\n", pmu_attr->id); +} + +#define M1_PMU_EVENT_ATTR(name, config) \ + PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config) + +static struct attribute *m1_pmu_event_attrs[] = { + M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES), + M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS), + NULL, +}; + +static const struct attribute_group m1_pmu_events_attr_group = { + .name = "events", + .attrs = m1_pmu_event_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7"); + +static struct attribute *m1_pmu_format_attrs[] = { + &format_attr_event.attr, + NULL, +}; + +static const struct attribute_group m1_pmu_format_attr_group = { + .name = "format", + .attrs = m1_pmu_format_attrs, +}; + +/* Low level accessors. No synchronisation. */ +#define PMU_READ_COUNTER(_idx) \ + case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1) + +#define PMU_WRITE_COUNTER(_val, _idx) \ + case _idx: \ + write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \ + return + +static u64 m1_pmu_read_hw_counter(unsigned int index) +{ + switch (index) { + PMU_READ_COUNTER(0); + PMU_READ_COUNTER(1); + PMU_READ_COUNTER(2); + PMU_READ_COUNTER(3); + PMU_READ_COUNTER(4); + PMU_READ_COUNTER(5); + PMU_READ_COUNTER(6); + PMU_READ_COUNTER(7); + PMU_READ_COUNTER(8); + PMU_READ_COUNTER(9); + } + + BUG(); +} + +static void m1_pmu_write_hw_counter(u64 val, unsigned int index) +{ + switch (index) { + PMU_WRITE_COUNTER(val, 0); + PMU_WRITE_COUNTER(val, 1); + PMU_WRITE_COUNTER(val, 2); + PMU_WRITE_COUNTER(val, 3); + PMU_WRITE_COUNTER(val, 4); + PMU_WRITE_COUNTER(val, 5); + PMU_WRITE_COUNTER(val, 6); + PMU_WRITE_COUNTER(val, 7); + PMU_WRITE_COUNTER(val, 8); + PMU_WRITE_COUNTER(val, 9); + } + + BUG(); +} + +#define get_bit_offset(index, mask) (__ffs(mask) + (index)) + +static void __m1_pmu_enable_counter(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, true); +} + +static void m1_pmu_disable_counter(unsigned int index) +{ + __m1_pmu_enable_counter(index, false); +} + +static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en) +{ + u64 val, bit; + + switch (index) { + case 0 ... 7: + bit = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7)); + break; + case 8 ... 9: + bit = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + + if (en) + val |= bit; + else + val &= ~bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); +} + +static void m1_pmu_enable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, true); +} + +static void m1_pmu_disable_counter_interrupt(unsigned int index) +{ + __m1_pmu_enable_counter_interrupt(index, false); +} + +static void m1_pmu_configure_counter(unsigned int index, u8 event, + bool user, bool kernel) +{ + u64 val, user_bit, kernel_bit; + int shift; + + switch (index) { + case 0 ... 7: + user_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7)); + kernel_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7)); + break; + case 8 ... 9: + user_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9)); + kernel_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9)); + break; + default: + BUG(); + } + + val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1); + + if (user) + val |= user_bit; + else + val &= ~user_bit; + + if (kernel) + val |= kernel_bit; + else + val &= ~kernel_bit; + + write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1); + + /* + * Counters 0 and 1 have fixed events. For anything else, + * place the event at the expected location in the relevant + * register (PMESR0 holds the event configuration for counters + * 2-5, resp. PMESR1 for counters 6-9). + */ + switch (index) { + case 0 ... 1: + break; + case 2 ... 5: + shift = (index - 2) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR0_EL1); + break; + case 6 ... 9: + shift = (index - 6) * 8; + val = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1); + val &= ~((u64)0xff << shift); + val |= (u64)event << shift; + write_sysreg_s(val, SYS_IMP_APL_PMESR1_EL1); + break; + } +} + +/* arm_pmu backend */ +static void m1_pmu_enable_event(struct perf_event *event) +{ + bool user, kernel; + u8 evt; + + evt = event->hw.config_base & M1_PMU_CFG_EVENT; + user = event->hw.config_base & M1_PMU_CFG_COUNT_USER; + kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL; + + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); + + m1_pmu_configure_counter(event->hw.idx, evt, user, kernel); + m1_pmu_enable_counter(event->hw.idx); + m1_pmu_enable_counter_interrupt(event->hw.idx); + isb(); +} + +static void m1_pmu_disable_event(struct perf_event *event) +{ + m1_pmu_disable_counter_interrupt(event->hw.idx); + m1_pmu_disable_counter(event->hw.idx); + isb(); +} + +static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu) +{ + struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); + struct pt_regs *regs; + u64 overflow, state; + int idx; + + overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1); + if (!overflow) { + /* Spurious interrupt? */ + state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + state &= ~PMCR0_IACT; + write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1); + isb(); + return IRQ_NONE; + } + + cpu_pmu->stop(cpu_pmu); + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + struct perf_sample_data data; + + if (!event) + continue; + + armpmu_event_update(event); + perf_sample_data_init(&data, 0, event->hw.last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + m1_pmu_disable_event(event); + } + + cpu_pmu->start(cpu_pmu); + + return IRQ_HANDLED; +} + +static u64 m1_pmu_read_counter(struct perf_event *event) +{ + return m1_pmu_read_hw_counter(event->hw.idx); +} + +static void m1_pmu_write_counter(struct perf_event *event, u64 value) +{ + m1_pmu_write_hw_counter(value, event->hw.idx); + isb(); +} + +static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + unsigned long evtype = event->hw.config_base & M1_PMU_CFG_EVENT; + unsigned long affinity = m1_pmu_event_affinity[evtype]; + int idx; + + /* + * Place the event on the first free counter that can count + * this event. + * + * We could do a better job if we had a view of all the events + * counting on the PMU at any given time, and by placing the + * most constraining events first. + */ + for_each_set_bit(idx, &affinity, M1_PMU_NR_COUNTERS) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + return -EAGAIN; +} + +static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + clear_bit(event->hw.idx, cpuc->used_mask); +} + +static void __m1_pmu_set_mode(u8 mode) +{ + u64 val; + + val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1); + val &= ~(PMCR0_IMODE | PMCR0_IACT); + val |= FIELD_PREP(PMCR0_IMODE, mode); + write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1); + isb(); +} + +static void m1_pmu_start(struct arm_pmu *cpu_pmu) +{ + __m1_pmu_set_mode(PMCR0_IMODE_FIQ); +} + +static void m1_pmu_stop(struct arm_pmu *cpu_pmu) +{ + __m1_pmu_set_mode(PMCR0_IMODE_OFF); +} + +static int m1_pmu_map_event(struct perf_event *event) +{ + /* + * Although the counters are 48bit wide, bit 47 is what + * triggers the overflow interrupt. Advertise the counters + * being 47bit wide to mimick the behaviour of the ARM PMU. + */ + event->hw.flags |= ARMPMU_EVT_47BIT; + return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT); +} + +static void m1_pmu_reset(void *info) +{ + int i; + + __m1_pmu_set_mode(PMCR0_IMODE_OFF); + + for (i = 0; i < M1_PMU_NR_COUNTERS; i++) { + m1_pmu_disable_counter(i); + m1_pmu_disable_counter_interrupt(i); + m1_pmu_write_hw_counter(0, i); + } + + isb(); +} + +static int m1_pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (!attr->exclude_guest) + return -EINVAL; + if (!attr->exclude_kernel) + config_base |= M1_PMU_CFG_COUNT_KERNEL; + if (!attr->exclude_user) + config_base |= M1_PMU_CFG_COUNT_USER; + + event->config_base = config_base; + + return 0; +} + +static int m1_pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = m1_pmu_handle_irq; + cpu_pmu->enable = m1_pmu_enable_event; + cpu_pmu->disable = m1_pmu_disable_event; + cpu_pmu->read_counter = m1_pmu_read_counter; + cpu_pmu->write_counter = m1_pmu_write_counter; + cpu_pmu->get_event_idx = m1_pmu_get_event_idx; + cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx; + cpu_pmu->start = m1_pmu_start; + cpu_pmu->stop = m1_pmu_stop; + cpu_pmu->map_event = m1_pmu_map_event; + cpu_pmu->reset = m1_pmu_reset; + cpu_pmu->set_event_filter = m1_pmu_set_event_filter; + + cpu_pmu->num_events = M1_PMU_NR_COUNTERS; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group; + cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group; + return 0; +} + +/* Device driver gunk */ +static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_icestorm_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "apple_firestorm_pmu"; + return m1_pmu_init(cpu_pmu); +} + +static const struct of_device_id m1_pmu_of_device_ids[] = { + { .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, }, + { .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, }, + { }, +}; +MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids); + +static int m1_pmu_device_probe(struct platform_device *pdev) +{ + return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL); +} + +static struct platform_driver m1_pmu_driver = { + .driver = { + .name = "apple-m1-cpu-pmu", + .of_match_table = m1_pmu_of_device_ids, + .suppress_bind_attrs = true, + }, + .probe = m1_pmu_device_probe, +}; + +module_platform_driver(m1_pmu_driver); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3