From d4b5694c75d4eba8238d541a55da0c67e876213e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:01 -0700 Subject: perf/x86/intel: Use the common uarch name for the shared functions From PMU's perspective, the SPR/GNR server has a similar uarch to the ADL/MTL client p-core. Many functions are shared. However, the shared function name uses the abbreviation of the server product code name, rather than the common uarch code name. Rename these internal shared functions by the common uarch name. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 64 ++++++++++++++++++++++---------------------- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fa355d3658a6..93be19c27ec7 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -299,7 +299,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct extra_reg intel_spr_extra_regs[] __read_mostly = { +static struct extra_reg intel_glc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -309,7 +309,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct event_constraint intel_spr_event_constraints[] = { +static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -349,7 +349,7 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { +static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -473,7 +473,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst const u64 spr_hw_cache_event_ids +static __initconst const u64 glc_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -552,7 +552,7 @@ static __initconst const u64 spr_hw_cache_event_ids }, }; -static __initconst const u64 spr_hw_cache_extra_regs +static __initconst const u64 glc_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -4273,7 +4273,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * -spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, +glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { struct event_constraint *c; @@ -4362,7 +4362,7 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return glc_get_event_constraints(cpuc, idx, event); else if (pmu->cpu_type == 
hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); @@ -4409,7 +4409,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; - c = spr_get_event_constraints(cpuc, idx, event); + c = glc_get_event_constraints(cpuc, idx, event); /* The Retire Latency is not supported by the fixed counter 0. */ if (event->attr.precise_ip && @@ -4490,7 +4490,7 @@ static void nhm_limit_period(struct perf_event *event, s64 *left) *left = max(*left, 32LL); } -static void spr_limit_period(struct perf_event *event, s64 *left) +static void glc_limit_period(struct perf_event *event, s64 *left) { if (event->attr.precise_ip == 3) *left = max(*left, 128LL); @@ -5337,14 +5337,14 @@ static struct attribute *icl_tsx_events_attrs[] = { EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); -static struct attribute *spr_events_attrs[] = { +static struct attribute *glc_events_attrs[] = { EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_spr), EVENT_PTR(mem_ld_aux), NULL, }; -static struct attribute *spr_td_events_attrs[] = { +static struct attribute *glc_td_events_attrs[] = { EVENT_PTR(slots), EVENT_PTR(td_retiring), EVENT_PTR(td_bad_spec), @@ -5357,7 +5357,7 @@ static struct attribute *spr_td_events_attrs[] = { NULL, }; -static struct attribute *spr_tsx_events_attrs[] = { +static struct attribute *glc_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), EVENT_PTR(tx_commit), @@ -6215,7 +6215,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6246,7 +6246,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; @@ -6563,20 +6563,20 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.extra_regs = intel_glc_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: pmem = true; x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - x86_pmu.event_constraints = intel_spr_event_constraints; - x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.event_constraints = intel_glc_event_constraints; + x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_gnr_extra_regs; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.extra_regs = intel_rwc_extra_regs; + x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; @@ -6586,13 +6586,13 @@ __init int intel_pmu_init(void) x86_pmu.flags |= 
PMU_FL_INSTR_LATENCY; x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = spr_get_event_constraints; + x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; - mem_attr = spr_events_attrs; - td_attr = spr_td_events_attrs; - tsx_attr = spr_tsx_events_attrs; + mem_attr = glc_events_attrs; + td_attr = glc_td_events_attrs; + tsx_attr = glc_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); @@ -6642,7 +6642,7 @@ __init int intel_pmu_init(void) x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; /* * The rtm_abort_event is used to check whether to enable GPRs @@ -6691,11 +6691,11 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_spr_event_constraints; - pmu->pebs_constraints = intel_spr_pebs_event_constraints; - pmu->extra_regs = intel_spr_extra_regs; + memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_glc_event_constraints; + pmu->pebs_constraints = intel_glc_pebs_event_constraints; + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; @@ -6719,7 +6719,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index eb8dd8b8a1e8..74642469ca7b 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_spr_pebs_event_constraints[] = { +struct event_constraint intel_glc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index c8ba2be7585d..96a427fc55cf 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1521,7 +1521,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; -extern struct event_constraint intel_spr_pebs_event_constraints[]; +extern struct event_constraint intel_glc_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); -- cgit v1.2.3 From 0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:02 -0700 Subject: perf/x86/intel: Factor out the initialization code for SPR The SPR and ADL p-core have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_glc() for the common initialization code. The common part of the ADL p-core will be replaced by the later patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-3-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 49 +++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 93be19c27ec7..86aad3224a76 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5916,6 +5916,30 @@ static __always_inline bool is_mtl(u8 x86_model) (x86_model == INTEL_FAM6_METEORLAKE_L); } +static __always_inline void intel_pmu_init_glc(struct pmu *pmu) +{ + x86_pmu.late_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid(pmu, event_constraints) = intel_glc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6567,24 +6591,10 @@ __init int intel_pmu_init(void) fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: - pmem = true; - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - 
memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - x86_pmu.event_constraints = intel_glc_event_constraints; - x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; + intel_pmu_init_glc(NULL); if (!x86_pmu.extra_regs) x86_pmu.extra_regs = intel_rwc_extra_regs; - x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? @@ -6593,14 +6603,7 @@ __init int intel_pmu_init(void) mem_attr = glc_events_attrs; td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); - x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(pmem); - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &icl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &icl_set_topdown_event_period); + intel_pmu_pebs_data_source_skl(true); pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; break; -- cgit v1.2.3 From d87d221f854b62f5e8026505497d33404ef6050c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:03 -0700 Subject: perf/x86/intel: Factor out the initialization code for ADL e-core From PMU's perspective, the ADL e-core and newer SRF/GRR have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_grt() for the common initialization code. The common part of the ADL e-core will be replaced by the later patch. 
Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-4-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 58 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 86aad3224a76..95ac7bbd7fab 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5940,6 +5940,25 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; } +static __always_inline void intel_pmu_init_grt(struct pmu *pmu) +{ + x86_pmu.mid_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_grt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6218,28 +6237,10 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GRACEMONT: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; - x86_pmu.extra_regs = intel_grt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - + intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6249,28 +6250,11 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_CRESTMONT: case INTEL_FAM6_ATOM_CRESTMONT_X: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr 
= cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; -- cgit v1.2.3 From 299a5fc8e783eed705015e83e381912dbbf3eabc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:04 -0700 Subject: perf/x86/intel: Apply the common initialization code for ADL Use the intel_pmu_init_glc() and intel_pmu_init_grt() to replace the duplicate code for ADL. The current code already checks the PERF_X86_EVENT_TOPDOWN flag before invoking the Topdown metrics functions. (The PERF_X86_EVENT_TOPDOWN flag is to indicate the Topdown metric feature, which is only available for the p-core.) Drop the unnecessary adl_set_topdown_event_period() and adl_update_topdown_event(). Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-5-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 53 ++------------------------------------------ 1 file changed, 2 insertions(+), 51 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 95ac7bbd7fab..a5ba491a6497 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2556,16 +2556,6 @@ static int icl_set_topdown_event_period(struct perf_event *event) return 0; } -static int adl_set_topdown_event_period(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_set_topdown_event_period(event); -} - DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) @@ -2708,16 +2698,6 @@ static u64 icl_update_topdown_event(struct perf_event *event) x86_pmu.num_topdown_events - 1); } -static u64 adl_update_topdown_event(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_update_topdown_event(event); -} - DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); static void intel_pmu_read_topdown_event(struct perf_event *event) @@ -6612,32 +6592,11 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.lbr_pt_coexist = true; x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &adl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &adl_set_topdown_event_period); - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; - /* - * The rtm_abort_event is used to check whether to enable GPRs - * for the RTM abort event. Atom doesn't have the RTM abort - * event. There is no harmful to set it in the common - * x86_pmu.rtm_abort_event. 
- */ - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); td_attr = adl_hybrid_events_attrs; mem_attr = adl_hybrid_mem_attrs; @@ -6649,6 +6608,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; @@ -6678,16 +6638,13 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_glc_event_constraints; - pmu->pebs_constraints = intel_glc_pebs_event_constraints; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; @@ -6699,12 +6656,6 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; - memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - pmu->event_constraints = intel_slm_event_constraints; - pmu->pebs_constraints = intel_grt_pebs_event_constraints; - pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; -- cgit v1.2.3 From b0560bfd4b70277a4936c82e50e940aa253c95bf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:05 -0700 Subject: perf/x86/intel: Clean up the hybrid CPU type handling code There is a fairly long list of grievances about the current code. The main beefs: 1. hybrid_big_small assumes that the *HARDWARE* (CPUID) provided core types are a bitmap. They are not. If Intel happened to make a core type of 0xff, hilarity would ensue. 2. adl_get_hybrid_cpu_type() utterly inscrutable. There are precisely zero comments and zero changelog about what it is attempting to do. According to Kan, the adl_get_hybrid_cpu_type() is there because some Alder Lake (ADL) CPUs can do some silly things. Some ADL models are *supposed* to be hybrid CPUs with big and little cores, but there are some SKUs that only have big cores. CPUID(0x1a) on those CPUs does not say that the CPUs are big cores. It apparently just returns 0x0. It confuses perf because it expects to see either 0x40 (Core) or 0x20 (Atom). The perf workaround for this is to watch for a CPU core saying it is type 0x0. If that happens on an Alder Lake, it calls x86_pmu.get_hybrid_cpu_type() and just assumes that the core is a Core (0x40) CPU. To fix up the mess, separate out the CPU types and the 'pmu' types. This allows 'hybrid_pmu_type' bitmaps without worrying that some future CPU type will set multiple bits. Since the types are now separate, add a function to glue them back together again. Actual comment on the situation in the glue function (find_hybrid_pmu_for_cpu()). 
Also, give ->get_hybrid_cpu_type() a real return type and make it clear that it is overriding the *CPU* type, not the PMU type. Rename cpu_type to pmu_type in the struct x86_hybrid_pmu to reflect the change. Originally-by: Dave Hansen Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-6-kan.liang@linux.intel.com --- arch/x86/events/core.c | 6 ++-- arch/x86/events/intel/core.c | 69 ++++++++++++++++++++++++++++++-------------- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 35 +++++++++++++--------- 4 files changed, 72 insertions(+), 40 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 185f902e5f28..40ad1425ffa2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1887,9 +1887,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev, str = pmu_attr->event_str; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type)) continue; - if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) { next_str = strchr(str, ';'); if (next_str) return snprintf(page, next_str - str + 1, "%s", str); @@ -2169,7 +2169,7 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, - (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + (hybrid_pmu->pmu_type == hybrid_big) ? PERF_TYPE_RAW : -1); if (err) break; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5ba491a6497..9ac2e122113b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3849,7 +3849,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event) return false; if (is_hybrid()) - return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; return true; } @@ -4341,9 +4341,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return glc_get_event_constraints(cpuc, idx, event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4413,9 +4413,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return rwc_get_event_constraints(cpuc, idx, event); - if (pmu->cpu_type == hybrid_small) + if (pmu->pmu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4426,18 +4426,18 @@ static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return hsw_hw_config(event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return intel_pmu_hw_config(event); WARN_ON(1); return -EOPNOTSUPP; } -static u8 adl_get_hybrid_cpu_type(void) +static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) { - return hybrid_big; + return HYBRID_INTEL_CORE; } /* @@ -4613,22 +4613,47 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) } } -static bool init_hybrid_pmu(int cpu) 
+static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); u8 cpu_type = get_this_hybrid_cpu_type(); - struct x86_hybrid_pmu *pmu = NULL; int i; - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); + /* + * This is running on a CPU model that is known to have hybrid + * configurations. But the CPU told us it is not hybrid, shame + * on it. There should be a fixup function provided for these + * troublesome CPUs (->get_hybrid_cpu_type). + */ + if (cpu_type == HYBRID_INTEL_NONE) { + if (x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + else + return NULL; + } + /* + * This essentially just maps between the 'hybrid_cpu_type' + * and 'hybrid_pmu_type' enums: + */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { - pmu = &x86_pmu.hybrid_pmu[i]; - break; - } + enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + + if (cpu_type == HYBRID_INTEL_CORE && + pmu_type == hybrid_big) + return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM && + pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; } + + return NULL; +} + +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { cpuc->pmu = NULL; return false; @@ -5679,7 +5704,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) struct perf_pmu_events_hybrid_attr *pmu_attr = container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); - return pmu->cpu_type & pmu_attr->pmu_type; + return pmu->pmu_type & pmu_attr->pmu_type; } static umode_t hybrid_events_is_visible(struct kobject *kobj, @@ -5716,7 +5741,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); int cpu = hybrid_find_supported_cpu(pmu); - return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; + return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? 
attr->mode : 0; } static struct attribute_group hybrid_group_events_td = { @@ -6607,7 +6632,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; - pmu->cpu_type = hybrid_big; + pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { @@ -6643,7 +6668,7 @@ __init int intel_pmu_init(void) /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; - pmu->cpu_type = hybrid_small; + pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 74642469ca7b..bf97ab904d40 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 96a427fc55cf..53dd5d495ba6 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -652,10 +652,29 @@ enum { #define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +enum hybrid_cpu_type { + HYBRID_INTEL_NONE, + HYBRID_INTEL_ATOM = 0x20, + HYBRID_INTEL_CORE = 0x40, +}; + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(0), + hybrid_big = BIT(1), + + hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; - u8 cpu_type; + enum hybrid_pmu_type pmu_type; cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; @@ -721,18 +740,6 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ }) -enum hybrid_pmu_type { - hybrid_big = 0x40, - hybrid_small = 0x20, - - hybrid_big_small = hybrid_big | hybrid_small, -}; - -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - /* * struct x86_pmu - generic x86 pmu */ @@ -940,7 +947,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - u8 (*get_hybrid_cpu_type) (void); + enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { -- cgit v1.2.3 From 97588df87b56e27fd2b5d928d61c7a53e38afbb0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:06 -0700 Subject: perf/x86/intel: Add common intel_pmu_init_hybrid() The current hybrid initialization codes aren't well organized and are hard to read. Factor out intel_pmu_init_hybrid() to do a common setup for each hybrid PMU. The PMU-specific capability will be updated later via either hard code (ADL) or CPUID hybrid enumeration (MTL). Splitting the ADL and MTL initialization codes, since they have different uarches. The hard code PMU capabilities are not required for MTL either. They can be enumerated by the new leaf 0x23 and IA32_PERF_CAPABILITIES MSR. The hybrid enumeration of the IA32_PERF_CAPABILITIES MSR is broken on MTL. Using the default value. 
Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-7-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 162 +++++++++++++++++++++++++++++-------------- 1 file changed, 111 insertions(+), 51 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9ac2e122113b..83dfbba1ee69 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4598,6 +4598,16 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static inline bool intel_pmu_broken_perf_cap(void) +{ + /* The Perf Metric (Bit 15) is always cleared */ + if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || + (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + return true; + + return false; +} + static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); @@ -4610,7 +4620,27 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->num_counters_fixed = fls(ebx); intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, &pmu->intel_ctrl, ebx); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); } + + + if (!intel_pmu_broken_perf_cap()) { + /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + } + + if (pmu->intel_cap.perf_metrics) + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + else + pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + else + pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -5915,10 +5945,52 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } -static __always_inline bool is_mtl(u8 x86_model) +static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, +}; + +static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + unsigned long pmus_mask = pmus; + struct x86_hybrid_pmu *pmu; + int idx = 0, bit; + + x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); + x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + + static_branch_enable(&perf_is_hybrid); + x86_pmu.filter = intel_pmu_filter; + + for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { + pmu = &x86_pmu.hybrid_pmu[idx++]; + pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; + pmu->name = intel_hybrid_pmu_type_map[bit].name; + + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + if (pmu->pmu_type & hybrid_small) { + pmu->intel_cap.perf_metrics = 0; + 
pmu->intel_cap.pebs_output_pt_available = 1; + pmu->mid_ack = true; + } else if (pmu->pmu_type & hybrid_big) { + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + pmu->late_ack = true; + } + } + + return 0; } static __always_inline void intel_pmu_init_glc(struct pmu *pmu) @@ -6602,23 +6674,14 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * * Initialize the common PerfMon capabilities here. */ - x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, - sizeof(struct x86_hybrid_pmu), - GFP_KERNEL); - if (!x86_pmu.hybrid_pmu) - return -ENOMEM; - static_branch_enable(&perf_is_hybrid); - x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6631,10 +6694,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; - pmu->name = "cpu_core"; - pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); - pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6659,45 +6719,45 @@ __init int intel_pmu_init(void) pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; - pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; - pmu->name = "cpu_atom"; - pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); - pmu->mid_ack = true; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = x86_pmu.max_pebs_events; - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; - - if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; - x86_pmu.pebs_latency_data = mtl_latency_data_small; - extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
- mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; - mem_attr = mtl_hybrid_mem_attrs; - intel_pmu_pebs_data_source_mtl(); - x86_pmu.get_event_constraints = mtl_get_event_constraints; - pmu->extra_regs = intel_cmt_extra_regs; - pr_cont("Meteorlake Hybrid events, "); - name = "meteorlake_hybrid"; - } else { - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - intel_pmu_pebs_data_source_adl(); - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; - } + + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = adl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_glc(&pmu->pmu); + pmu->extra_regs = intel_rwc_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_mtl(); + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; break; default: @@ -6809,7 +6869,7 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid()) + if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) intel_pmu_check_hybrid_pmus((u64)fixed_mask); if (x86_pmu.intel_cap.pebs_timing_info) -- cgit v1.2.3 From 950ecdc672aec9cd29036b2e2535b07c103af494 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 06:51:28 -0700 Subject: perf/x86/intel: Fix broken fixed event constraints extension Unnecessary multiplexing is triggered when running an "instructions" event on an MTL. perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1 Performance counter stats for 'system wide': 115,489,000 cpu_core/instructions/ (50.02%) 127,433,777 cpu_core/instructions/ (49.98%) 1.002294504 seconds time elapsed Linux architectural perf events, e.g., cycles and instructions, usually have dedicated fixed counters. These events also have equivalent events which can be used in the general-purpose counters. The counters are precious. In the intel_pmu_check_event_constraints(), perf check/extend the event constraints of these events. So these events can utilize both fixed counters and general-purpose counters. The following cleanup commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") forgot adding the intel_pmu_check_event_constraints() into update_pmu_cap(). The architectural perf events cannot utilize the general-purpose counters. The code to check and update the counters, event constraints and extra_regs is the same among hybrid systems. Move intel_pmu_check_hybrid_pmus() to init_hybrid_pmu(), and emove the duplicate check in update_pmu_cap(). 
Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 65 ++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 83dfbba1ee69..e1543d6dc48a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4598,6 +4598,13 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl); + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); + static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ @@ -4618,12 +4625,6 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) &eax, &ebx, &ecx, &edx); pmu->num_counters = fls(eax); pmu->num_counters_fixed = fls(ebx); - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, ebx); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); } @@ -4631,6 +4632,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } +} + +static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) +{ + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; @@ -4641,6 +4652,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -4696,6 +4714,8 @@ static bool init_hybrid_pmu(int cpu) if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(pmu); + intel_pmu_check_hybrid_pmus(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -5915,36 +5935,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } -static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) -{ - struct x86_hybrid_pmu *pmu; - int i; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - pmu = &x86_pmu.hybrid_pmu[i]; - - intel_pmu_check_num_counters(&pmu->num_counters, - &pmu->num_counters_fixed, - &pmu->intel_ctrl, - fixed_mask); - - if (pmu->intel_cap.perf_metrics) { - pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; - } - - if (pmu->intel_cap.pebs_output_pt_available) - 
pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, - pmu->intel_ctrl); - - intel_pmu_check_extra_regs(pmu->extra_regs); - } -} - static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { { hybrid_small, "cpu_atom" }, { hybrid_big, "cpu_core" }, @@ -6869,9 +6859,6 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - intel_pmu_check_hybrid_pmus((u64)fixed_mask); - if (x86_pmu.intel_cap.pebs_timing_info) x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; -- cgit v1.2.3 From ffbe4ab0beda55b5c467aa3d95ca14db75a84717 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 07:41:38 -0700 Subject: perf/x86/intel: Extend the ref-cycles event to GP counters The current ref-cycles event is only available on the fixed counter 2. Starting from the GLC and GRT core, the architectural UnHalted Reference Cycles event (0x013c) which is available on general-purpose counters can collect the exact same events as the fixed counter 2. Update the mapping of ref-cycles to 0x013c. So the ref-cycles can be available on both fixed counter 2 and general-purpose counters. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911144139.2354015-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e1543d6dc48a..a08f794a0e79 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -211,6 +211,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_grt_event_constraints[] __read_mostly = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -314,6 +322,7 @@ static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), @@ -5983,6 +5992,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) return 0; } +static __always_inline void intel_pmu_ref_cycles_ext(void) +{ + if (!(x86_pmu.events_maskl & (INTEL_PMC_MSK_FIXED_REF_CYCLES >> INTEL_PMC_IDX_FIXED))) + intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c; +} + static __always_inline void intel_pmu_init_glc(struct pmu *pmu) { x86_pmu.late_ack = true; @@ -6005,6 +6020,8 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) memcpy(hybrid_var(pmu, 
hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); hybrid(pmu, event_constraints) = intel_glc_event_constraints; hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; + + intel_pmu_ref_cycles_ext(); } static __always_inline void intel_pmu_init_grt(struct pmu *pmu) @@ -6021,9 +6038,11 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu) memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, event_constraints) = intel_grt_event_constraints; hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; hybrid(pmu, extra_regs) = intel_grt_extra_regs; + + intel_pmu_ref_cycles_ext(); } __init int intel_pmu_init(void) -- cgit v1.2.3 From 1ce19bf90bd55bf54f9ed75d594029db63d395b0 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 7 Aug 2023 16:51:14 +0200 Subject: perf/x86/rapl: Stop doing cpu_relax() in the local64_cmpxchg() loop in rapl_event_update() According to the following commit: f5fe24ef17b5 ("lockref: stop doing cpu_relax in the cmpxchg loop") "On the x86-64 architecture even a failing cmpxchg grants exclusive access to the cacheline, making it preferable to retry the failed op immediately instead of stalling with the pause instruction." Based on the above observation, remove cpu_relax() from the local64_cmpxchg() loop of rapl_event_update(). Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Linus Torvalds Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20230807145134.3176-1-ubizjak@gmail.com --- arch/x86/events/rapl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 1579429846cc..e8f53b2590a5 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -184,10 +184,8 @@ again: rdmsrl(event->hw.event_base, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) { - cpu_relax(); + new_raw_count) != prev_raw_count) goto again; - } /* * Now we have the new raw value and have updated the prev -- cgit v1.2.3 From bcc6ec3d954bbcc8bec34a21c05ea536a2e96d6f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 7 Aug 2023 16:51:15 +0200 Subject: perf/x86/rapl: Use local64_try_cmpxchg in rapl_event_update() Use local64_try_cmpxchg() instead of local64_cmpxchg(*ptr, old, new) == old. X86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after CMPXCHG (and related move instruction in front of CMPXCHG). Also, try_cmpxchg() implicitly assigns old *ptr value to "old" when CMPXCHG fails. There is no need to re-read the value in the loop. No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Linus Torvalds Cc: "H. 
Peter Anvin" Link: https://lore.kernel.org/r/20230807145134.3176-2-ubizjak@gmail.com --- arch/x86/events/rapl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index e8f53b2590a5..6d3e73848643 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -179,13 +179,11 @@ static u64 rapl_event_update(struct perf_event *event) s64 delta, sdelta; int shift = RAPL_CNTR_WIDTH; -again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(event->hw.event_base, new_raw_count); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; + do { + rdmsrl(event->hw.event_base, new_raw_count); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); /* * Now we have the new raw value and have updated the prev -- cgit v1.2.3 From 618e77d77494bfd6273256d43eafe4368445c745 Mon Sep 17 00:00:00 2001 From: David Reaver Date: Tue, 1 Aug 2023 08:56:51 -0700 Subject: perf/x86/rapl: Fix "Using plain integer as NULL pointer" Sparse warning Change 0 to NULL when initializing the test field of perf_msr structs to avoid the following sparse warnings: make C=2 arch/x86/events/rapl.o CHECK arch/x86/events/rapl.c ... arch/x86/events/rapl.c:540:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:542:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:543:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:544:59: warning: Using plain integer as NULL pointer Signed-off-by: David Reaver Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230801155651.108076-1-me@davidreaver.com --- arch/x86/events/rapl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 6d3e73848643..c2c37fb0a228 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -533,11 +533,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { * - want to use same event codes across both architectures */ static struct perf_msr amd_rapl_msrs[] = { - [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 }, + [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, - [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 }, - [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 }, - [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 }, + [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, + [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, NULL, false, 0 }, + [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; static int rapl_cpu_offline(unsigned int cpu) -- cgit v1.2.3 From a56d5551e1993ca84dd0c69df5a3d8223d13fb5f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 Oct 2023 13:17:54 -0700 Subject: perf/x86/rapl: Annotate 'struct rapl_pmus' with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS=y (for array indexing) and CONFIG_FORTIFY_SOURCE=y (for strcpy/memcpy-family functions). Found with Coccinelle: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Add __counted_by for 'struct rapl_pmus'. 
No change in functionality intended. Signed-off-by: Kees Cook Signed-off-by: Ingo Molnar Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20231006201754.work.473-kees@kernel.org --- arch/x86/events/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index c2c37fb0a228..8d98d468b976 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -115,7 +115,7 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; unsigned int maxdie; - struct rapl_pmu *pmus[]; + struct rapl_pmu *pmus[] __counted_by(maxdie); }; enum rapl_unit_quirk { -- cgit v1.2.3 From 38cd5b6a875adc877681faf8e3ad47fdbd6eceb5 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Mon, 9 Oct 2023 08:54:54 +0200 Subject: perf/x86/intel/pt: Fix kernel-doc comments Some parameters or return codes were either wrong or missing; update them. Signed-off-by: Lucy Mielke Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/ZSOjQW3e2nJR4bAo@fedora.fritz.box --- arch/x86/events/intel/pt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 42a55794004a..8e2a12235e62 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -736,6 +736,7 @@ static bool topa_table_full(struct topa *topa) /** * topa_insert_pages() - create a list of ToPA tables * @buf: PT buffer being initialized. + * @cpu: CPU on which to allocate. * @gfp: Allocation flags. * * This initializes a list of ToPA tables with entries from @@ -1207,8 +1208,11 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf) /** * pt_buffer_init_topa() - initialize ToPA table for pt buffer * @buf: PT buffer. - * @size: Total size of all regions within this ToPA. + * @cpu: CPU on which to allocate. + * @nr_pages: No. of pages to allocate. + * @gfp: Allocation flags. + * + * Return: 0 on success or error code. */ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu, unsigned long nr_pages, gfp_t gfp) @@ -1281,7 +1285,7 @@ out: /** * pt_buffer_setup_aux() - set up topa tables for a PT buffer - * @cpu: Cpu on which to allocate, -1 means current. + * @event: Performance event * @pages: Array of pointers to buffer pages passed from perf core. * @nr_pages: Number of pages in the buffer. * @snapshot: If this is a snapshot/overwrite counter. -- cgit v1.2.3 From 05276d4831fe023b6674a72bd6b8c5b39796e690 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 12 Sep 2023 15:44:31 +0300 Subject: perf/x86/cstate: Allow reading the package statistics from local CPU The MSR registers for reading the package residency counters are available on every CPU of the package. To avoid doing unnecessary SMP calls to read the values for these from the various CPUs inside a package, allow reading them from any CPU of the package.
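[ Note: a minimal sketch of why a local read suffices; the helper name is hypothetical and assumes a package-scoped residency MSR: ]

	/* Every CPU in a package sees the same package-scoped MSR, so a
	 * plain local RDMSR works and no cross-CPU SMP call is needed. */
	static u64 cstate_pkg_read_local_sketch(u32 msr)
	{
		u64 val;

		rdmsrl(msr, val);
		return val;
	}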
Suggested-by: Kan Liang Signed-off-by: Tero Kristo Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230912124432.3616761-2-tero.kristo@linux.intel.com --- arch/x86/events/intel/cstate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 96fffb2d521d..cbeb6d2bf5b4 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -336,6 +336,9 @@ static int cstate_pmu_event_init(struct perf_event *event) cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; + + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, topology_die_cpumask(event->cpu)); -- cgit v1.2.3 From d6389d3ccc136a4229a8d497899c64f80fd3c5b3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:11 +0530 Subject: perf/x86/amd/uncore: Refactor uncore management Since struct amd_uncore is used to manage per-cpu contexts, rename it to amd_uncore_ctx in order to better reflect its purpose. Add a new struct amd_uncore_pmu to encapsulate all attributes which are shared by per-cpu contexts for a corresponding PMU. These include the number of counters, active mask, MSR and RDPMC base addresses, etc. Since the struct pmu is now embedded, the corresponding amd_uncore_pmu for a given event can be found by simply using container_of(). Finally, move all PMU-specific code to separate functions. While the original event management functions continue to provide the base functionality, all PMU-specific quirks and customizations are applied in separate functions. The motivation is to simplify the management of uncore PMUs. 
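[ Note: the container_of() lookup described above, shown in isolation; this matches the event_to_amd_uncore_pmu() helper added by the patch: ]

	/* With struct pmu embedded in struct amd_uncore_pmu, the wrapper is
	 * recovered from any event without per-cpu or global lookups. */
	static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
	{
		return container_of(event->pmu, struct amd_uncore_pmu, pmu);
	}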
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/24b38c49a5dae65d8c96e5d75a2b96ae97aaa651.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 737 ++++++++++++++++++++++--------------------- 1 file changed, 379 insertions(+), 358 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 83f15fe411b3..ffcecda13d65 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,56 +27,41 @@ #define COUNTER_SHIFT 16 +#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ +#define UNCORE_NAME_LEN 16 + #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt static int pmu_version; -static int num_counters_llc; -static int num_counters_nb; -static bool l3_mask; static HLIST_HEAD(uncore_unused_list); -struct amd_uncore { +struct amd_uncore_ctx { int id; int refcnt; int cpu; - int num_counters; - int rdpmc_base; - u32 msr_base; - cpumask_t *active_mask; - struct pmu *pmu; struct perf_event **events; struct hlist_node node; }; -static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_llc; - -static struct pmu amd_nb_pmu; -static struct pmu amd_llc_pmu; - -static cpumask_t amd_nb_active_mask; -static cpumask_t amd_llc_active_mask; - -static bool is_nb_event(struct perf_event *event) -{ - return event->pmu->type == amd_nb_pmu.type; -} +struct amd_uncore_pmu { + char name[UNCORE_NAME_LEN]; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t active_mask; + struct pmu pmu; + struct amd_uncore_ctx * __percpu *ctx; + int (*id)(unsigned int cpu); +}; -static bool is_llc_event(struct perf_event *event) -{ - return event->pmu->type == amd_llc_pmu.type; -} +static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; +static int num_pmus __read_mostly; -static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { - if (is_nb_event(event) && amd_uncore_nb) - return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_llc_event(event) && amd_uncore_llc) - return *per_cpu_ptr(amd_uncore_llc, event->cpu); - - return NULL; + return container_of(event->pmu, struct amd_uncore_pmu, pmu); } static void amd_uncore_read(struct perf_event *event) @@ -118,7 +103,7 @@ static void amd_uncore_stop(struct perf_event *event, int flags) hwc->state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - amd_uncore_read(event); + event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -126,15 +111,16 @@ static void amd_uncore_stop(struct perf_event *event, int flags) static int amd_uncore_add(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; /* are we already assigned? 
*/ - if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + if (hwc->idx != -1 && ctx->events[hwc->idx] == event) goto out; - for (i = 0; i < uncore->num_counters; i++) { - if (uncore->events[i] == event) { + for (i = 0; i < pmu->num_counters; i++) { + if (ctx->events[i] == event) { hwc->idx = i; goto out; } @@ -142,8 +128,8 @@ static int amd_uncore_add(struct perf_event *event, int flags) /* if not, take the first available counter */ hwc->idx = -1; - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { hwc->idx = i; break; } @@ -153,23 +139,13 @@ out: if (hwc->idx == -1) return -EBUSY; - hwc->config_base = uncore->msr_base + (2 * hwc->idx); - hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); - hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->config_base = pmu->msr_base + (2 * hwc->idx); + hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - /* - * The first four DF counters are accessible via RDPMC index 6 to 9 - * followed by the L3 counters from index 10 to 15. For processors - * with more than four DF counters, the DF RDPMC assignments become - * discontiguous as the additional counters are accessible starting - * from index 16. - */ - if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB) - hwc->event_base_rdpmc += NUM_COUNTERS_L3; - if (flags & PERF_EF_START) - amd_uncore_start(event, PERF_EF_RELOAD); + event->pmu->start(event, PERF_EF_RELOAD); return 0; } @@ -177,55 +153,36 @@ out: static void amd_uncore_del(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; - amd_uncore_stop(event, PERF_EF_UPDATE); + event->pmu->stop(event, PERF_EF_UPDATE); - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], event, NULL) == event) + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], event, NULL) == event) break; } hwc->idx = -1; } -/* - * Return a full thread and slice mask unless user - * has provided them - */ -static u64 l3_thread_slice_mask(u64 config) -{ - if (boot_cpu_data.x86 <= 0x18) - return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | - ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); - - /* - * If the user doesn't specify a threadmask, they're not trying to - * count core 0, so we enable all cores & threads. - * We'll also assume that they want to count slice 0 if they specify - * a threadmask and leave sliceid and enallslices unpopulated. 
- */ - if (!(config & AMD64_L3_F19H_THREAD_MASK)) - return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_EN_ALL_CORES; - - return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | - AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_COREID_MASK); -} - static int amd_uncore_event_init(struct perf_event *event) { - struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; struct hw_perf_event *hwc = &event->hw; - u64 event_mask = AMD64_RAW_EVENT_MASK_NB; if (event->attr.type != event->pmu->type) return -ENOENT; - if (pmu_version >= 2 && is_nb_event(event)) - event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB; + if (event->cpu < 0) + return -EINVAL; + + pmu = event_to_amd_uncore_pmu(event); + ctx = *per_cpu_ptr(pmu->ctx, event->cpu); + if (!ctx) + return -ENODEV; /* * NB and Last level cache counters (MSRs) are shared across all cores @@ -235,28 +192,14 @@ static int amd_uncore_event_init(struct perf_event *event) * out. So we do not support sampling and per-thread events via * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - hwc->config = event->attr.config & event_mask; + hwc->config = event->attr.config; hwc->idx = -1; - if (event->cpu < 0) - return -EINVAL; - - /* - * SliceMask and ThreadMask need to be set for certain L3 events. - * For other events, the two fields do not affect the count. - */ - if (l3_mask && is_llc_event(event)) - hwc->config |= l3_thread_slice_mask(event->attr.config); - - uncore = event_to_amd_uncore(event); - if (!uncore) - return -ENODEV; - /* * since request can come in to any of the shared cores, we will remap * to a single common cpu. */ - event->cpu = uncore->cpu; + event->cpu = ctx->cpu; return 0; } @@ -278,17 +221,10 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - cpumask_t *active_mask; - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu->type == amd_nb_pmu.type) - active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_llc_pmu.type) - active_mask = &amd_llc_active_mask; - else - return 0; + struct pmu *ptr = dev_get_drvdata(dev); + struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu); - return cpumap_print_to_pagebuf(true, buf, active_mask); + return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask); } static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); @@ -396,113 +332,57 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = { NULL, }; -static struct pmu amd_nb_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_df_attr_groups, - .name = "amd_nb", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct pmu amd_llc_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_l3_attr_groups, - .attr_update = amd_uncore_l3_attr_update, - .name = "amd_l2", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) -{ - return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, - cpu_to_node(cpu)); -} 
- -static inline struct perf_event ** -amd_uncore_events_alloc(unsigned int num, unsigned int cpu) -{ - return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL, - cpu_to_node(cpu)); -} - static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL; - - if (amd_uncore_nb) { - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; - uncore_nb = amd_uncore_alloc(cpu); - if (!uncore_nb) - goto fail; - uncore_nb->cpu = cpu; - uncore_nb->num_counters = num_counters_nb; - uncore_nb->rdpmc_base = RDPMC_BASE_NB; - uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; - uncore_nb->active_mask = &amd_nb_active_mask; - uncore_nb->pmu = &amd_nb_pmu; - uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu); - if (!uncore_nb->events) + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int node = cpu_to_node(cpu), i; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, + node); + if (!ctx) goto fail; - uncore_nb->id = -1; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; - } - if (amd_uncore_llc) { - *per_cpu_ptr(amd_uncore_llc, cpu) = NULL; - uncore_llc = amd_uncore_alloc(cpu); - if (!uncore_llc) - goto fail; - uncore_llc->cpu = cpu; - uncore_llc->num_counters = num_counters_llc; - uncore_llc->rdpmc_base = RDPMC_BASE_LLC; - uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_llc->active_mask = &amd_llc_active_mask; - uncore_llc->pmu = &amd_llc_pmu; - uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu); - if (!uncore_llc->events) + ctx->cpu = cpu; + ctx->events = kzalloc_node(sizeof(struct perf_event *) * + pmu->num_counters, GFP_KERNEL, + node); + if (!ctx->events) goto fail; - uncore_llc->id = -1; - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; + + ctx->id = -1; + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; fail: - if (uncore_nb) { - kfree(uncore_nb->events); - kfree(uncore_nb); - } + /* Rollback */ + for (; i >= 0; i--) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) + continue; - if (uncore_llc) { - kfree(uncore_llc->events); - kfree(uncore_llc); + kfree(ctx->events); + kfree(ctx); } return -ENOMEM; } -static struct amd_uncore * -amd_uncore_find_online_sibling(struct amd_uncore *this, - struct amd_uncore * __percpu *uncores) +static struct amd_uncore_ctx * +amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, + struct amd_uncore_pmu *pmu) { unsigned int cpu; - struct amd_uncore *that; + struct amd_uncore_ctx *that; for_each_online_cpu(cpu) { - that = *per_cpu_ptr(uncores, cpu); + that = *per_cpu_ptr(pmu->ctx, cpu); if (!that) continue; @@ -523,24 +403,16 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, static int amd_uncore_cpu_starting(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; - struct amd_uncore *uncore; - - if (amd_uncore_nb) { - uncore = *per_cpu_ptr(amd_uncore_nb, cpu); - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - uncore->id = ecx & 0xff; - - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; - } - - if (amd_uncore_llc) { - uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - uncore->id = get_llc_id(cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + ctx->id = pmu->id(cpu); + ctx = 
amd_uncore_find_online_sibling(ctx, pmu); + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; @@ -548,195 +420,359 @@ static int amd_uncore_cpu_starting(unsigned int cpu) static void uncore_clean_online(void) { - struct amd_uncore *uncore; + struct amd_uncore_ctx *ctx; struct hlist_node *n; - hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { - hlist_del(&uncore->node); - kfree(uncore->events); - kfree(uncore); + hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { + hlist_del(&ctx->node); + kfree(ctx->events); + kfree(ctx); } } -static void uncore_online(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; uncore_clean_online(); - if (cpu == uncore->cpu) - cpumask_set_cpu(cpu, uncore->active_mask); + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_set_cpu(cpu, &pmu->active_mask); + } + + return 0; } -static int amd_uncore_cpu_online(unsigned int cpu) +static int amd_uncore_cpu_down_prepare(unsigned int cpu) { - if (amd_uncore_nb) - uncore_online(cpu, amd_uncore_nb); + struct amd_uncore_ctx *this, *that; + struct amd_uncore_pmu *pmu; + int i, j; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + this = *per_cpu_ptr(pmu->ctx, cpu); + + /* this cpu is going down, migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + that = *per_cpu_ptr(pmu->ctx, j); + + if (cpu == j) + continue; + + if (this == that) { + perf_pmu_migrate_context(&pmu->pmu, cpu, j); + cpumask_clear_cpu(cpu, &pmu->active_mask); + cpumask_set_cpu(j, &pmu->active_mask); + that->cpu = j; + break; + } + } + } - if (amd_uncore_llc) - uncore_online(cpu, amd_uncore_llc); + return 0; +} + +static int amd_uncore_cpu_dead(unsigned int cpu) +{ + struct amd_uncore_ctx *ctx; + struct amd_uncore_pmu *pmu; + int i; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); + + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); + } + + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } return 0; } -static void uncore_down_prepare(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_id(unsigned int cpu) { - unsigned int i; - struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); + unsigned int eax, ebx, ecx, edx; - if (this->cpu != cpu) - return; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(i) { - struct amd_uncore *that = *per_cpu_ptr(uncores, i); + return ecx & 0xff; +} - if (cpu == i) - continue; +static int amd_uncore_df_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int ret = amd_uncore_event_init(event); - if (this == that) { - perf_pmu_migrate_context(this->pmu, cpu, i); - cpumask_clear_cpu(cpu, that->active_mask); - cpumask_set_cpu(i, that->active_mask); - that->cpu = i; - break; - } - } + if (ret || pmu_version < 2) + return ret; + + hwc->config = event->attr.config & + (pmu_version >= 2 ? 
AMD64_PERFMON_V2_RAW_EVENT_MASK_NB : + AMD64_RAW_EVENT_MASK_NB); + + return 0; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static int amd_uncore_df_add(struct perf_event *event, int flags) { - if (amd_uncore_nb) - uncore_down_prepare(cpu, amd_uncore_nb); + int ret = amd_uncore_add(event, flags & ~PERF_EF_START); + struct hw_perf_event *hwc = &event->hw; + + if (ret) + return ret; + + /* + * The first four DF counters are accessible via RDPMC index 6 to 9 + * followed by the L3 counters from index 10 to 15. For processors + * with more than four DF counters, the DF RDPMC assignments become + * discontiguous as the additional counters are accessible starting + * from index 16. + */ + if (hwc->idx >= NUM_COUNTERS_NB) + hwc->event_base_rdpmc += NUM_COUNTERS_L3; - if (amd_uncore_llc) - uncore_down_prepare(cpu, amd_uncore_llc); + /* Delayed start after rdpmc base update */ + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); return 0; } -static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_init(void) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + union cpuid_0x80000022_ebx ebx; + int ret; - if (cpu == uncore->cpu) - cpumask_clear_cpu(cpu, uncore->active_mask); + if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) + return 0; - if (!--uncore->refcnt) { - kfree(uncore->events); - kfree(uncore); + /* + * For Family 17h and above, the Northbridge counters are repurposed + * as Data Fabric counters. The PMUs are exported based on family as + * either NB or DF. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", + sizeof(pmu->name)); + + pmu->num_counters = NUM_COUNTERS_NB; + pmu->msr_base = MSR_F15H_NB_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->id = amd_uncore_df_id; + + if (pmu_version >= 2) { + *df_attr++ = &format_attr_event14v2.attr; + *df_attr++ = &format_attr_umask12.attr; + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + pmu->num_counters = ebx.split.num_df_pmc; + } else if (boot_cpu_data.x86 >= 0x17) { + *df_attr = &format_attr_event14.attr; + } + + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_df_attr_groups, + .name = pmu->name, + .event_init = amd_uncore_df_event_init, + .add = amd_uncore_df_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; } - *per_cpu_ptr(uncores, cpu) = NULL; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? 
"HYGON" : "", + pmu->pmu.name); + + num_pmus++; + + return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_l3_id(unsigned int cpu) { - if (amd_uncore_nb) - uncore_dead(cpu, amd_uncore_nb); + return get_llc_id(cpu); +} + +static int amd_uncore_l3_event_init(struct perf_event *event) +{ + int ret = amd_uncore_event_init(event); + struct hw_perf_event *hwc = &event->hw; + u64 config = event->attr.config; + u64 mask; + + hwc->config = config & AMD64_RAW_EVENT_MASK_NB; + + /* + * SliceMask and ThreadMask need to be set for certain L3 events. + * For other events, the two fields do not affect the count. + */ + if (ret || boot_cpu_data.x86 < 0x17) + return ret; + + mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | + AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_COREID_MASK); + + if (boot_cpu_data.x86 <= 0x18) + mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | + ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); + + /* + * If the user doesn't specify a ThreadMask, they're not trying to + * count core 0, so we enable all cores & threads. + * We'll also assume that they want to count slice 0 if they specify + * a ThreadMask and leave SliceId and EnAllSlices unpopulated. + */ + else if (!(config & AMD64_L3_F19H_THREAD_MASK)) + mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_EN_ALL_CORES; - if (amd_uncore_llc) - uncore_dead(cpu, amd_uncore_llc); + hwc->config |= mask; return 0; } -static int __init amd_uncore_init(void) +static int amd_uncore_l3_init(void) { - struct attribute **df_attr = amd_uncore_df_format_attr; struct attribute **l3_attr = amd_uncore_l3_format_attr; - union cpuid_0x80000022_ebx ebx; - int ret = -ENODEV; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + int ret; - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return -ENODEV; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) + return 0; - if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) - return -ENODEV; + /* + * For Family 17h and above, L3 cache counters are available instead + * of L2 cache counters. The PMUs are exported based on family as + * either L2 or L3. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", + sizeof(pmu->name)); - if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) - pmu_version = 2; + pmu->num_counters = NUM_COUNTERS_L2; + pmu->msr_base = MSR_F16H_L2I_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->id = amd_uncore_l3_id; - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L2; if (boot_cpu_data.x86 >= 0x17) { - /* - * For F17h and above, the Northbridge counters are - * repurposed as Data Fabric counters. Also, L3 - * counters are supported too. The PMUs are exported - * based on family as either L2 or L3 and NB or DF. - */ - num_counters_llc = NUM_COUNTERS_L3; - amd_nb_pmu.name = "amd_df"; - amd_llc_pmu.name = "amd_l3"; - l3_mask = true; + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
+ &format_attr_threadmask2.attr : + &format_attr_threadmask8.attr; + pmu->num_counters = NUM_COUNTERS_L3; } - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - if (pmu_version >= 2) { - *df_attr++ = &format_attr_event14v2.attr; - *df_attr++ = &format_attr_umask12.attr; - } else if (boot_cpu_data.x86 >= 0x17) { - *df_attr = &format_attr_event14.attr; - } + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_l3_attr_groups, + .attr_update = amd_uncore_l3_attr_update, + .name = pmu->name, + .event_init = amd_uncore_l3_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; + } - amd_uncore_nb = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_nb) { - ret = -ENOMEM; - goto fail_nb; - } - ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); - if (ret) - goto fail_nb; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pmu->pmu.name); - if (pmu_version >= 2) { - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - num_counters_nb = ebx.split.num_df_pmc; - } + num_pmus++; - pr_info("%d %s %s counters detected\n", num_counters_nb, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", - amd_nb_pmu.name); + return 0; +} - ret = 0; - } +static void uncore_free(void) +{ + struct amd_uncore_pmu *pmu; + int i; - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - if (boot_cpu_data.x86 >= 0x19) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask2.attr; - } else if (boot_cpu_data.x86 >= 0x17) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask8.attr; - } + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + if (!pmu->ctx) + continue; - amd_uncore_llc = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_llc) { - ret = -ENOMEM; - goto fail_llc; - } - ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); - if (ret) - goto fail_llc; - - pr_info("%d %s %s counters detected\n", num_counters_llc, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", - amd_llc_pmu.name); - ret = 0; + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; } + num_pmus = 0; +} + +static int __init amd_uncore_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return -ENODEV; + + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) + pmu_version = 2; + + ret = amd_uncore_df_init(); + if (ret) + goto fail; + + ret = amd_uncore_l3_init(); + if (ret) + goto fail; + /* * Install callbacks. Core will call them for each online cpu. 
*/ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_llc; + goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -753,12 +789,8 @@ fail_start: cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_llc: - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_llc); -fail_nb: - free_percpu(amd_uncore_nb); +fail: + uncore_free(); return ret; } @@ -768,18 +800,7 @@ static void __exit amd_uncore_exit(void) { cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - perf_pmu_unregister(&amd_llc_pmu); - free_percpu(amd_uncore_llc); - amd_uncore_llc = NULL; - } - - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_nb); - amd_uncore_nb = NULL; - } + uncore_free(); } module_init(amd_uncore_init); -- cgit v1.2.3 From 07888daa056e809de0b6b234116b575c11f9f99d Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:12 +0530 Subject: perf/x86/amd/uncore: Move discovery and registration Uncore PMUs have traditionally been registered in the module init path. This is fine for the existing DF and L3 PMUs since the CPUID information does not vary across CPUs, but not for the memory controller (UMC) PMUs since information like active memory channels can vary for each socket depending on how the DIMMs have been physically populated. To overcome this, the discovery of PMU information using CPUID is moved to the startup of UNCORE_STARTING. This cannot be done in the startup of UNCORE_PREP since the hotplug callback does not run on the CPU that is being brought online. Previously, the startup of UNCORE_PREP was used for allocating uncore contexts, following which the startup of UNCORE_STARTING was used to find and reuse an existing sibling context, if possible. Any unused contexts were added to a list for reclamation later during the startup of UNCORE_ONLINE. Since all required CPUID info is now available only after the startup of UNCORE_STARTING has completed, context allocation has been moved to the startup of UNCORE_ONLINE. Before allocating contexts, the first CPU that comes online has to take up the additional responsibility of registering the PMUs. This is a one-time process though. Since sibling discovery now happens prior to deciding whether a new context is required, there is no longer a need to track and free up unused contexts. The teardown of UNCORE_ONLINE and UNCORE_PREP functionally remains the same. Overall, the flow of control described above is achieved using the following handlers for managing uncore PMUs. It is mandatory to define them for each type of uncore PMU. * scan() runs during startup of UNCORE_STARTING and collects PMU info using CPUID. * init() runs during startup of UNCORE_ONLINE, registers PMUs and sets up uncore contexts. * move() runs during teardown of UNCORE_ONLINE and migrates uncore contexts to a shared sibling, if possible. * free() runs during teardown of UNCORE_PREP and frees up uncore contexts.
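[ Note: a sketch of how the four handlers map onto the hotplug states; the comment table is a summary, and the function mirrors the amd_uncore_cpu_online() added by the patch: ]

	/* Per uncore type:
	 *   UNCORE_STARTING startup:  scan() - CPUID discovery on the new CPU
	 *   UNCORE_ONLINE   startup:  init() - one-time PMU registration + contexts
	 *   UNCORE_ONLINE   teardown: move() - migrate contexts to a shared sibling
	 *   UNCORE_PREP     teardown: free() - release this CPU's contexts
	 */
	static int amd_uncore_cpu_online(unsigned int cpu)
	{
		struct amd_uncore *uncore;
		int i;

		for (i = 0; i < UNCORE_TYPE_MAX; i++) {
			uncore = &uncores[i];
			if (uncore->init(uncore, cpu))
				break;
		}

		return 0;
	}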
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e6c447e48872fcab8452e0dd81b1c9cb09f39eb4.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 494 ++++++++++++++++++++++++++----------------- 1 file changed, 305 insertions(+), 189 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ffcecda13d65..ff1d09cc07ad 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -26,8 +26,6 @@ #define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 - -#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ #define UNCORE_NAME_LEN 16 #undef pr_fmt @@ -35,10 +33,7 @@ static int pmu_version; -static HLIST_HEAD(uncore_unused_list); - struct amd_uncore_ctx { - int id; int refcnt; int cpu; struct perf_event **events; @@ -53,11 +48,36 @@ struct amd_uncore_pmu { cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; - int (*id)(unsigned int cpu); }; -static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; -static int num_pmus __read_mostly; +enum { + UNCORE_TYPE_DF, + UNCORE_TYPE_L3, + + UNCORE_TYPE_MAX +}; + +union amd_uncore_info { + struct { + u64 aux_data:32; /* auxiliary data */ + u64 num_pmcs:8; /* number of counters */ + u64 cid:8; /* context id */ + } split; + u64 full; +}; + +struct amd_uncore { + union amd_uncore_info * __percpu info; + struct amd_uncore_pmu *pmus; + unsigned int num_pmus; + bool init_done; + void (*scan)(struct amd_uncore *uncore, unsigned int cpu); + int (*init)(struct amd_uncore *uncore, unsigned int cpu); + void (*move)(struct amd_uncore *uncore, unsigned int cpu); + void (*free)(struct amd_uncore *uncore, unsigned int cpu); +}; + +static struct amd_uncore uncores[UNCORE_TYPE_MAX]; static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { @@ -332,182 +352,192 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = { NULL, }; -static int amd_uncore_cpu_up_prepare(unsigned int cpu) +static __always_inline +int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int node = cpu_to_node(cpu), i; - - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - *per_cpu_ptr(pmu->ctx, cpu) = NULL; - ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, - node); - if (!ctx) - goto fail; - - ctx->cpu = cpu; - ctx->events = kzalloc_node(sizeof(struct perf_event *) * - pmu->num_counters, GFP_KERNEL, - node); - if (!ctx->events) - goto fail; - - ctx->id = -1; - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } - - return 0; - -fail: - /* Rollback */ - for (; i >= 0; i--) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (!ctx) - continue; - - kfree(ctx->events); - kfree(ctx); - } + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.cid; +} - return -ENOMEM; +static __always_inline +int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.num_pmcs; } -static struct amd_uncore_ctx * -amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, - struct amd_uncore_pmu *pmu) +static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu) { - unsigned int cpu; - struct amd_uncore_ctx *that; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - for_each_online_cpu(cpu) { - that = *per_cpu_ptr(pmu->ctx, cpu); + if (!uncore->init_done) + return; - if 
(!that) + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) continue; - if (this == that) - continue; + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); - if (this->id == that->id) { - hlist_add_head(&this->node, &uncore_unused_list); - this = that; - break; + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); } - } - this->refcnt++; - return this; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } } -static int amd_uncore_cpu_starting(unsigned int cpu) +static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { + struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + int node, cid, i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - ctx->id = pmu->id(cpu); - ctx = amd_uncore_find_online_sibling(ctx, pmu); - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } + if (!uncore->init_done || !uncore->num_pmus) + return 0; - return 0; -} + cid = amd_uncore_ctx_cid(uncore, cpu); -static void uncore_clean_online(void) -{ - struct amd_uncore_ctx *ctx; - struct hlist_node *n; + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + curr = NULL; - hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { - hlist_del(&ctx->node); - kfree(ctx->events); - kfree(ctx); - } -} + /* Find a sibling context */ + for_each_online_cpu(j) { + if (cpu == j) + continue; -static int amd_uncore_cpu_online(unsigned int cpu) -{ - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + prev = *per_cpu_ptr(pmu->ctx, j); + if (!prev) + continue; - uncore_clean_online(); + if (cid == amd_uncore_ctx_cid(uncore, j)) { + curr = prev; + break; + } + } + + /* Allocate context if sibling does not exist */ + if (!curr) { + node = cpu_to_node(cpu); + curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node); + if (!curr) + goto fail; + + curr->cpu = cpu; + curr->events = kzalloc_node(sizeof(*curr->events) * + pmu->num_counters, + GFP_KERNEL, node); + if (!curr->events) { + kfree(curr); + goto fail; + } - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) cpumask_set_cpu(cpu, &pmu->active_mask); + } + + curr->refcnt++; + *per_cpu_ptr(pmu->ctx, cpu) = curr; } return 0; + +fail: + amd_uncore_ctx_free(uncore, cpu); + + return -ENOMEM; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_ctx *this, *that; + struct amd_uncore_ctx *curr, *next; struct amd_uncore_pmu *pmu; int i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - this = *per_cpu_ptr(pmu->ctx, cpu); + if (!uncore->init_done) + return; - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(j) { - that = *per_cpu_ptr(pmu->ctx, j); + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + curr = *per_cpu_ptr(pmu->ctx, cpu); + if (!curr) + continue; - if (cpu == j) + /* Migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + next = *per_cpu_ptr(pmu->ctx, j); + if (!next || cpu == j) continue; - if (this == that) { + if (curr == next) { perf_pmu_migrate_context(&pmu->pmu, cpu, j); cpumask_clear_cpu(cpu, &pmu->active_mask); cpumask_set_cpu(j, &pmu->active_mask); - that->cpu = j; + next->cpu = j; break; } } } +} + +static int amd_uncore_cpu_starting(unsigned int cpu) +{ + struct amd_uncore *uncore; 
+ int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->scan(uncore, cpu); + } return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore_ctx *ctx; - struct amd_uncore_pmu *pmu; + struct amd_uncore *uncore; int i; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) - cpumask_clear_cpu(cpu, &pmu->active_mask); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->init(uncore, cpu)) + break; + } - if (!--ctx->refcnt) { - kfree(ctx->events); - kfree(ctx); - } + return 0; +} - *per_cpu_ptr(pmu->ctx, cpu) = NULL; +static int amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore; + int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->move(uncore, cpu); } return 0; } -static int amd_uncore_df_id(unsigned int cpu) +static int amd_uncore_cpu_dead(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + int i; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->free(uncore, cpu); + } - return ecx & 0xff; + return 0; } static int amd_uncore_df_event_init(struct perf_event *event) @@ -550,41 +580,66 @@ static int amd_uncore_df_add(struct perf_event *event, int flags) return 0; } -static int amd_uncore_df_init(void) +static +void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **df_attr = amd_uncore_df_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; union cpuid_0x80000022_ebx ebx; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.cid = topology_die_id(cpu); + + if (pmu_version >= 2) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + info.split.num_pmcs = ebx.split.num_df_pmc; + } + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, the Northbridge counters are repurposed * as Data Fabric counters. The PMUs are exported based on family as * either NB or DF. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? 
"amd_df" : "amd_nb", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_NB; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; - pmu->id = amd_uncore_df_id; if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; *df_attr++ = &format_attr_umask12.attr; - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - pmu->num_counters = ebx.split.num_df_pmc; } else if (boot_cpu_data.x86 >= 0x17) { *df_attr = &format_attr_event14.attr; } pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -600,25 +655,22 @@ static int amd_uncore_df_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; + uncore->num_pmus = 1; - return 0; -} +done: + uncore->init_done = true; -static int amd_uncore_l3_id(unsigned int cpu) -{ - return get_llc_id(cpu); + return amd_uncore_ctx_init(uncore, cpu); } static int amd_uncore_l3_event_init(struct perf_event *event) @@ -660,27 +712,52 @@ static int amd_uncore_l3_event_init(struct perf_event *event) return 0; } -static int amd_uncore_l3_init(void) +static +void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **l3_attr = amd_uncore_l3_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.cid = get_llc_id(cpu); + + if (boot_cpu_data.x86 >= 0x17) + info.split.num_pmcs = NUM_COUNTERS_L3; + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **l3_attr = amd_uncore_l3_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, L3 cache counters are available instead * of L2 cache counters. The PMUs are exported based on family as * either L2 or L3. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_L2; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; - pmu->id = amd_uncore_l3_id; if (boot_cpu_data.x86 >= 0x17) { *l3_attr++ = &format_attr_event8.attr; @@ -688,12 +765,11 @@ static int amd_uncore_l3_init(void) *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
&format_attr_threadmask2.attr : &format_attr_threadmask8.attr; - pmu->num_counters = NUM_COUNTERS_L3; } pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -710,43 +786,45 @@ static int amd_uncore_l3_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; - - return 0; -} - -static void uncore_free(void) -{ - struct amd_uncore_pmu *pmu; - int i; + uncore->num_pmus = 1; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - if (!pmu->ctx) - continue; - - perf_pmu_unregister(&pmu->pmu); - free_percpu(pmu->ctx); - pmu->ctx = NULL; - } +done: + uncore->init_done = true; - num_pmus = 0; + return amd_uncore_ctx_init(uncore, cpu); } +static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { + /* UNCORE_TYPE_DF */ + { + .scan = amd_uncore_df_ctx_scan, + .init = amd_uncore_df_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, + /* UNCORE_TYPE_L3 */ + { + .scan = amd_uncore_l3_ctx_scan, + .init = amd_uncore_l3_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, +}; + static int __init amd_uncore_init(void) { - int ret; + struct amd_uncore *uncore; + int ret, i; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -758,20 +836,27 @@ static int __init amd_uncore_init(void) if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) pmu_version = 2; - ret = amd_uncore_df_init(); - if (ret) - goto fail; + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; - ret = amd_uncore_l3_init(); - if (ret) - goto fail; + BUG_ON(!uncore->scan); + BUG_ON(!uncore->init); + BUG_ON(!uncore->move); + BUG_ON(!uncore->free); + + uncore->info = alloc_percpu(union amd_uncore_info); + if (!uncore->info) { + ret = -ENOMEM; + goto fail; + } + }; /* * Install callbacks. Core will call them for each online cpu. 
*/ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", - amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + NULL, amd_uncore_cpu_dead)) goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, @@ -790,17 +875,48 @@ fail_start: fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); fail: - uncore_free(); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->info) { + free_percpu(uncore->info); + uncore->info = NULL; + } + } return ret; } static void __exit amd_uncore_exit(void) { + struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + int i, j; + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - uncore_free(); + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (!uncore->info) + continue; + + free_percpu(uncore->info); + uncore->info = NULL; + + for (j = 0; j < uncore->num_pmus; j++) { + pmu = &uncore->pmus[j]; + if (!pmu->ctx) + continue; + + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; + } + + kfree(uncore->pmus); + uncore->pmus = NULL; + } } module_init(amd_uncore_init); -- cgit v1.2.3 From 7ef0343855dc23a979a53b3143540f93f3e5bef8 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:13 +0530 Subject: perf/x86/amd/uncore: Use rdmsr if rdpmc is unavailable Not all uncore PMUs may support the use of the RDPMC instruction for reading counters. In such cases, read the count from the corresponding PERF_CTR register using the RDMSR instruction. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e9d994e32a3fcb39fa59fcf43ab4260d11aba097.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ff1d09cc07ad..2fe623923034 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -96,7 +96,16 @@ static void amd_uncore_read(struct perf_event *event) */ prev = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new); + + /* + * Some uncore PMUs do not have RDPMC assignments. In such cases, + * read counts directly from the corresponding PERF_CTR. + */ + if (hwc->event_base_rdpmc < 0) + rdmsrl(hwc->event_base, new); + else + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); delta >>= COUNTER_SHIFT; @@ -164,6 +173,9 @@ out: hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (pmu->rdpmc_base < 0) + hwc->event_base_rdpmc = -1; + if (flags & PERF_EF_START) event->pmu->start(event, PERF_EF_RELOAD); -- cgit v1.2.3 From 83a43c622123e714b0317a57176b336187f5deb3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:14 +0530 Subject: perf/x86/amd/uncore: Add group exclusivity In some cases, it may be necessary to restrict opening PMU events to a subset of CPUs. E.g. Unified Memory Controller (UMC) PMUs are specific to each active memory channel and the MSR address space for the PERF_CTL and PERF_CTR registers is reused on each socket. Thus, opening events for a specific UMC PMU should be restricted to CPUs belonging to the same socket as that of the UMC. The "cpumask" of the PMU should also reflect this accordingly. 
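[ Note: a sketch of the group check this enables during context setup, as described in the continuation below; the helper name is hypothetical, while amd_uncore_ctx_gid() and the group field are added by this patch: ]

	/* A CPU only gets a context for a group-restricted PMU (e.g. a UMC)
	 * when its group id (the socket id for UMCs) matches the PMU's. */
	static bool amd_uncore_grp_allowed_sketch(struct amd_uncore *uncore,
						  struct amd_uncore_pmu *pmu,
						  unsigned int cpu)
	{
		return amd_uncore_ctx_gid(uncore, cpu) == pmu->group;
	}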
Uncore PMUs which require this can use the new group attribute in struct amd_uncore_pmu to set a valid group ID during the scan() phase. Later, during init(), an uncore context for a CPU will be unavailable if the group ID does not match. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/937d6d71010a48ea4e069f4904b3116a5f99ecdf.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 2fe623923034..318982951dd2 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,6 +27,7 @@ #define COUNTER_SHIFT 16 #define UNCORE_NAME_LEN 16 +#define UNCORE_GROUP_MAX 256 #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt @@ -45,6 +46,7 @@ struct amd_uncore_pmu { int num_counters; int rdpmc_base; u32 msr_base; + int group; cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; @@ -61,6 +63,7 @@ union amd_uncore_info { struct { u64 aux_data:32; /* auxiliary data */ u64 num_pmcs:8; /* number of counters */ + u64 gid:8; /* group id */ u64 cid:8; /* context id */ } split; u64 full; @@ -371,6 +374,13 @@ int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) return info->split.cid; } +static __always_inline +int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.gid; +} + static __always_inline int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) { @@ -409,18 +419,23 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - int node, cid, i, j; + int node, cid, gid, i, j; if (!uncore->init_done || !uncore->num_pmus) return 0; cid = amd_uncore_ctx_cid(uncore, cpu); + gid = amd_uncore_ctx_gid(uncore, cpu); for (i = 0; i < uncore->num_pmus; i++) { pmu = &uncore->pmus[i]; *per_cpu_ptr(pmu->ctx, cpu) = NULL; curr = NULL; + /* Check for group exclusivity */ + if (gid != pmu->group) + continue; + /* Find a sibling context */ for_each_online_cpu(j) { if (cpu == j) @@ -603,6 +618,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.gid = 0; info.split.cid = topology_die_id(cpu); if (pmu_version >= 2) { @@ -641,6 +657,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; @@ -734,6 +751,7 @@ void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.gid = 0; info.split.cid = get_llc_id(cpu); if (boot_cpu_data.x86 >= 0x17) @@ -770,6 +788,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (boot_cpu_data.x86 >= 0x17) { *l3_attr++ = &format_attr_event8.attr; -- cgit v1.2.3 From 25e56847821f7375bdee7dae1027c7917d07ce4b Mon Sep 17 00:00:00 2001 From: Sandipan Das 
Date: Thu, 5 Oct 2023 10:53:15 +0530 Subject: perf/x86/amd/uncore: Add memory controller support Unified Memory Controller (UMC) events were introduced with Zen 4 as a part of the Performance Monitoring Version 2 (PerfMonV2) enhancements. An event is specified using the EventSelect bits, and the RdWrMask bits can be used for additional filtering of read and write requests. As of now, a maximum of 12 channels of DDR5 are available on each socket and each channel is controlled by a dedicated UMC. Each UMC, in turn, has its own set of performance monitoring counters. Since the MSR address space for the UMC PERF_CTL and PERF_CTR registers is reused across sockets, uncore groups are created on the basis of socket IDs. Hence, group exclusivity is mandatory while opening events so that events for a UMC can only be opened on CPUs which are on the same socket as the corresponding memory channel. For each socket, the total number of available UMC counters and active memory channels are determined from CPUID leaf 0x80000022 EBX and ECX, respectively. Usually, on Zen 4, each UMC has four counters. MSR assignments are determined on the basis of active UMCs. E.g. if UMCs 1, 4 and 9 are active for a given socket, then * UMC 1 gets MSRs 0xc0010800 to 0xc0010807 as PERF_CTLs and PERF_CTRs * UMC 4 gets MSRs 0xc0010808 to 0xc001080f as PERF_CTLs and PERF_CTRs * UMC 9 gets MSRs 0xc0010810 to 0xc0010817 as PERF_CTLs and PERF_CTRs If there are sockets without any online CPUs when the amd_uncore driver is loaded, UMCs for such sockets will not be discoverable since the mechanism relies on executing the CPUID instruction on an online CPU from the socket. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/b25f391205c22733493abec1ed850b71784edc5f.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 156 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 318982951dd2..9b444ce24108 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -55,6 +55,7 @@ struct amd_uncore_pmu { enum { UNCORE_TYPE_DF, UNCORE_TYPE_L3, + UNCORE_TYPE_UMC, UNCORE_TYPE_MAX }; @@ -286,7 +287,7 @@ static struct device_attribute format_attr_##_var = \ DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35"); DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */ DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */ -DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */ +DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */ DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */ DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */ @@ -296,6 +297,7 @@ DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */ DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */ DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */ /* Common DF and NB attributes */ static struct attribute *amd_uncore_df_format_attr[] = { @@ -312,6 +314,13 @@ static struct attribute *amd_uncore_l3_format_attr[]
 	NULL,
 };
 
+/* Common UMC attributes */
+static struct attribute *amd_uncore_umc_format_attr[] = {
+	&format_attr_event8.attr,	/* event */
+	&format_attr_rdwrmask.attr,	/* rdwrmask */
+	NULL,
+};
+
 /* F17h unique L3 attributes */
 static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
 	&format_attr_slicemask.attr,	/* slicemask */
@@ -349,6 +358,11 @@ static struct attribute_group amd_f19h_uncore_l3_format_group = {
 	.is_visible = amd_f19h_uncore_is_visible,
 };
 
+static struct attribute_group amd_uncore_umc_format_group = {
+	.name = "format",
+	.attrs = amd_uncore_umc_format_attr,
+};
+
 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
 	&amd_uncore_attr_group,
 	&amd_uncore_df_format_group,
@@ -367,6 +381,12 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = {
 	NULL,
 };
 
+static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
+	&amd_uncore_attr_group,
+	&amd_uncore_umc_format_group,
+	NULL,
+};
+
 static __always_inline
 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
 {
@@ -835,6 +855,133 @@ done:
 	return amd_uncore_ctx_init(uncore, cpu);
 }
 
+static int amd_uncore_umc_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = amd_uncore_event_init(event);
+
+	if (ret)
+		return ret;
+
+	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;
+
+	return 0;
+}
+
+static void amd_uncore_umc_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (flags & PERF_EF_RELOAD)
+		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+	hwc->state = 0;
+	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
+	perf_event_update_userpage(event);
+}
+
+static
+void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
+{
+	union cpuid_0x80000022_ebx ebx;
+	union amd_uncore_info info;
+	unsigned int eax, ecx, edx;
+
+	if (pmu_version < 2)
+		return;
+
+	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
+	info.split.aux_data = ecx;	/* stash active mask */
+	info.split.num_pmcs = ebx.split.num_umc_pmc;
+	info.split.gid = topology_die_id(cpu);
+	info.split.cid = topology_die_id(cpu);
+	*per_cpu_ptr(uncore->info, cpu) = info;
+}
+
+static
+int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
+{
+	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
+	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
+	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
+	union amd_uncore_info info;
+	struct amd_uncore_pmu *pmu;
+	int index = 0, gid, i;
+
+	if (pmu_version < 2)
+		return 0;
+
+	/* Run just once */
+	if (uncore->init_done)
+		return amd_uncore_ctx_init(uncore, cpu);
+
+	/* Find unique groups */
+	for_each_online_cpu(i) {
+		info = *per_cpu_ptr(uncore->info, i);
+		gid = info.split.gid;
+		if (test_bit(gid, gmask))
+			continue;
+
+		__set_bit(gid, gmask);
+		group_num_pmus[gid] = hweight32(info.split.aux_data);
+		group_num_pmcs[gid] = info.split.num_pmcs;
+		uncore->num_pmus += group_num_pmus[gid];
+	}
+
+	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
+			       GFP_KERNEL);
+	if (!uncore->pmus) {
+		uncore->num_pmus = 0;
+		goto done;
+	}
+
+	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
+		for (i = 0; i < group_num_pmus[gid]; i++) {
+			pmu = &uncore->pmus[index];
+			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
+			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
+			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
+			pmu->rdpmc_base = -1;
+			pmu->group = gid;
+
+			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+			if (!pmu->ctx)
+				goto done;
+
+			pmu->pmu = (struct pmu) {
+				.task_ctx_nr	= perf_invalid_context,
+				.attr_groups	= amd_uncore_umc_attr_groups,
+				.name		= pmu->name,
+				.event_init	= amd_uncore_umc_event_init,
+				.add		= amd_uncore_add,
+				.del		= amd_uncore_del,
+				.start		= amd_uncore_umc_start,
+				.stop		= amd_uncore_stop,
+				.read		= amd_uncore_read,
+				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+				.module		= THIS_MODULE,
+			};
+
+			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
+				free_percpu(pmu->ctx);
+				pmu->ctx = NULL;
+				goto done;
+			}
+
+			pr_info("%d %s counters detected\n", pmu->num_counters,
+				pmu->pmu.name);
+
+			index++;
+		}
+	}
+
+done:
+	uncore->num_pmus = index;
+	uncore->init_done = true;
+
+	return amd_uncore_ctx_init(uncore, cpu);
+}
+
 static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 	/* UNCORE_TYPE_DF */
 	{
@@ -850,6 +997,13 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 		.move = amd_uncore_ctx_move,
 		.free = amd_uncore_ctx_free,
 	},
+	/* UNCORE_TYPE_UMC */
+	{
+		.scan = amd_uncore_umc_ctx_scan,
+		.init = amd_uncore_umc_ctx_init,
+		.move = amd_uncore_ctx_move,
+		.free = amd_uncore_ctx_free,
+	},
 };
 
 static int __init amd_uncore_init(void)
--
cgit v1.2.3
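A quick way to sanity-check the MSR layout described in the changelog above
is to replay the msr_base arithmetic from amd_uncore_umc_ctx_init() in a
standalone program. The sketch below is not kernel code; it only assumes the
MSR_F19H_UMC_PERF_CTL base value (0xc0010800) and the counters-times-two
stride visible in the patch:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_F19H_UMC_PERF_CTL 0xc0010800ULL /* base from the patch above */

    int main(void)
    {
        /* Example from the changelog: UMCs 1, 4 and 9 are active. */
        uint32_t active_mask = (1u << 1) | (1u << 4) | (1u << 9);
        unsigned int num_counters = 4; /* typical for Zen 4 */
        unsigned int index = 0;        /* position among *active* UMCs */

        for (unsigned int umc = 0; umc < 32; umc++) {
            if (!(active_mask & (1u << umc)))
                continue;

            /* Mirrors: msr_base = MSR_F19H_UMC_PERF_CTL + i * num_counters * 2 */
            uint64_t base = MSR_F19H_UMC_PERF_CTL +
                            (uint64_t)index * num_counters * 2;

            printf("UMC %u -> MSRs 0x%llx to 0x%llx\n", umc,
                   (unsigned long long)base,
                   (unsigned long long)(base + num_counters * 2 - 1));
            index++;
        }
        return 0;
    }

For the mask (1 << 1) | (1 << 4) | (1 << 9), this prints exactly the three
MSR ranges listed in the changelog, confirming that assignments follow the
order of active UMCs rather than the raw UMC numbers.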
From 7543365739a4ff61d40ad53ab68c17d2e7dfb0c9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Fri, 13 Oct 2023 10:18:12 +0300
Subject: perf/x86/amd/uncore: Fix uninitialized return value in amd_uncore_init()

Some of the error paths in this function don't initialize the error code
before returning. Return -ENODEV by default.

Fixes: d6389d3ccc13 ("perf/x86/amd/uncore: Refactor uncore management")
Signed-off-by: Dan Carpenter
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/cec62eba-c4b8-4cb7-9671-58894dd4b974@moroto.mountain
---
 arch/x86/events/amd/uncore.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86/events')

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 9b444ce24108..a389828f378c 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -1009,7 +1009,8 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 static int __init amd_uncore_init(void)
 {
 	struct amd_uncore *uncore;
-	int ret, i;
+	int ret = -ENODEV;
+	int i;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
--
cgit v1.2.3
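The defect fixed above is easiest to see in reduced form. The following
standalone sketch (hypothetical code, not taken from the driver) shows why
an early goto path with no prior assignment returns indeterminate stack
contents, and how initializing ret at declaration makes such paths fail
deterministically:

    #include <stdio.h>
    #include <errno.h>

    /* Hypothetical helper: returns 0 when "probing" fails. */
    int probe_something(void) { return 0; }

    int init_buggy(void)
    {
        int ret;                /* indeterminate until assigned */

        if (!probe_something())
            goto fail;          /* returns uninitialized ret (undefined) */

        ret = 0;
    fail:
        return ret;
    }

    int init_fixed(void)
    {
        int ret = -ENODEV;      /* every early exit now reports -ENODEV */

        if (!probe_something())
            goto fail;

        ret = 0;
    fail:
        return ret;
    }

    int main(void)
    {
        /* init_buggy() is shown only for reading; calling it is undefined. */
        printf("fixed path returns %d\n", init_fixed());
        return 0;
    }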
From 744940f1921c8feb90e3c4bcc1e153fdd6e10fe2 Mon Sep 17 00:00:00 2001
From: Sandipan Das
Date: Mon, 16 Oct 2023 11:37:43 +0530
Subject: perf/x86/amd/uncore: Pass through error code for initialization
 failures, instead of -ENODEV

Pass through the appropriate error code when the registration of hotplug
callbacks fails during initialization, instead of returning a blanket
-ENODEV.

[ mingo: Updated the changelog. ]

Reported-by: Dan Carpenter
Signed-off-by: Sandipan Das
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20231016060743.332051-1-sandipan.das@amd.com
---
 arch/x86/events/amd/uncore.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'arch/x86/events')

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a389828f378c..4429eac00185 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -1040,20 +1040,25 @@ static int __init amd_uncore_init(void)
 	/*
 	 * Install callbacks. Core will call them for each online cpu.
 	 */
-	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
-			      "perf/x86/amd/uncore:prepare",
-			      NULL, amd_uncore_cpu_dead))
+	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
+				"perf/x86/amd/uncore:prepare",
+				NULL, amd_uncore_cpu_dead);
+	if (ret)
 		goto fail;
 
-	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
-			      "perf/x86/amd/uncore:starting",
-			      amd_uncore_cpu_starting, NULL))
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
+				"perf/x86/amd/uncore:starting",
+				amd_uncore_cpu_starting, NULL);
+	if (ret)
 		goto fail_prep;
+
-	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
-			      "perf/x86/amd/uncore:online",
-			      amd_uncore_cpu_online,
-			      amd_uncore_cpu_down_prepare))
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
+				"perf/x86/amd/uncore:online",
+				amd_uncore_cpu_online,
+				amd_uncore_cpu_down_prepare);
+	if (ret)
 		goto fail_start;
+
 	return 0;
 
 fail_start:
--
cgit v1.2.3
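To observe from userspace the enumeration the UMC patch relies on, CPUID
leaf 0x80000022 (EXT_PERFMON_DEBUG_FEATURES) can be queried directly. Below
is a minimal sketch, assuming the kernel's cpuid_0x80000022_ebx layout with
the UMC counter count in EBX bits 21:16; as the changelog notes, CPUID must
be executed on a CPU of the socket being probed, so pin the program
accordingly (e.g. with taskset):

    #include <stdio.h>
    #include <cpuid.h>

    #define EXT_PERFMON_DEBUG_FEATURES 0x80000022

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(EXT_PERFMON_DEBUG_FEATURES, 0,
                               &eax, &ebx, &ecx, &edx)) {
            fprintf(stderr, "CPUID leaf 0x80000022 not available\n");
            return 1;
        }

        /* Assumed layout: EBX[21:16] = number of UMC counters per channel. */
        unsigned int num_umc_pmc = (ebx >> 16) & 0x3f;
        /* ECX = mask of active memory channels (one bit per UMC). */
        unsigned int active = __builtin_popcount(ecx);

        printf("UMC counters per channel: %u\n", num_umc_pmc);
        printf("active memory channels  : %u (mask 0x%x)\n", active, ecx);
        return 0;
    }

On a Zen 4 system with four counters per UMC and all twelve channels
populated, this would be expected to report 4 and 12 respectively, matching
what the driver derives in amd_uncore_umc_ctx_scan().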