From d4b5694c75d4eba8238d541a55da0c67e876213e Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:01 -0700 Subject: perf/x86/intel: Use the common uarch name for the shared functions From PMU's perspective, the SPR/GNR server has a similar uarch to the ADL/MTL client p-core. Many functions are shared. However, the shared function name uses the abbreviation of the server product code name, rather than the common uarch code name. Rename these internal shared functions by the common uarch name. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-2-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 64 ++++++++++++++++++++++---------------------- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 2 +- 3 files changed, 34 insertions(+), 34 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index fa355d3658a6..93be19c27ec7 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -299,7 +299,7 @@ static struct extra_reg intel_icl_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct extra_reg intel_spr_extra_regs[] __read_mostly = { +static struct extra_reg intel_glc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -309,7 +309,7 @@ static struct extra_reg intel_spr_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; -static struct event_constraint intel_spr_event_constraints[] = { +static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -349,7 +349,7 @@ static struct event_constraint intel_spr_event_constraints[] = { EVENT_CONSTRAINT_END }; -static struct extra_reg intel_gnr_extra_regs[] __read_mostly = { +static struct extra_reg intel_rwc_extra_regs[] __read_mostly = { INTEL_UEVENT_EXTRA_REG(0x012a, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0), INTEL_UEVENT_EXTRA_REG(0x012b, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1), INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd), @@ -473,7 +473,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst const u64 spr_hw_cache_event_ids +static __initconst const u64 glc_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -552,7 +552,7 @@ static __initconst const u64 spr_hw_cache_event_ids }, }; -static __initconst const u64 spr_hw_cache_extra_regs +static __initconst const u64 glc_hw_cache_extra_regs [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -4273,7 +4273,7 @@ icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, } static struct event_constraint * -spr_get_event_constraints(struct cpu_hw_events *cpuc, int idx, +glc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct perf_event *event) { struct event_constraint *c; @@ -4362,7 +4362,7 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); if (pmu->cpu_type == hybrid_big) - return spr_get_event_constraints(cpuc, idx, event); + return glc_get_event_constraints(cpuc, idx, event); else if (pmu->cpu_type == 
hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); @@ -4409,7 +4409,7 @@ rwc_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; - c = spr_get_event_constraints(cpuc, idx, event); + c = glc_get_event_constraints(cpuc, idx, event); /* The Retire Latency is not supported by the fixed counter 0. */ if (event->attr.precise_ip && @@ -4490,7 +4490,7 @@ static void nhm_limit_period(struct perf_event *event, s64 *left) *left = max(*left, 32LL); } -static void spr_limit_period(struct perf_event *event, s64 *left) +static void glc_limit_period(struct perf_event *event, s64 *left) { if (event->attr.precise_ip == 3) *left = max(*left, 128LL); @@ -5337,14 +5337,14 @@ static struct attribute *icl_tsx_events_attrs[] = { EVENT_ATTR_STR(mem-stores, mem_st_spr, "event=0xcd,umask=0x2"); EVENT_ATTR_STR(mem-loads-aux, mem_ld_aux, "event=0x03,umask=0x82"); -static struct attribute *spr_events_attrs[] = { +static struct attribute *glc_events_attrs[] = { EVENT_PTR(mem_ld_hsw), EVENT_PTR(mem_st_spr), EVENT_PTR(mem_ld_aux), NULL, }; -static struct attribute *spr_td_events_attrs[] = { +static struct attribute *glc_td_events_attrs[] = { EVENT_PTR(slots), EVENT_PTR(td_retiring), EVENT_PTR(td_bad_spec), @@ -5357,7 +5357,7 @@ static struct attribute *spr_td_events_attrs[] = { NULL, }; -static struct attribute *spr_tsx_events_attrs[] = { +static struct attribute *glc_tsx_events_attrs[] = { EVENT_PTR(tx_start), EVENT_PTR(tx_abort), EVENT_PTR(tx_commit), @@ -6215,7 +6215,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6246,7 +6246,7 @@ __init int intel_pmu_init(void) intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; td_attr = cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; @@ -6563,20 +6563,20 @@ __init int intel_pmu_init(void) case INTEL_FAM6_SAPPHIRERAPIDS_X: case INTEL_FAM6_EMERALDRAPIDS_X: x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - x86_pmu.extra_regs = intel_spr_extra_regs; + x86_pmu.extra_regs = intel_glc_extra_regs; fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: pmem = true; x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - x86_pmu.event_constraints = intel_spr_event_constraints; - x86_pmu.pebs_constraints = intel_spr_pebs_event_constraints; + x86_pmu.event_constraints = intel_glc_event_constraints; + x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; if (!x86_pmu.extra_regs) - x86_pmu.extra_regs = intel_gnr_extra_regs; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.extra_regs = intel_rwc_extra_regs; + x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; x86_pmu.pebs_aliases = NULL; x86_pmu.pebs_prec_dist = true; @@ -6586,13 +6586,13 @@ __init int intel_pmu_init(void) x86_pmu.flags |= 
PMU_FL_INSTR_LATENCY; x86_pmu.hw_config = hsw_hw_config; - x86_pmu.get_event_constraints = spr_get_event_constraints; + x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; extra_skl_attr = skl_format_attr; - mem_attr = spr_events_attrs; - td_attr = spr_td_events_attrs; - tsx_attr = spr_tsx_events_attrs; + mem_attr = glc_events_attrs; + td_attr = glc_td_events_attrs; + tsx_attr = glc_tsx_events_attrs; x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); x86_pmu.lbr_pt_coexist = true; intel_pmu_pebs_data_source_skl(pmem); @@ -6642,7 +6642,7 @@ __init int intel_pmu_init(void) x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = spr_limit_period; + x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; /* * The rtm_abort_event is used to check whether to enable GPRs @@ -6691,11 +6691,11 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, spr_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, spr_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_spr_event_constraints; - pmu->pebs_constraints = intel_spr_pebs_event_constraints; - pmu->extra_regs = intel_spr_extra_regs; + memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); + memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); + pmu->event_constraints = intel_glc_event_constraints; + pmu->pebs_constraints = intel_glc_pebs_event_constraints; + pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; @@ -6719,7 +6719,7 @@ __init int intel_pmu_init(void) pmu->pebs_constraints = intel_grt_pebs_event_constraints; pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_gnr_extra_regs; + x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index eb8dd8b8a1e8..74642469ca7b 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1058,7 +1058,7 @@ struct event_constraint intel_icl_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; -struct event_constraint intel_spr_pebs_event_constraints[] = { +struct event_constraint intel_glc_pebs_event_constraints[] = { INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index c8ba2be7585d..96a427fc55cf 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1521,7 +1521,7 @@ extern struct event_constraint intel_skl_pebs_event_constraints[]; extern struct event_constraint intel_icl_pebs_event_constraints[]; -extern struct event_constraint intel_spr_pebs_event_constraints[]; +extern struct event_constraint intel_glc_pebs_event_constraints[]; struct event_constraint *intel_pebs_constraints(struct perf_event *event); -- cgit v1.2.3 From 0ba0c03528e918a8f6b5aa63d502fdc6a9d80fc7 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:02 -0700 Subject: perf/x86/intel: Factor out the initialization code for SPR The SPR and ADL p-core have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_glc() for the common initialization code. The common part of the ADL p-core will be replaced by the later patch. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-3-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 49 +++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 23 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 93be19c27ec7..86aad3224a76 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5916,6 +5916,30 @@ static __always_inline bool is_mtl(u8 x86_model) (x86_model == INTEL_FAM6_METEORLAKE_L); } +static __always_inline void intel_pmu_init_glc(struct pmu *pmu) +{ + x86_pmu.late_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_NO_HT_SHARING; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); + x86_pmu.lbr_pt_coexist = true; + x86_pmu.num_topdown_events = 8; + static_call_update(intel_pmu_update_topdown_event, + &icl_update_topdown_event); + static_call_update(intel_pmu_set_topdown_event_period, + &icl_set_topdown_event_period); + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid(pmu, event_constraints) = intel_glc_event_constraints; + hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6567,24 +6591,10 @@ __init int intel_pmu_init(void) fallthrough; case INTEL_FAM6_GRANITERAPIDS_X: case INTEL_FAM6_GRANITERAPIDS_D: - pmem = true; - x86_pmu.late_ack = true; - memcpy(hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - 
memcpy(hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); - - x86_pmu.event_constraints = intel_glc_event_constraints; - x86_pmu.pebs_constraints = intel_glc_pebs_event_constraints; + intel_pmu_init_glc(NULL); if (!x86_pmu.extra_regs) x86_pmu.extra_regs = intel_rwc_extra_regs; - x86_pmu.limit_period = glc_limit_period; x86_pmu.pebs_ept = 1; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = glc_get_event_constraints; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? @@ -6593,14 +6603,7 @@ __init int intel_pmu_init(void) mem_attr = glc_events_attrs; td_attr = glc_td_events_attrs; tsx_attr = glc_tsx_events_attrs; - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); - x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(pmem); - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &icl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &icl_set_topdown_event_period); + intel_pmu_pebs_data_source_skl(true); pr_cont("Sapphire Rapids events, "); name = "sapphire_rapids"; break; -- cgit v1.2.3 From d87d221f854b62f5e8026505497d33404ef6050c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:03 -0700 Subject: perf/x86/intel: Factor out the initialization code for ADL e-core From PMU's perspective, the ADL e-core and newer SRF/GRR have a similar uarch. Most of the initialization code can be shared. Factor out intel_pmu_init_grt() for the common initialization code. The common part of the ADL e-core will be replaced by the later patch. 
Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-4-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 58 ++++++++++++++++---------------------------- 1 file changed, 21 insertions(+), 37 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 86aad3224a76..95ac7bbd7fab 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -5940,6 +5940,25 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; } +static __always_inline void intel_pmu_init_grt(struct pmu *pmu) +{ + x86_pmu.mid_ack = true; + x86_pmu.limit_period = glc_limit_period; + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.pebs_block = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); + hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; + hybrid(pmu, extra_regs) = intel_grt_extra_regs; +} + __init int intel_pmu_init(void) { struct attribute **extra_skl_attr = &empty_attrs; @@ -6218,28 +6237,10 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_ATOM_GRACEMONT: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; - x86_pmu.extra_regs = intel_grt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - + intel_pmu_init_grt(NULL); intel_pmu_pebs_data_source_grt(); x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.get_event_constraints = tnt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr = tnt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = nhm_format_attr; @@ -6249,28 +6250,11 @@ __init int intel_pmu_init(void) case INTEL_FAM6_ATOM_CRESTMONT: case INTEL_FAM6_ATOM_CRESTMONT_X: - x86_pmu.mid_ack = true; - memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, - sizeof(hw_cache_extra_regs)); - hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - - x86_pmu.event_constraints = intel_slm_event_constraints; - x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + intel_pmu_init_grt(NULL); x86_pmu.extra_regs = intel_cmt_extra_regs; - - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.lbr_pt_coexist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - intel_pmu_pebs_data_source_cmt(); x86_pmu.pebs_latency_data = mtl_latency_data_small; x86_pmu.get_event_constraints = cmt_get_event_constraints; - x86_pmu.limit_period = glc_limit_period; td_attr 
= cmt_events_attrs; mem_attr = grt_mem_attrs; extra_attr = cmt_format_attr; -- cgit v1.2.3 From 299a5fc8e783eed705015e83e381912dbbf3eabc Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:04 -0700 Subject: perf/x86/intel: Apply the common initialization code for ADL Use the intel_pmu_init_glc() and intel_pmu_init_grt() to replace the duplicate code for ADL. The current code already checks the PERF_X86_EVENT_TOPDOWN flag before invoking the Topdown metrics functions. (The PERF_X86_EVENT_TOPDOWN flag is to indicate the Topdown metric feature, which is only available for the p-core.) Drop the unnecessary adl_set_topdown_event_period() and adl_update_topdown_event(). Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-5-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 53 ++------------------------------------------ 1 file changed, 2 insertions(+), 51 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 95ac7bbd7fab..a5ba491a6497 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2556,16 +2556,6 @@ static int icl_set_topdown_event_period(struct perf_event *event) return 0; } -static int adl_set_topdown_event_period(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_set_topdown_event_period(event); -} - DEFINE_STATIC_CALL(intel_pmu_set_topdown_event_period, x86_perf_event_set_period); static inline u64 icl_get_metrics_event_value(u64 metric, u64 slots, int idx) @@ -2708,16 +2698,6 @@ static u64 icl_update_topdown_event(struct perf_event *event) x86_pmu.num_topdown_events - 1); } -static u64 adl_update_topdown_event(struct perf_event *event) -{ - struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - - if (pmu->cpu_type != hybrid_big) - return 0; - - return icl_update_topdown_event(event); -} - DEFINE_STATIC_CALL(intel_pmu_update_topdown_event, x86_perf_event_update); static void intel_pmu_read_topdown_event(struct perf_event *event) @@ -6612,32 +6592,11 @@ __init int intel_pmu_init(void) static_branch_enable(&perf_is_hybrid); x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; - x86_pmu.pebs_aliases = NULL; - x86_pmu.pebs_prec_dist = true; - x86_pmu.pebs_block = true; - x86_pmu.flags |= PMU_FL_HAS_RSP_1; - x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - x86_pmu.flags |= PMU_FL_INSTR_LATENCY; - x86_pmu.lbr_pt_coexist = true; x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.num_topdown_events = 8; - static_call_update(intel_pmu_update_topdown_event, - &adl_update_topdown_event); - static_call_update(intel_pmu_set_topdown_event_period, - &adl_set_topdown_event_period); - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; - x86_pmu.limit_period = glc_limit_period; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; - /* - * The rtm_abort_event is used to check whether to enable GPRs - * for the RTM abort event. Atom doesn't have the RTM abort - * event. There is no harmful to set it in the common - * x86_pmu.rtm_abort_event. 
- */ - x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xc9, .umask=0x04); td_attr = adl_hybrid_events_attrs; mem_attr = adl_hybrid_mem_attrs; @@ -6649,6 +6608,7 @@ __init int intel_pmu_init(void) pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; pmu->cpu_type = hybrid_big; + intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; @@ -6678,16 +6638,13 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 1; pmu->intel_cap.pebs_output_pt_available = 0; - memcpy(pmu->hw_cache_event_ids, glc_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, glc_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->event_constraints = intel_glc_event_constraints; - pmu->pebs_constraints = intel_glc_pebs_event_constraints; pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; pmu->cpu_type = hybrid_small; + intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; pmu->num_counters_fixed = x86_pmu.num_counters_fixed; @@ -6699,12 +6656,6 @@ __init int intel_pmu_init(void) pmu->intel_cap.perf_metrics = 0; pmu->intel_cap.pebs_output_pt_available = 1; - memcpy(pmu->hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(pmu->hw_cache_event_ids)); - memcpy(pmu->hw_cache_extra_regs, tnt_hw_cache_extra_regs, sizeof(pmu->hw_cache_extra_regs)); - pmu->hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - pmu->event_constraints = intel_slm_event_constraints; - pmu->pebs_constraints = intel_grt_pebs_event_constraints; - pmu->extra_regs = intel_grt_extra_regs; if (is_mtl(boot_cpu_data.x86_model)) { x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; x86_pmu.pebs_latency_data = mtl_latency_data_small; -- cgit v1.2.3 From b0560bfd4b70277a4936c82e50e940aa253c95bf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:05 -0700 Subject: perf/x86/intel: Clean up the hybrid CPU type handling code There is a fairly long list of grievances about the current code. The main beefs: 1. hybrid_big_small assumes that the *HARDWARE* (CPUID) provided core types are a bitmap. They are not. If Intel happened to make a core type of 0xff, hilarity would ensue. 2. adl_get_hybrid_cpu_type() utterly inscrutable. There are precisely zero comments and zero changelog about what it is attempting to do. According to Kan, the adl_get_hybrid_cpu_type() is there because some Alder Lake (ADL) CPUs can do some silly things. Some ADL models are *supposed* to be hybrid CPUs with big and little cores, but there are some SKUs that only have big cores. CPUID(0x1a) on those CPUs does not say that the CPUs are big cores. It apparently just returns 0x0. It confuses perf because it expects to see either 0x40 (Core) or 0x20 (Atom). The perf workaround for this is to watch for a CPU core saying it is type 0x0. If that happens on an Alder Lake, it calls x86_pmu.get_hybrid_cpu_type() and just assumes that the core is a Core (0x40) CPU. To fix up the mess, separate out the CPU types and the 'pmu' types. This allows 'hybrid_pmu_type' bitmaps without worrying that some future CPU type will set multiple bits. Since the types are now separate, add a function to glue them back together again. Actual comment on the situation in the glue function (find_hybrid_pmu_for_cpu()). 
Also, give ->get_hybrid_cpu_type() a real return type and make it clear that it is overriding the *CPU* type, not the PMU type. Rename cpu_type to pmu_type in the struct x86_hybrid_pmu to reflect the change. Originally-by: Dave Hansen Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-6-kan.liang@linux.intel.com --- arch/x86/events/core.c | 6 ++-- arch/x86/events/intel/core.c | 69 ++++++++++++++++++++++++++++++-------------- arch/x86/events/intel/ds.c | 2 +- arch/x86/events/perf_event.h | 35 +++++++++++++--------- 4 files changed, 72 insertions(+), 40 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 185f902e5f28..40ad1425ffa2 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1887,9 +1887,9 @@ ssize_t events_hybrid_sysfs_show(struct device *dev, str = pmu_attr->event_str; for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (!(x86_pmu.hybrid_pmu[i].cpu_type & pmu_attr->pmu_type)) + if (!(x86_pmu.hybrid_pmu[i].pmu_type & pmu_attr->pmu_type)) continue; - if (x86_pmu.hybrid_pmu[i].cpu_type & pmu->cpu_type) { + if (x86_pmu.hybrid_pmu[i].pmu_type & pmu->pmu_type) { next_str = strchr(str, ';'); if (next_str) return snprintf(page, next_str - str + 1, "%s", str); @@ -2169,7 +2169,7 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, - (hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1); + (hybrid_pmu->pmu_type == hybrid_big) ? PERF_TYPE_RAW : -1); if (err) break; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a5ba491a6497..9ac2e122113b 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3849,7 +3849,7 @@ static inline bool require_mem_loads_aux_event(struct perf_event *event) return false; if (is_hybrid()) - return hybrid_pmu(event->pmu)->cpu_type == hybrid_big; + return hybrid_pmu(event->pmu)->pmu_type == hybrid_big; return true; } @@ -4341,9 +4341,9 @@ adl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return glc_get_event_constraints(cpuc, idx, event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return tnt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4413,9 +4413,9 @@ mtl_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return rwc_get_event_constraints(cpuc, idx, event); - if (pmu->cpu_type == hybrid_small) + if (pmu->pmu_type == hybrid_small) return cmt_get_event_constraints(cpuc, idx, event); WARN_ON(1); @@ -4426,18 +4426,18 @@ static int adl_hw_config(struct perf_event *event) { struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu); - if (pmu->cpu_type == hybrid_big) + if (pmu->pmu_type == hybrid_big) return hsw_hw_config(event); - else if (pmu->cpu_type == hybrid_small) + else if (pmu->pmu_type == hybrid_small) return intel_pmu_hw_config(event); WARN_ON(1); return -EOPNOTSUPP; } -static u8 adl_get_hybrid_cpu_type(void) +static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) { - return hybrid_big; + return HYBRID_INTEL_CORE; } /* @@ -4613,22 +4613,47 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) } } -static bool init_hybrid_pmu(int cpu) 
+static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); u8 cpu_type = get_this_hybrid_cpu_type(); - struct x86_hybrid_pmu *pmu = NULL; int i; - if (!cpu_type && x86_pmu.get_hybrid_cpu_type) - cpu_type = x86_pmu.get_hybrid_cpu_type(); + /* + * This is running on a CPU model that is known to have hybrid + * configurations. But the CPU told us it is not hybrid, shame + * on it. There should be a fixup function provided for these + * troublesome CPUs (->get_hybrid_cpu_type). + */ + if (cpu_type == HYBRID_INTEL_NONE) { + if (x86_pmu.get_hybrid_cpu_type) + cpu_type = x86_pmu.get_hybrid_cpu_type(); + else + return NULL; + } + /* + * This essentially just maps between the 'hybrid_cpu_type' + * and 'hybrid_pmu_type' enums: + */ for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - if (x86_pmu.hybrid_pmu[i].cpu_type == cpu_type) { - pmu = &x86_pmu.hybrid_pmu[i]; - break; - } + enum hybrid_pmu_type pmu_type = x86_pmu.hybrid_pmu[i].pmu_type; + + if (cpu_type == HYBRID_INTEL_CORE && + pmu_type == hybrid_big) + return &x86_pmu.hybrid_pmu[i]; + if (cpu_type == HYBRID_INTEL_ATOM && + pmu_type == hybrid_small) + return &x86_pmu.hybrid_pmu[i]; } + + return NULL; +} + +static bool init_hybrid_pmu(int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct x86_hybrid_pmu *pmu = find_hybrid_pmu_for_cpu(); + if (WARN_ON_ONCE(!pmu || (pmu->pmu.type == -1))) { cpuc->pmu = NULL; return false; @@ -5679,7 +5704,7 @@ static bool is_attr_for_this_pmu(struct kobject *kobj, struct attribute *attr) struct perf_pmu_events_hybrid_attr *pmu_attr = container_of(attr, struct perf_pmu_events_hybrid_attr, attr.attr); - return pmu->cpu_type & pmu_attr->pmu_type; + return pmu->pmu_type & pmu_attr->pmu_type; } static umode_t hybrid_events_is_visible(struct kobject *kobj, @@ -5716,7 +5741,7 @@ static umode_t hybrid_format_is_visible(struct kobject *kobj, container_of(attr, struct perf_pmu_format_hybrid_attr, attr.attr); int cpu = hybrid_find_supported_cpu(pmu); - return (cpu >= 0) && (pmu->cpu_type & pmu_attr->pmu_type) ? attr->mode : 0; + return (cpu >= 0) && (pmu->pmu_type & pmu_attr->pmu_type) ? 
attr->mode : 0; } static struct attribute_group hybrid_group_events_td = { @@ -6607,7 +6632,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; pmu->name = "cpu_core"; - pmu->cpu_type = hybrid_big; + pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { @@ -6643,7 +6668,7 @@ __init int intel_pmu_init(void) /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; pmu->name = "cpu_atom"; - pmu->cpu_type = hybrid_small; + pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); pmu->mid_ack = true; pmu->num_counters = x86_pmu.num_counters; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 74642469ca7b..bf97ab904d40 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -261,7 +261,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status, { u64 val; - WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big); dse &= PERF_PEBS_DATA_SOURCE_MASK; val = hybrid_var(event->pmu, pebs_data_source)[dse]; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 96a427fc55cf..53dd5d495ba6 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -652,10 +652,29 @@ enum { #define PERF_PEBS_DATA_SOURCE_MAX 0x10 #define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1) +enum hybrid_cpu_type { + HYBRID_INTEL_NONE, + HYBRID_INTEL_ATOM = 0x20, + HYBRID_INTEL_CORE = 0x40, +}; + +enum hybrid_pmu_type { + not_hybrid, + hybrid_small = BIT(0), + hybrid_big = BIT(1), + + hybrid_big_small = hybrid_big | hybrid_small, /* only used for matching */ +}; + +#define X86_HYBRID_PMU_ATOM_IDX 0 +#define X86_HYBRID_PMU_CORE_IDX 1 + +#define X86_HYBRID_NUM_PMUS 2 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; - u8 cpu_type; + enum hybrid_pmu_type pmu_type; cpumask_t supported_cpus; union perf_capabilities intel_cap; u64 intel_ctrl; @@ -721,18 +740,6 @@ extern struct static_key_false perf_is_hybrid; __Fp; \ }) -enum hybrid_pmu_type { - hybrid_big = 0x40, - hybrid_small = 0x20, - - hybrid_big_small = hybrid_big | hybrid_small, -}; - -#define X86_HYBRID_PMU_ATOM_IDX 0 -#define X86_HYBRID_PMU_CORE_IDX 1 - -#define X86_HYBRID_NUM_PMUS 2 - /* * struct x86_pmu - generic x86 pmu */ @@ -940,7 +947,7 @@ struct x86_pmu { */ int num_hybrid_pmus; struct x86_hybrid_pmu *hybrid_pmu; - u8 (*get_hybrid_cpu_type) (void); + enum hybrid_cpu_type (*get_hybrid_cpu_type) (void); }; struct x86_perf_task_context_opt { -- cgit v1.2.3 From 97588df87b56e27fd2b5d928d61c7a53e38afbb0 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 29 Aug 2023 05:58:06 -0700 Subject: perf/x86/intel: Add common intel_pmu_init_hybrid() The current hybrid initialization codes aren't well organized and are hard to read. Factor out intel_pmu_init_hybrid() to do a common setup for each hybrid PMU. The PMU-specific capability will be updated later via either hard code (ADL) or CPUID hybrid enumeration (MTL). Splitting the ADL and MTL initialization codes, since they have different uarches. The hard code PMU capabilities are not required for MTL either. They can be enumerated by the new leaf 0x23 and IA32_PERF_CAPABILITIES MSR. The hybrid enumeration of the IA32_PERF_CAPABILITIES MSR is broken on MTL. Using the default value. 
Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230829125806.3016082-7-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 162 +++++++++++++++++++++++++++++-------------- 1 file changed, 111 insertions(+), 51 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9ac2e122113b..83dfbba1ee69 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4598,6 +4598,16 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static inline bool intel_pmu_broken_perf_cap(void) +{ + /* The Perf Metric (Bit 15) is always cleared */ + if ((boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE) || + (boot_cpu_data.x86_model == INTEL_FAM6_METEORLAKE_L)) + return true; + + return false; +} + static void update_pmu_cap(struct x86_hybrid_pmu *pmu) { unsigned int sub_bitmaps = cpuid_eax(ARCH_PERFMON_EXT_LEAF); @@ -4610,7 +4620,27 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->num_counters_fixed = fls(ebx); intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, &pmu->intel_ctrl, ebx); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); } + + + if (!intel_pmu_broken_perf_cap()) { + /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); + } + + if (pmu->intel_cap.perf_metrics) + pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; + else + pmu->intel_ctrl &= ~(1ULL << GLOBAL_CTRL_EN_PERF_METRICS); + + if (pmu->intel_cap.pebs_output_pt_available) + pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; + else + pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -5915,10 +5945,52 @@ static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) } } -static __always_inline bool is_mtl(u8 x86_model) +static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { + { hybrid_small, "cpu_atom" }, + { hybrid_big, "cpu_core" }, +}; + +static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) { - return (x86_model == INTEL_FAM6_METEORLAKE) || - (x86_model == INTEL_FAM6_METEORLAKE_L); + unsigned long pmus_mask = pmus; + struct x86_hybrid_pmu *pmu; + int idx = 0, bit; + + x86_pmu.num_hybrid_pmus = hweight_long(pmus_mask); + x86_pmu.hybrid_pmu = kcalloc(x86_pmu.num_hybrid_pmus, + sizeof(struct x86_hybrid_pmu), + GFP_KERNEL); + if (!x86_pmu.hybrid_pmu) + return -ENOMEM; + + static_branch_enable(&perf_is_hybrid); + x86_pmu.filter = intel_pmu_filter; + + for_each_set_bit(bit, &pmus_mask, ARRAY_SIZE(intel_hybrid_pmu_type_map)) { + pmu = &x86_pmu.hybrid_pmu[idx++]; + pmu->pmu_type = intel_hybrid_pmu_type_map[bit].id; + pmu->name = intel_hybrid_pmu_type_map[bit].name; + + pmu->num_counters = x86_pmu.num_counters; + pmu->num_counters_fixed = x86_pmu.num_counters_fixed; + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); + + pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; + if (pmu->pmu_type & hybrid_small) { + pmu->intel_cap.perf_metrics = 0; + 
pmu->intel_cap.pebs_output_pt_available = 1; + pmu->mid_ack = true; + } else if (pmu->pmu_type & hybrid_big) { + pmu->intel_cap.perf_metrics = 1; + pmu->intel_cap.pebs_output_pt_available = 0; + pmu->late_ack = true; + } + } + + return 0; } static __always_inline void intel_pmu_init_glc(struct pmu *pmu) @@ -6602,23 +6674,14 @@ __init int intel_pmu_init(void) case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: - case INTEL_FAM6_METEORLAKE: - case INTEL_FAM6_METEORLAKE_L: /* * Alder Lake has 2 types of CPU, core and atom. * * Initialize the common PerfMon capabilities here. */ - x86_pmu.hybrid_pmu = kcalloc(X86_HYBRID_NUM_PMUS, - sizeof(struct x86_hybrid_pmu), - GFP_KERNEL); - if (!x86_pmu.hybrid_pmu) - return -ENOMEM; - static_branch_enable(&perf_is_hybrid); - x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS; + intel_pmu_init_hybrid(hybrid_big_small); x86_pmu.pebs_latency_data = adl_latency_data_small; - x86_pmu.filter = intel_pmu_filter; x86_pmu.get_event_constraints = adl_get_event_constraints; x86_pmu.hw_config = adl_hw_config; x86_pmu.get_hybrid_cpu_type = adl_get_hybrid_cpu_type; @@ -6631,10 +6694,7 @@ __init int intel_pmu_init(void) /* Initialize big core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; - pmu->name = "cpu_core"; - pmu->pmu_type = hybrid_big; intel_pmu_init_glc(&pmu->pmu); - pmu->late_ack = true; if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { pmu->num_counters = x86_pmu.num_counters + 2; pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1; @@ -6659,45 +6719,45 @@ __init int intel_pmu_init(void) pmu->unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 1; - pmu->intel_cap.pebs_output_pt_available = 0; - pmu->extra_regs = intel_glc_extra_regs; /* Initialize Atom core specific PerfMon capabilities.*/ pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; - pmu->name = "cpu_atom"; - pmu->pmu_type = hybrid_small; intel_pmu_init_grt(&pmu->pmu); - pmu->mid_ack = true; - pmu->num_counters = x86_pmu.num_counters; - pmu->num_counters_fixed = x86_pmu.num_counters_fixed; - pmu->max_pebs_events = x86_pmu.max_pebs_events; - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); - pmu->intel_cap.capabilities = x86_pmu.intel_cap.capabilities; - pmu->intel_cap.perf_metrics = 0; - pmu->intel_cap.pebs_output_pt_available = 1; - - if (is_mtl(boot_cpu_data.x86_model)) { - x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].extra_regs = intel_rwc_extra_regs; - x86_pmu.pebs_latency_data = mtl_latency_data_small; - extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? 
- mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; - mem_attr = mtl_hybrid_mem_attrs; - intel_pmu_pebs_data_source_mtl(); - x86_pmu.get_event_constraints = mtl_get_event_constraints; - pmu->extra_regs = intel_cmt_extra_regs; - pr_cont("Meteorlake Hybrid events, "); - name = "meteorlake_hybrid"; - } else { - x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; - intel_pmu_pebs_data_source_adl(); - pr_cont("Alderlake Hybrid events, "); - name = "alderlake_hybrid"; - } + + x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; + intel_pmu_pebs_data_source_adl(); + pr_cont("Alderlake Hybrid events, "); + name = "alderlake_hybrid"; + break; + + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + intel_pmu_init_hybrid(hybrid_big_small); + + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = mtl_get_event_constraints; + x86_pmu.hw_config = adl_hw_config; + + td_attr = adl_hybrid_events_attrs; + mem_attr = mtl_hybrid_mem_attrs; + tsx_attr = adl_hybrid_tsx_attrs; + extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? + mtl_hybrid_extra_attr_rtm : mtl_hybrid_extra_attr; + + /* Initialize big core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX]; + intel_pmu_init_glc(&pmu->pmu); + pmu->extra_regs = intel_rwc_extra_regs; + + /* Initialize Atom core specific PerfMon capabilities.*/ + pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX]; + intel_pmu_init_grt(&pmu->pmu); + pmu->extra_regs = intel_cmt_extra_regs; + + intel_pmu_pebs_data_source_mtl(); + pr_cont("Meteorlake Hybrid events, "); + name = "meteorlake_hybrid"; break; default: @@ -6809,7 +6869,7 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid()) + if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) intel_pmu_check_hybrid_pmus((u64)fixed_mask); if (x86_pmu.intel_cap.pebs_timing_info) -- cgit v1.2.3 From 950ecdc672aec9cd29036b2e2535b07c103af494 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 06:51:28 -0700 Subject: perf/x86/intel: Fix broken fixed event constraints extension Unnecessary multiplexing is triggered when running an "instructions" event on an MTL. perf stat -e cpu_core/instructions/,cpu_core/instructions/ -a sleep 1 Performance counter stats for 'system wide': 115,489,000 cpu_core/instructions/ (50.02%) 127,433,777 cpu_core/instructions/ (49.98%) 1.002294504 seconds time elapsed Linux architectural perf events, e.g., cycles and instructions, usually have dedicated fixed counters. These events also have equivalent events which can be used in the general-purpose counters. The counters are precious. In the intel_pmu_check_event_constraints(), perf check/extend the event constraints of these events. So these events can utilize both fixed counters and general-purpose counters. The following cleanup commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") forgot adding the intel_pmu_check_event_constraints() into update_pmu_cap(). The architectural perf events cannot utilize the general-purpose counters. The code to check and update the counters, event constraints and extra_regs is the same among hybrid systems. Move intel_pmu_check_hybrid_pmus() to init_hybrid_pmu(), and emove the duplicate check in update_pmu_cap(). 
Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911135128.2322833-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 65 ++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 39 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 83dfbba1ee69..e1543d6dc48a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4598,6 +4598,13 @@ static void intel_pmu_check_num_counters(int *num_counters, int *num_counters_fixed, u64 *intel_ctrl, u64 fixed_mask); +static void intel_pmu_check_event_constraints(struct event_constraint *event_constraints, + int num_counters, + int num_counters_fixed, + u64 intel_ctrl); + +static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs); + static inline bool intel_pmu_broken_perf_cap(void) { /* The Perf Metric (Bit 15) is always cleared */ @@ -4618,12 +4625,6 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) &eax, &ebx, &ecx, &edx); pmu->num_counters = fls(eax); pmu->num_counters_fixed = fls(ebx); - intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, - &pmu->intel_ctrl, ebx); - pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); - pmu->unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, - 0, pmu->num_counters, 0, 0); } @@ -4631,6 +4632,16 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) /* Perf Metric (Bit 15) and PEBS via PT (Bit 16) are hybrid enumeration */ rdmsrl(MSR_IA32_PERF_CAPABILITIES, pmu->intel_cap.capabilities); } +} + +static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) +{ + intel_pmu_check_num_counters(&pmu->num_counters, &pmu->num_counters_fixed, + &pmu->intel_ctrl, (1ULL << pmu->num_counters_fixed) - 1); + pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters); + pmu->unconstrained = (struct event_constraint) + __EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1, + 0, pmu->num_counters, 0, 0); if (pmu->intel_cap.perf_metrics) pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; @@ -4641,6 +4652,13 @@ static void update_pmu_cap(struct x86_hybrid_pmu *pmu) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + + intel_pmu_check_event_constraints(pmu->event_constraints, + pmu->num_counters, + pmu->num_counters_fixed, + pmu->intel_ctrl); + + intel_pmu_check_extra_regs(pmu->extra_regs); } static struct x86_hybrid_pmu *find_hybrid_pmu_for_cpu(void) @@ -4696,6 +4714,8 @@ static bool init_hybrid_pmu(int cpu) if (this_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) update_pmu_cap(pmu); + intel_pmu_check_hybrid_pmus(pmu); + if (!check_hw_exists(&pmu->pmu, pmu->num_counters, pmu->num_counters_fixed)) return false; @@ -5915,36 +5935,6 @@ static void intel_pmu_check_extra_regs(struct extra_reg *extra_regs) } } -static void intel_pmu_check_hybrid_pmus(u64 fixed_mask) -{ - struct x86_hybrid_pmu *pmu; - int i; - - for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) { - pmu = &x86_pmu.hybrid_pmu[i]; - - intel_pmu_check_num_counters(&pmu->num_counters, - &pmu->num_counters_fixed, - &pmu->intel_ctrl, - fixed_mask); - - if (pmu->intel_cap.perf_metrics) { - pmu->intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - pmu->intel_ctrl |= INTEL_PMC_MSK_FIXED_SLOTS; - } - - if (pmu->intel_cap.pebs_output_pt_available) - 
pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; - - intel_pmu_check_event_constraints(pmu->event_constraints, - pmu->num_counters, - pmu->num_counters_fixed, - pmu->intel_ctrl); - - intel_pmu_check_extra_regs(pmu->extra_regs); - } -} - static const struct { enum hybrid_pmu_type id; char *name; } intel_hybrid_pmu_type_map[] __initconst = { { hybrid_small, "cpu_atom" }, { hybrid_big, "cpu_core" }, @@ -6869,9 +6859,6 @@ __init int intel_pmu_init(void) if (!is_hybrid() && x86_pmu.intel_cap.perf_metrics) x86_pmu.intel_ctrl |= 1ULL << GLOBAL_CTRL_EN_PERF_METRICS; - if (is_hybrid() && !boot_cpu_has(X86_FEATURE_ARCH_PERFMON_EXT)) - intel_pmu_check_hybrid_pmus((u64)fixed_mask); - if (x86_pmu.intel_cap.pebs_timing_info) x86_pmu.flags |= PMU_FL_RETIRE_LATENCY; -- cgit v1.2.3 From ffbe4ab0beda55b5c467aa3d95ca14db75a84717 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Mon, 11 Sep 2023 07:41:38 -0700 Subject: perf/x86/intel: Extend the ref-cycles event to GP counters The current ref-cycles event is only available on the fixed counter 2. Starting from the GLC and GRT core, the architectural UnHalted Reference Cycles event (0x013c) which is available on general-purpose counters can collect the exact same events as the fixed counter 2. Update the mapping of ref-cycles to 0x013c. So the ref-cycles can be available on both fixed counter 2 and general-purpose counters. Signed-off-by: Kan Liang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230911144139.2354015-1-kan.liang@linux.intel.com --- arch/x86/events/intel/core.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e1543d6dc48a..a08f794a0e79 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -211,6 +211,14 @@ static struct event_constraint intel_slm_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_grt_event_constraints[] __read_mostly = { + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_skl_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ @@ -314,6 +322,7 @@ static struct event_constraint intel_glc_event_constraints[] = { FIXED_EVENT_CONSTRAINT(0x0100, 0), /* INST_RETIRED.PREC_DIST */ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x013c, 2), /* CPU_CLK_UNHALTED.REF_TSC_P */ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_RETIRING, 0), METRIC_EVENT_CONSTRAINT(INTEL_TD_METRIC_BAD_SPEC, 1), @@ -5983,6 +5992,12 @@ static __always_inline int intel_pmu_init_hybrid(enum hybrid_pmu_type pmus) return 0; } +static __always_inline void intel_pmu_ref_cycles_ext(void) +{ + if (!(x86_pmu.events_maskl & (INTEL_PMC_MSK_FIXED_REF_CYCLES >> INTEL_PMC_IDX_FIXED))) + intel_perfmon_event_map[PERF_COUNT_HW_REF_CPU_CYCLES] = 0x013c; +} + static __always_inline void intel_pmu_init_glc(struct pmu *pmu) { x86_pmu.late_ack = true; @@ -6005,6 +6020,8 @@ static __always_inline void intel_pmu_init_glc(struct pmu *pmu) memcpy(hybrid_var(pmu, 
hw_cache_extra_regs), glc_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); hybrid(pmu, event_constraints) = intel_glc_event_constraints; hybrid(pmu, pebs_constraints) = intel_glc_pebs_event_constraints; + + intel_pmu_ref_cycles_ext(); } static __always_inline void intel_pmu_init_grt(struct pmu *pmu) @@ -6021,9 +6038,11 @@ static __always_inline void intel_pmu_init_grt(struct pmu *pmu) memcpy(hybrid_var(pmu, hw_cache_event_ids), glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hybrid_var(pmu, hw_cache_extra_regs), tnt_hw_cache_extra_regs, sizeof(hw_cache_extra_regs)); hybrid_var(pmu, hw_cache_event_ids)[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; - hybrid(pmu, event_constraints) = intel_slm_event_constraints; + hybrid(pmu, event_constraints) = intel_grt_event_constraints; hybrid(pmu, pebs_constraints) = intel_grt_pebs_event_constraints; hybrid(pmu, extra_regs) = intel_grt_extra_regs; + + intel_pmu_ref_cycles_ext(); } __init int intel_pmu_init(void) -- cgit v1.2.3 From 1ce19bf90bd55bf54f9ed75d594029db63d395b0 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 7 Aug 2023 16:51:14 +0200 Subject: perf/x86/rapl: Stop doing cpu_relax() in the local64_cmpxchg() loop in rapl_event_update() According to the following commit: f5fe24ef17b5 ("lockref: stop doing cpu_relax in the cmpxchg loop") "On the x86-64 architecture even a failing cmpxchg grants exclusive access to the cacheline, making it preferable to retry the failed op immediately instead of stalling with the pause instruction." Based on the above observation, remove cpu_relax() from the local64_cmpxchg() loop of rapl_event_update(). Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Linus Torvalds Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20230807145134.3176-1-ubizjak@gmail.com --- arch/x86/events/rapl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 1579429846cc..e8f53b2590a5 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -184,10 +184,8 @@ again: rdmsrl(event->hw.event_base, new_raw_count); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) { - cpu_relax(); + new_raw_count) != prev_raw_count) goto again; - } /* * Now we have the new raw value and have updated the prev -- cgit v1.2.3 From bcc6ec3d954bbcc8bec34a21c05ea536a2e96d6f Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 7 Aug 2023 16:51:15 +0200 Subject: perf/x86/rapl: Use local64_try_cmpxchg in rapl_event_update() Use local64_try_cmpxchg() instead of local64_cmpxchg(*ptr, old, new) == old. X86 CMPXCHG instruction returns success in ZF flag, so this change saves a compare after CMPXCHG (and related move instruction in front of CMPXCHG). Also, try_cmpxchg() implicitly assigns old *ptr value to "old" when CMPXCHG fails. There is no need to re-read the value in the loop. No functional change intended. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Linus Torvalds Cc: "H. 
Peter Anvin" Link: https://lore.kernel.org/r/20230807145134.3176-2-ubizjak@gmail.com --- arch/x86/events/rapl.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index e8f53b2590a5..6d3e73848643 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -179,13 +179,11 @@ static u64 rapl_event_update(struct perf_event *event) s64 delta, sdelta; int shift = RAPL_CNTR_WIDTH; -again: prev_raw_count = local64_read(&hwc->prev_count); - rdmsrl(event->hw.event_base, new_raw_count); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; + do { + rdmsrl(event->hw.event_base, new_raw_count); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); /* * Now we have the new raw value and have updated the prev -- cgit v1.2.3 From 618e77d77494bfd6273256d43eafe4368445c745 Mon Sep 17 00:00:00 2001 From: David Reaver Date: Tue, 1 Aug 2023 08:56:51 -0700 Subject: perf/x86/rapl: Fix "Using plain integer as NULL pointer" Sparse warning Change 0 to NULL when initializing the test field of perf_msr structs to avoid the following sparse warnings: make C=2 arch/x86/events/rapl.o CHECK arch/x86/events/rapl.c ... arch/x86/events/rapl.c:540:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:542:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:543:59: warning: Using plain integer as NULL pointer arch/x86/events/rapl.c:544:59: warning: Using plain integer as NULL pointer Signed-off-by: David Reaver Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230801155651.108076-1-me@davidreaver.com --- arch/x86/events/rapl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 6d3e73848643..c2c37fb0a228 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -533,11 +533,11 @@ static struct perf_msr intel_rapl_spr_msrs[] = { * - want to use same event codes across both architectures */ static struct perf_msr amd_rapl_msrs[] = { - [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, 0, false, 0 }, + [PERF_RAPL_PP0] = { 0, &rapl_events_cores_group, NULL, false, 0 }, [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr, false, RAPL_MSR_MASK }, - [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, 0, false, 0 }, - [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, 0, false, 0 }, - [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, 0, false, 0 }, + [PERF_RAPL_RAM] = { 0, &rapl_events_ram_group, NULL, false, 0 }, + [PERF_RAPL_PP1] = { 0, &rapl_events_gpu_group, NULL, false, 0 }, + [PERF_RAPL_PSYS] = { 0, &rapl_events_psys_group, NULL, false, 0 }, }; static int rapl_cpu_offline(unsigned int cpu) -- cgit v1.2.3 From a56d5551e1993ca84dd0c69df5a3d8223d13fb5f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 Oct 2023 13:17:54 -0700 Subject: perf/x86/rapl: Annotate 'struct rapl_pmus' with __counted_by Prepare for the coming implementation by GCC and Clang of the __counted_by attribute. Flexible array members annotated with __counted_by can have their accesses bounds-checked at run-time via CONFIG_UBSAN_BOUNDS=y (for array indexing) and CONFIG_FORTIFY_SOURCE=y (for strcpy/memcpy-family functions). Found with Coccinelle: https://github.com/kees/kernel-tools/blob/trunk/coccinelle/examples/counted_by.cocci [1] Add __counted_by for 'struct rapl_pmus'. 
No change in functionality intended. Signed-off-by: Kees Cook Signed-off-by: Ingo Molnar Reviewed-by: Gustavo A. R. Silva Link: https://lore.kernel.org/r/20231006201754.work.473-kees@kernel.org --- arch/x86/events/rapl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index c2c37fb0a228..8d98d468b976 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -115,7 +115,7 @@ struct rapl_pmu { struct rapl_pmus { struct pmu pmu; unsigned int maxdie; - struct rapl_pmu *pmus[]; + struct rapl_pmu *pmus[] __counted_by(maxdie); }; enum rapl_unit_quirk { -- cgit v1.2.3 From 38cd5b6a875adc877681faf8e3ad47fdbd6eceb5 Mon Sep 17 00:00:00 2001 From: Lucy Mielke Date: Mon, 9 Oct 2023 08:54:54 +0200 Subject: perf/x86/intel/pt: Fix kernel-doc comments Some parameters or return codes were either wrong or missing; update them. Signed-off-by: Lucy Mielke Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/ZSOjQW3e2nJR4bAo@fedora.fritz.box --- arch/x86/events/intel/pt.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 42a55794004a..8e2a12235e62 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -736,6 +736,7 @@ static bool topa_table_full(struct topa *topa) /** * topa_insert_pages() - create a list of ToPA tables * @buf: PT buffer being initialized. + * @cpu: CPU on which to allocate. * @gfp: Allocation flags. * * This initializes a list of ToPA tables with entries from @@ -1207,8 +1208,11 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf) /** * pt_buffer_init_topa() - initialize ToPA table for pt buffer * @buf: PT buffer. - * @size: Total size of all regions within this ToPA. + * @cpu: CPU on which to allocate. + * @nr_pages: No. of pages to allocate. + * @gfp: Allocation flags. + * + * Return: 0 on success or error code. */ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu, unsigned long nr_pages, gfp_t gfp) @@ -1281,7 +1285,7 @@ out: /** * pt_buffer_setup_aux() - set up topa tables for a PT buffer - * @cpu: Cpu on which to allocate, -1 means current. + * @event: Performance event * @pages: Array of pointers to buffer pages passed from perf core. * @nr_pages: Number of pages in the buffer. * @snapshot: If this is a snapshot/overwrite counter. -- cgit v1.2.3 From 05276d4831fe023b6674a72bd6b8c5b39796e690 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Tue, 12 Sep 2023 15:44:31 +0300 Subject: perf/x86/cstate: Allow reading the package statistics from local CPU The MSR registers for reading the package residency counters are available on every CPU of the package. To avoid doing unnecessary SMP calls to read the values for these from the various CPUs inside a package, allow reading them from any CPU of the package.
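[ Note: a minimal sketch of why a local read suffices; the helper name is hypothetical and assumes a package-scoped residency MSR: ]

	/* Every CPU in a package sees the same package-scoped MSR, so a
	 * plain local RDMSR works and no cross-CPU SMP call is needed. */
	static u64 cstate_pkg_read_local_sketch(u32 msr)
	{
		u64 val;

		rdmsrl(msr, val);
		return val;
	}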
Suggested-by: Kan Liang Signed-off-by: Tero Kristo Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230912124432.3616761-2-tero.kristo@linux.intel.com --- arch/x86/events/intel/cstate.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/events') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 96fffb2d521d..cbeb6d2bf5b4 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -336,6 +336,9 @@ static int cstate_pmu_event_init(struct perf_event *event) cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!(pkg_msr_mask & (1 << cfg))) return -EINVAL; + + event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG; + event->hw.event_base = pkg_msr[cfg].msr; cpu = cpumask_any_and(&cstate_pkg_cpu_mask, topology_die_cpumask(event->cpu)); -- cgit v1.2.3 From d6389d3ccc136a4229a8d497899c64f80fd3c5b3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:11 +0530 Subject: perf/x86/amd/uncore: Refactor uncore management Since struct amd_uncore is used to manage per-cpu contexts, rename it to amd_uncore_ctx in order to better reflect its purpose. Add a new struct amd_uncore_pmu to encapsulate all attributes which are shared by per-cpu contexts for a corresponding PMU. These include the number of counters, active mask, MSR and RDPMC base addresses, etc. Since the struct pmu is now embedded, the corresponding amd_uncore_pmu for a given event can be found by simply using container_of(). Finally, move all PMU-specific code to separate functions. While the original event management functions continue to provide the base functionality, all PMU-specific quirks and customizations are applied in separate functions. The motivation is to simplify the management of uncore PMUs. 
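[ Note: the container_of() lookup described above, shown in isolation; this matches the event_to_amd_uncore_pmu() helper added by the patch: ]

	/* With struct pmu embedded in struct amd_uncore_pmu, the wrapper is
	 * recovered from any event without per-cpu or global lookups. */
	static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event)
	{
		return container_of(event->pmu, struct amd_uncore_pmu, pmu);
	}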
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/24b38c49a5dae65d8c96e5d75a2b96ae97aaa651.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 737 ++++++++++++++++++++++--------------------- 1 file changed, 379 insertions(+), 358 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 83f15fe411b3..ffcecda13d65 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,56 +27,41 @@ #define COUNTER_SHIFT 16 +#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ +#define UNCORE_NAME_LEN 16 + #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt static int pmu_version; -static int num_counters_llc; -static int num_counters_nb; -static bool l3_mask; static HLIST_HEAD(uncore_unused_list); -struct amd_uncore { +struct amd_uncore_ctx { int id; int refcnt; int cpu; - int num_counters; - int rdpmc_base; - u32 msr_base; - cpumask_t *active_mask; - struct pmu *pmu; struct perf_event **events; struct hlist_node node; }; -static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_llc; - -static struct pmu amd_nb_pmu; -static struct pmu amd_llc_pmu; - -static cpumask_t amd_nb_active_mask; -static cpumask_t amd_llc_active_mask; - -static bool is_nb_event(struct perf_event *event) -{ - return event->pmu->type == amd_nb_pmu.type; -} +struct amd_uncore_pmu { + char name[UNCORE_NAME_LEN]; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t active_mask; + struct pmu pmu; + struct amd_uncore_ctx * __percpu *ctx; + int (*id)(unsigned int cpu); +}; -static bool is_llc_event(struct perf_event *event) -{ - return event->pmu->type == amd_llc_pmu.type; -} +static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; +static int num_pmus __read_mostly; -static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { - if (is_nb_event(event) && amd_uncore_nb) - return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_llc_event(event) && amd_uncore_llc) - return *per_cpu_ptr(amd_uncore_llc, event->cpu); - - return NULL; + return container_of(event->pmu, struct amd_uncore_pmu, pmu); } static void amd_uncore_read(struct perf_event *event) @@ -118,7 +103,7 @@ static void amd_uncore_stop(struct perf_event *event, int flags) hwc->state |= PERF_HES_STOPPED; if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - amd_uncore_read(event); + event->pmu->read(event); hwc->state |= PERF_HES_UPTODATE; } } @@ -126,15 +111,16 @@ static void amd_uncore_stop(struct perf_event *event, int flags) static int amd_uncore_add(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; /* are we already assigned? 
*/ - if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + if (hwc->idx != -1 && ctx->events[hwc->idx] == event) goto out; - for (i = 0; i < uncore->num_counters; i++) { - if (uncore->events[i] == event) { + for (i = 0; i < pmu->num_counters; i++) { + if (ctx->events[i] == event) { hwc->idx = i; goto out; } @@ -142,8 +128,8 @@ static int amd_uncore_add(struct perf_event *event, int flags) /* if not, take the first available counter */ hwc->idx = -1; - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], NULL, event) == NULL) { hwc->idx = i; break; } @@ -153,23 +139,13 @@ out: if (hwc->idx == -1) return -EBUSY; - hwc->config_base = uncore->msr_base + (2 * hwc->idx); - hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); - hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->config_base = pmu->msr_base + (2 * hwc->idx); + hwc->event_base = pmu->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - /* - * The first four DF counters are accessible via RDPMC index 6 to 9 - * followed by the L3 counters from index 10 to 15. For processors - * with more than four DF counters, the DF RDPMC assignments become - * discontiguous as the additional counters are accessible starting - * from index 16. - */ - if (is_nb_event(event) && hwc->idx >= NUM_COUNTERS_NB) - hwc->event_base_rdpmc += NUM_COUNTERS_L3; - if (flags & PERF_EF_START) - amd_uncore_start(event, PERF_EF_RELOAD); + event->pmu->start(event, PERF_EF_RELOAD); return 0; } @@ -177,55 +153,36 @@ out: static void amd_uncore_del(struct perf_event *event, int flags) { int i; - struct amd_uncore *uncore = event_to_amd_uncore(event); + struct amd_uncore_pmu *pmu = event_to_amd_uncore_pmu(event); + struct amd_uncore_ctx *ctx = *per_cpu_ptr(pmu->ctx, event->cpu); struct hw_perf_event *hwc = &event->hw; - amd_uncore_stop(event, PERF_EF_UPDATE); + event->pmu->stop(event, PERF_EF_UPDATE); - for (i = 0; i < uncore->num_counters; i++) { - if (cmpxchg(&uncore->events[i], event, NULL) == event) + for (i = 0; i < pmu->num_counters; i++) { + if (cmpxchg(&ctx->events[i], event, NULL) == event) break; } hwc->idx = -1; } -/* - * Return a full thread and slice mask unless user - * has provided them - */ -static u64 l3_thread_slice_mask(u64 config) -{ - if (boot_cpu_data.x86 <= 0x18) - return ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | - ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); - - /* - * If the user doesn't specify a threadmask, they're not trying to - * count core 0, so we enable all cores & threads. - * We'll also assume that they want to count slice 0 if they specify - * a threadmask and leave sliceid and enallslices unpopulated. 
- */ - if (!(config & AMD64_L3_F19H_THREAD_MASK)) - return AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_EN_ALL_CORES; - - return config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | - AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | - AMD64_L3_COREID_MASK); -} - static int amd_uncore_event_init(struct perf_event *event) { - struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; struct hw_perf_event *hwc = &event->hw; - u64 event_mask = AMD64_RAW_EVENT_MASK_NB; if (event->attr.type != event->pmu->type) return -ENOENT; - if (pmu_version >= 2 && is_nb_event(event)) - event_mask = AMD64_PERFMON_V2_RAW_EVENT_MASK_NB; + if (event->cpu < 0) + return -EINVAL; + + pmu = event_to_amd_uncore_pmu(event); + ctx = *per_cpu_ptr(pmu->ctx, event->cpu); + if (!ctx) + return -ENODEV; /* * NB and Last level cache counters (MSRs) are shared across all cores @@ -235,28 +192,14 @@ static int amd_uncore_event_init(struct perf_event *event) * out. So we do not support sampling and per-thread events via * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - hwc->config = event->attr.config & event_mask; + hwc->config = event->attr.config; hwc->idx = -1; - if (event->cpu < 0) - return -EINVAL; - - /* - * SliceMask and ThreadMask need to be set for certain L3 events. - * For other events, the two fields do not affect the count. - */ - if (l3_mask && is_llc_event(event)) - hwc->config |= l3_thread_slice_mask(event->attr.config); - - uncore = event_to_amd_uncore(event); - if (!uncore) - return -ENODEV; - /* * since request can come in to any of the shared cores, we will remap * to a single common cpu. */ - event->cpu = uncore->cpu; + event->cpu = ctx->cpu; return 0; } @@ -278,17 +221,10 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - cpumask_t *active_mask; - struct pmu *pmu = dev_get_drvdata(dev); - - if (pmu->type == amd_nb_pmu.type) - active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_llc_pmu.type) - active_mask = &amd_llc_active_mask; - else - return 0; + struct pmu *ptr = dev_get_drvdata(dev); + struct amd_uncore_pmu *pmu = container_of(ptr, struct amd_uncore_pmu, pmu); - return cpumap_print_to_pagebuf(true, buf, active_mask); + return cpumap_print_to_pagebuf(true, buf, &pmu->active_mask); } static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); @@ -396,113 +332,57 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = { NULL, }; -static struct pmu amd_nb_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_df_attr_groups, - .name = "amd_nb", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct pmu amd_llc_pmu = { - .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_l3_attr_groups, - .attr_update = amd_uncore_l3_attr_update, - .name = "amd_l2", - .event_init = amd_uncore_event_init, - .add = amd_uncore_add, - .del = amd_uncore_del, - .start = amd_uncore_start, - .stop = amd_uncore_stop, - .read = amd_uncore_read, - .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, - .module = THIS_MODULE, -}; - -static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) -{ - return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, - cpu_to_node(cpu)); -} 
- -static inline struct perf_event ** -amd_uncore_events_alloc(unsigned int num, unsigned int cpu) -{ - return kzalloc_node(sizeof(struct perf_event *) * num, GFP_KERNEL, - cpu_to_node(cpu)); -} - static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_llc = NULL; - - if (amd_uncore_nb) { - *per_cpu_ptr(amd_uncore_nb, cpu) = NULL; - uncore_nb = amd_uncore_alloc(cpu); - if (!uncore_nb) - goto fail; - uncore_nb->cpu = cpu; - uncore_nb->num_counters = num_counters_nb; - uncore_nb->rdpmc_base = RDPMC_BASE_NB; - uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; - uncore_nb->active_mask = &amd_nb_active_mask; - uncore_nb->pmu = &amd_nb_pmu; - uncore_nb->events = amd_uncore_events_alloc(num_counters_nb, cpu); - if (!uncore_nb->events) + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int node = cpu_to_node(cpu), i; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, + node); + if (!ctx) goto fail; - uncore_nb->id = -1; - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; - } - if (amd_uncore_llc) { - *per_cpu_ptr(amd_uncore_llc, cpu) = NULL; - uncore_llc = amd_uncore_alloc(cpu); - if (!uncore_llc) - goto fail; - uncore_llc->cpu = cpu; - uncore_llc->num_counters = num_counters_llc; - uncore_llc->rdpmc_base = RDPMC_BASE_LLC; - uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_llc->active_mask = &amd_llc_active_mask; - uncore_llc->pmu = &amd_llc_pmu; - uncore_llc->events = amd_uncore_events_alloc(num_counters_llc, cpu); - if (!uncore_llc->events) + ctx->cpu = cpu; + ctx->events = kzalloc_node(sizeof(struct perf_event *) * + pmu->num_counters, GFP_KERNEL, + node); + if (!ctx->events) goto fail; - uncore_llc->id = -1; - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; + + ctx->id = -1; + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; fail: - if (uncore_nb) { - kfree(uncore_nb->events); - kfree(uncore_nb); - } + /* Rollback */ + for (; i >= 0; i--) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) + continue; - if (uncore_llc) { - kfree(uncore_llc->events); - kfree(uncore_llc); + kfree(ctx->events); + kfree(ctx); } return -ENOMEM; } -static struct amd_uncore * -amd_uncore_find_online_sibling(struct amd_uncore *this, - struct amd_uncore * __percpu *uncores) +static struct amd_uncore_ctx * +amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, + struct amd_uncore_pmu *pmu) { unsigned int cpu; - struct amd_uncore *that; + struct amd_uncore_ctx *that; for_each_online_cpu(cpu) { - that = *per_cpu_ptr(uncores, cpu); + that = *per_cpu_ptr(pmu->ctx, cpu); if (!that) continue; @@ -523,24 +403,16 @@ amd_uncore_find_online_sibling(struct amd_uncore *this, static int amd_uncore_cpu_starting(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; - struct amd_uncore *uncore; - - if (amd_uncore_nb) { - uncore = *per_cpu_ptr(amd_uncore_nb, cpu); - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - uncore->id = ecx & 0xff; - - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); - *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; - } - - if (amd_uncore_llc) { - uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - uncore->id = get_llc_id(cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); - *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + ctx->id = pmu->id(cpu); + ctx = 
amd_uncore_find_online_sibling(ctx, pmu); + *per_cpu_ptr(pmu->ctx, cpu) = ctx; } return 0; @@ -548,195 +420,359 @@ static int amd_uncore_cpu_starting(unsigned int cpu) static void uncore_clean_online(void) { - struct amd_uncore *uncore; + struct amd_uncore_ctx *ctx; struct hlist_node *n; - hlist_for_each_entry_safe(uncore, n, &uncore_unused_list, node) { - hlist_del(&uncore->node); - kfree(uncore->events); - kfree(uncore); + hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { + hlist_del(&ctx->node); + kfree(ctx->events); + kfree(ctx); } } -static void uncore_online(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; uncore_clean_online(); - if (cpu == uncore->cpu) - cpumask_set_cpu(cpu, uncore->active_mask); + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_set_cpu(cpu, &pmu->active_mask); + } + + return 0; } -static int amd_uncore_cpu_online(unsigned int cpu) +static int amd_uncore_cpu_down_prepare(unsigned int cpu) { - if (amd_uncore_nb) - uncore_online(cpu, amd_uncore_nb); + struct amd_uncore_ctx *this, *that; + struct amd_uncore_pmu *pmu; + int i, j; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + this = *per_cpu_ptr(pmu->ctx, cpu); + + /* this cpu is going down, migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + that = *per_cpu_ptr(pmu->ctx, j); + + if (cpu == j) + continue; + + if (this == that) { + perf_pmu_migrate_context(&pmu->pmu, cpu, j); + cpumask_clear_cpu(cpu, &pmu->active_mask); + cpumask_set_cpu(j, &pmu->active_mask); + that->cpu = j; + break; + } + } + } - if (amd_uncore_llc) - uncore_online(cpu, amd_uncore_llc); + return 0; +} + +static int amd_uncore_cpu_dead(unsigned int cpu) +{ + struct amd_uncore_ctx *ctx; + struct amd_uncore_pmu *pmu; + int i; + + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); + + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); + } + + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } return 0; } -static void uncore_down_prepare(unsigned int cpu, - struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_id(unsigned int cpu) { - unsigned int i; - struct amd_uncore *this = *per_cpu_ptr(uncores, cpu); + unsigned int eax, ebx, ecx, edx; - if (this->cpu != cpu) - return; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(i) { - struct amd_uncore *that = *per_cpu_ptr(uncores, i); + return ecx & 0xff; +} - if (cpu == i) - continue; +static int amd_uncore_df_event_init(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int ret = amd_uncore_event_init(event); - if (this == that) { - perf_pmu_migrate_context(this->pmu, cpu, i); - cpumask_clear_cpu(cpu, that->active_mask); - cpumask_set_cpu(i, that->active_mask); - that->cpu = i; - break; - } - } + if (ret || pmu_version < 2) + return ret; + + hwc->config = event->attr.config & + (pmu_version >= 2 ? 
AMD64_PERFMON_V2_RAW_EVENT_MASK_NB : + AMD64_RAW_EVENT_MASK_NB); + + return 0; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static int amd_uncore_df_add(struct perf_event *event, int flags) { - if (amd_uncore_nb) - uncore_down_prepare(cpu, amd_uncore_nb); + int ret = amd_uncore_add(event, flags & ~PERF_EF_START); + struct hw_perf_event *hwc = &event->hw; + + if (ret) + return ret; + + /* + * The first four DF counters are accessible via RDPMC index 6 to 9 + * followed by the L3 counters from index 10 to 15. For processors + * with more than four DF counters, the DF RDPMC assignments become + * discontiguous as the additional counters are accessible starting + * from index 16. + */ + if (hwc->idx >= NUM_COUNTERS_NB) + hwc->event_base_rdpmc += NUM_COUNTERS_L3; - if (amd_uncore_llc) - uncore_down_prepare(cpu, amd_uncore_llc); + /* Delayed start after rdpmc base update */ + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); return 0; } -static void uncore_dead(unsigned int cpu, struct amd_uncore * __percpu *uncores) +static int amd_uncore_df_init(void) { - struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + union cpuid_0x80000022_ebx ebx; + int ret; - if (cpu == uncore->cpu) - cpumask_clear_cpu(cpu, uncore->active_mask); + if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) + return 0; - if (!--uncore->refcnt) { - kfree(uncore->events); - kfree(uncore); + /* + * For Family 17h and above, the Northbridge counters are repurposed + * as Data Fabric counters. The PMUs are exported based on family as + * either NB or DF. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_df" : "amd_nb", + sizeof(pmu->name)); + + pmu->num_counters = NUM_COUNTERS_NB; + pmu->msr_base = MSR_F15H_NB_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->id = amd_uncore_df_id; + + if (pmu_version >= 2) { + *df_attr++ = &format_attr_event14v2.attr; + *df_attr++ = &format_attr_umask12.attr; + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + pmu->num_counters = ebx.split.num_df_pmc; + } else if (boot_cpu_data.x86 >= 0x17) { + *df_attr = &format_attr_event14.attr; + } + + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_df_attr_groups, + .name = pmu->name, + .event_init = amd_uncore_df_event_init, + .add = amd_uncore_df_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; } - *per_cpu_ptr(uncores, cpu) = NULL; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? 
"HYGON" : "", + pmu->pmu.name); + + num_pmus++; + + return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_l3_id(unsigned int cpu) { - if (amd_uncore_nb) - uncore_dead(cpu, amd_uncore_nb); + return get_llc_id(cpu); +} + +static int amd_uncore_l3_event_init(struct perf_event *event) +{ + int ret = amd_uncore_event_init(event); + struct hw_perf_event *hwc = &event->hw; + u64 config = event->attr.config; + u64 mask; + + hwc->config = config & AMD64_RAW_EVENT_MASK_NB; + + /* + * SliceMask and ThreadMask need to be set for certain L3 events. + * For other events, the two fields do not affect the count. + */ + if (ret || boot_cpu_data.x86 < 0x17) + return ret; + + mask = config & (AMD64_L3_F19H_THREAD_MASK | AMD64_L3_SLICEID_MASK | + AMD64_L3_EN_ALL_CORES | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_COREID_MASK); + + if (boot_cpu_data.x86 <= 0x18) + mask = ((config & AMD64_L3_SLICE_MASK) ? : AMD64_L3_SLICE_MASK) | + ((config & AMD64_L3_THREAD_MASK) ? : AMD64_L3_THREAD_MASK); + + /* + * If the user doesn't specify a ThreadMask, they're not trying to + * count core 0, so we enable all cores & threads. + * We'll also assume that they want to count slice 0 if they specify + * a ThreadMask and leave SliceId and EnAllSlices unpopulated. + */ + else if (!(config & AMD64_L3_F19H_THREAD_MASK)) + mask = AMD64_L3_F19H_THREAD_MASK | AMD64_L3_EN_ALL_SLICES | + AMD64_L3_EN_ALL_CORES; - if (amd_uncore_llc) - uncore_dead(cpu, amd_uncore_llc); + hwc->config |= mask; return 0; } -static int __init amd_uncore_init(void) +static int amd_uncore_l3_init(void) { - struct attribute **df_attr = amd_uncore_df_format_attr; struct attribute **l3_attr = amd_uncore_l3_format_attr; - union cpuid_0x80000022_ebx ebx; - int ret = -ENODEV; + struct amd_uncore_pmu *pmu = &pmus[num_pmus]; + int ret; - if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return -ENODEV; + if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) + return 0; - if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) - return -ENODEV; + /* + * For Family 17h and above, L3 cache counters are available instead + * of L2 cache counters. The PMUs are exported based on family as + * either L2 or L3. + */ + strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", + sizeof(pmu->name)); - if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) - pmu_version = 2; + pmu->num_counters = NUM_COUNTERS_L2; + pmu->msr_base = MSR_F16H_L2I_PERF_CTL; + pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->id = amd_uncore_l3_id; - num_counters_nb = NUM_COUNTERS_NB; - num_counters_llc = NUM_COUNTERS_L2; if (boot_cpu_data.x86 >= 0x17) { - /* - * For F17h and above, the Northbridge counters are - * repurposed as Data Fabric counters. Also, L3 - * counters are supported too. The PMUs are exported - * based on family as either L2 or L3 and NB or DF. - */ - num_counters_llc = NUM_COUNTERS_L3; - amd_nb_pmu.name = "amd_df"; - amd_llc_pmu.name = "amd_l3"; - l3_mask = true; + *l3_attr++ = &format_attr_event8.attr; + *l3_attr++ = &format_attr_umask8.attr; + *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
+ &format_attr_threadmask2.attr : + &format_attr_threadmask8.attr; + pmu->num_counters = NUM_COUNTERS_L3; } - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - if (pmu_version >= 2) { - *df_attr++ = &format_attr_event14v2.attr; - *df_attr++ = &format_attr_umask12.attr; - } else if (boot_cpu_data.x86 >= 0x17) { - *df_attr = &format_attr_event14.attr; - } + pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); + if (!pmu->ctx) + return -ENOMEM; + + pmu->pmu = (struct pmu) { + .task_ctx_nr = perf_invalid_context, + .attr_groups = amd_uncore_l3_attr_groups, + .attr_update = amd_uncore_l3_attr_update, + .name = pmu->name, + .event_init = amd_uncore_l3_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, + .module = THIS_MODULE, + }; + + ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); + if (ret) { + free_percpu(pmu->ctx); + pmu->ctx = NULL; + return ret; + } - amd_uncore_nb = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_nb) { - ret = -ENOMEM; - goto fail_nb; - } - ret = perf_pmu_register(&amd_nb_pmu, amd_nb_pmu.name, -1); - if (ret) - goto fail_nb; + pr_info("%d %s %s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pmu->pmu.name); - if (pmu_version >= 2) { - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - num_counters_nb = ebx.split.num_df_pmc; - } + num_pmus++; - pr_info("%d %s %s counters detected\n", num_counters_nb, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", - amd_nb_pmu.name); + return 0; +} - ret = 0; - } +static void uncore_free(void) +{ + struct amd_uncore_pmu *pmu; + int i; - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - if (boot_cpu_data.x86 >= 0x19) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask2.attr; - } else if (boot_cpu_data.x86 >= 0x17) { - *l3_attr++ = &format_attr_event8.attr; - *l3_attr++ = &format_attr_umask8.attr; - *l3_attr++ = &format_attr_threadmask8.attr; - } + for (i = 0; i < num_pmus; i++) { + pmu = &pmus[i]; + if (!pmu->ctx) + continue; - amd_uncore_llc = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_llc) { - ret = -ENOMEM; - goto fail_llc; - } - ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); - if (ret) - goto fail_llc; - - pr_info("%d %s %s counters detected\n", num_counters_llc, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", - amd_llc_pmu.name); - ret = 0; + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; } + num_pmus = 0; +} + +static int __init amd_uncore_init(void) +{ + int ret; + + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && + boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) + return -ENODEV; + + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) + return -ENODEV; + + if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) + pmu_version = 2; + + ret = amd_uncore_df_init(); + if (ret) + goto fail; + + ret = amd_uncore_l3_init(); + if (ret) + goto fail; + /* * Install callbacks. Core will call them for each online cpu. 
*/ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_llc; + goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -753,12 +789,8 @@ fail_start: cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_llc: - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_llc); -fail_nb: - free_percpu(amd_uncore_nb); +fail: + uncore_free(); return ret; } @@ -768,18 +800,7 @@ static void __exit amd_uncore_exit(void) { cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - - if (boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) { - perf_pmu_unregister(&amd_llc_pmu); - free_percpu(amd_uncore_llc); - amd_uncore_llc = NULL; - } - - if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) { - perf_pmu_unregister(&amd_nb_pmu); - free_percpu(amd_uncore_nb); - amd_uncore_nb = NULL; - } + uncore_free(); } module_init(amd_uncore_init); -- cgit v1.2.3 From 07888daa056e809de0b6b234116b575c11f9f99d Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:12 +0530 Subject: perf/x86/amd/uncore: Move discovery and registration Uncore PMUs have traditionally been registered in the module init path. This is fine for the existing DF and L3 PMUs since the CPUID information does not vary across CPUs, but not for the memory controller (UMC) PMUs since information like active memory channels can vary for each socket depending on how the DIMMs have been physically populated. To overcome this, the discovery of PMU information using CPUID is moved to the startup of UNCORE_STARTING. This cannot be done in the startup of UNCORE_PREP since the hotplug callback does not run on the CPU that is being brought online. Previously, the startup of UNCORE_PREP was used for allocating uncore contexts, following which the startup of UNCORE_STARTING was used to find and reuse an existing sibling context, if possible. Any unused contexts were added to a list for reclamation later during the startup of UNCORE_ONLINE. Since all required CPUID info is now available only after the startup of UNCORE_STARTING has completed, context allocation has been moved to the startup of UNCORE_ONLINE. Before allocating contexts, the first CPU that comes online has to take up the additional responsibility of registering the PMUs. This is a one-time process though. Since sibling discovery now happens prior to deciding whether a new context is required, there is no longer a need to track and free up unused contexts. The teardown of UNCORE_ONLINE and UNCORE_PREP functionally remains the same. Overall, the flow of control described above is achieved using the following handlers for managing uncore PMUs. It is mandatory to define them for each type of uncore PMU. * scan() runs during startup of UNCORE_STARTING and collects PMU info using CPUID. * init() runs during startup of UNCORE_ONLINE, registers PMUs and sets up uncore contexts. * move() runs during teardown of UNCORE_ONLINE and migrates uncore contexts to a shared sibling, if possible. * free() runs during teardown of UNCORE_PREP and frees up uncore contexts.
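[ Note: a sketch of how the four handlers map onto the hotplug states; the comment table is a summary, and the function mirrors the amd_uncore_cpu_online() added by the patch: ]

	/* Per uncore type:
	 *   UNCORE_STARTING startup:  scan() - CPUID discovery on the new CPU
	 *   UNCORE_ONLINE   startup:  init() - one-time PMU registration + contexts
	 *   UNCORE_ONLINE   teardown: move() - migrate contexts to a shared sibling
	 *   UNCORE_PREP     teardown: free() - release this CPU's contexts
	 */
	static int amd_uncore_cpu_online(unsigned int cpu)
	{
		struct amd_uncore *uncore;
		int i;

		for (i = 0; i < UNCORE_TYPE_MAX; i++) {
			uncore = &uncores[i];
			if (uncore->init(uncore, cpu))
				break;
		}

		return 0;
	}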
Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e6c447e48872fcab8452e0dd81b1c9cb09f39eb4.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 494 ++++++++++++++++++++++++++----------------- 1 file changed, 305 insertions(+), 189 deletions(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ffcecda13d65..ff1d09cc07ad 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -26,8 +26,6 @@ #define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 - -#define NUM_UNCORES_MAX 2 /* DF (or NB) and L3 (or L2) */ #define UNCORE_NAME_LEN 16 #undef pr_fmt @@ -35,10 +33,7 @@ static int pmu_version; -static HLIST_HEAD(uncore_unused_list); - struct amd_uncore_ctx { - int id; int refcnt; int cpu; struct perf_event **events; @@ -53,11 +48,36 @@ struct amd_uncore_pmu { cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; - int (*id)(unsigned int cpu); }; -static struct amd_uncore_pmu pmus[NUM_UNCORES_MAX]; -static int num_pmus __read_mostly; +enum { + UNCORE_TYPE_DF, + UNCORE_TYPE_L3, + + UNCORE_TYPE_MAX +}; + +union amd_uncore_info { + struct { + u64 aux_data:32; /* auxiliary data */ + u64 num_pmcs:8; /* number of counters */ + u64 cid:8; /* context id */ + } split; + u64 full; +}; + +struct amd_uncore { + union amd_uncore_info * __percpu info; + struct amd_uncore_pmu *pmus; + unsigned int num_pmus; + bool init_done; + void (*scan)(struct amd_uncore *uncore, unsigned int cpu); + int (*init)(struct amd_uncore *uncore, unsigned int cpu); + void (*move)(struct amd_uncore *uncore, unsigned int cpu); + void (*free)(struct amd_uncore *uncore, unsigned int cpu); +}; + +static struct amd_uncore uncores[UNCORE_TYPE_MAX]; static struct amd_uncore_pmu *event_to_amd_uncore_pmu(struct perf_event *event) { @@ -332,182 +352,192 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = { NULL, }; -static int amd_uncore_cpu_up_prepare(unsigned int cpu) +static __always_inline +int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int node = cpu_to_node(cpu), i; - - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - *per_cpu_ptr(pmu->ctx, cpu) = NULL; - ctx = kzalloc_node(sizeof(struct amd_uncore_ctx), GFP_KERNEL, - node); - if (!ctx) - goto fail; - - ctx->cpu = cpu; - ctx->events = kzalloc_node(sizeof(struct perf_event *) * - pmu->num_counters, GFP_KERNEL, - node); - if (!ctx->events) - goto fail; - - ctx->id = -1; - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } - - return 0; - -fail: - /* Rollback */ - for (; i >= 0; i--) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (!ctx) - continue; - - kfree(ctx->events); - kfree(ctx); - } + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.cid; +} - return -ENOMEM; +static __always_inline +int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.num_pmcs; } -static struct amd_uncore_ctx * -amd_uncore_find_online_sibling(struct amd_uncore_ctx *this, - struct amd_uncore_pmu *pmu) +static void amd_uncore_ctx_free(struct amd_uncore *uncore, unsigned int cpu) { - unsigned int cpu; - struct amd_uncore_ctx *that; + struct amd_uncore_pmu *pmu; + struct amd_uncore_ctx *ctx; + int i; - for_each_online_cpu(cpu) { - that = *per_cpu_ptr(pmu->ctx, cpu); + if (!uncore->init_done) + return; - if 
(!that) + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + ctx = *per_cpu_ptr(pmu->ctx, cpu); + if (!ctx) continue; - if (this == that) - continue; + if (cpu == ctx->cpu) + cpumask_clear_cpu(cpu, &pmu->active_mask); - if (this->id == that->id) { - hlist_add_head(&this->node, &uncore_unused_list); - this = that; - break; + if (!--ctx->refcnt) { + kfree(ctx->events); + kfree(ctx); } - } - this->refcnt++; - return this; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + } } -static int amd_uncore_cpu_starting(unsigned int cpu) +static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { + struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + int node, cid, i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - ctx->id = pmu->id(cpu); - ctx = amd_uncore_find_online_sibling(ctx, pmu); - *per_cpu_ptr(pmu->ctx, cpu) = ctx; - } + if (!uncore->init_done || !uncore->num_pmus) + return 0; - return 0; -} + cid = amd_uncore_ctx_cid(uncore, cpu); -static void uncore_clean_online(void) -{ - struct amd_uncore_ctx *ctx; - struct hlist_node *n; + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + *per_cpu_ptr(pmu->ctx, cpu) = NULL; + curr = NULL; - hlist_for_each_entry_safe(ctx, n, &uncore_unused_list, node) { - hlist_del(&ctx->node); - kfree(ctx->events); - kfree(ctx); - } -} + /* Find a sibling context */ + for_each_online_cpu(j) { + if (cpu == j) + continue; -static int amd_uncore_cpu_online(unsigned int cpu) -{ - struct amd_uncore_pmu *pmu; - struct amd_uncore_ctx *ctx; - int i; + prev = *per_cpu_ptr(pmu->ctx, j); + if (!prev) + continue; - uncore_clean_online(); + if (cid == amd_uncore_ctx_cid(uncore, j)) { + curr = prev; + break; + } + } + + /* Allocate context if sibling does not exist */ + if (!curr) { + node = cpu_to_node(cpu); + curr = kzalloc_node(sizeof(*curr), GFP_KERNEL, node); + if (!curr) + goto fail; + + curr->cpu = cpu; + curr->events = kzalloc_node(sizeof(*curr->events) * + pmu->num_counters, + GFP_KERNEL, node); + if (!curr->events) { + kfree(curr); + goto fail; + } - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) cpumask_set_cpu(cpu, &pmu->active_mask); + } + + curr->refcnt++; + *per_cpu_ptr(pmu->ctx, cpu) = curr; } return 0; + +fail: + amd_uncore_ctx_free(uncore, cpu); + + return -ENOMEM; } -static int amd_uncore_cpu_down_prepare(unsigned int cpu) +static void amd_uncore_ctx_move(struct amd_uncore *uncore, unsigned int cpu) { - struct amd_uncore_ctx *this, *that; + struct amd_uncore_ctx *curr, *next; struct amd_uncore_pmu *pmu; int i, j; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - this = *per_cpu_ptr(pmu->ctx, cpu); + if (!uncore->init_done) + return; - /* this cpu is going down, migrate to a shared sibling if possible */ - for_each_online_cpu(j) { - that = *per_cpu_ptr(pmu->ctx, j); + for (i = 0; i < uncore->num_pmus; i++) { + pmu = &uncore->pmus[i]; + curr = *per_cpu_ptr(pmu->ctx, cpu); + if (!curr) + continue; - if (cpu == j) + /* Migrate to a shared sibling if possible */ + for_each_online_cpu(j) { + next = *per_cpu_ptr(pmu->ctx, j); + if (!next || cpu == j) continue; - if (this == that) { + if (curr == next) { perf_pmu_migrate_context(&pmu->pmu, cpu, j); cpumask_clear_cpu(cpu, &pmu->active_mask); cpumask_set_cpu(j, &pmu->active_mask); - that->cpu = j; + next->cpu = j; break; } } } +} + +static int amd_uncore_cpu_starting(unsigned int cpu) +{ + struct amd_uncore *uncore; 
+ int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->scan(uncore, cpu); + } return 0; } -static int amd_uncore_cpu_dead(unsigned int cpu) +static int amd_uncore_cpu_online(unsigned int cpu) { - struct amd_uncore_ctx *ctx; - struct amd_uncore_pmu *pmu; + struct amd_uncore *uncore; int i; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - ctx = *per_cpu_ptr(pmu->ctx, cpu); - if (cpu == ctx->cpu) - cpumask_clear_cpu(cpu, &pmu->active_mask); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->init(uncore, cpu)) + break; + } - if (!--ctx->refcnt) { - kfree(ctx->events); - kfree(ctx); - } + return 0; +} - *per_cpu_ptr(pmu->ctx, cpu) = NULL; +static int amd_uncore_cpu_down_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore; + int i; + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->move(uncore, cpu); } return 0; } -static int amd_uncore_df_id(unsigned int cpu) +static int amd_uncore_cpu_dead(unsigned int cpu) { - unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + int i; - cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + uncore->free(uncore, cpu); + } - return ecx & 0xff; + return 0; } static int amd_uncore_df_event_init(struct perf_event *event) @@ -550,41 +580,66 @@ static int amd_uncore_df_add(struct perf_event *event, int flags) return 0; } -static int amd_uncore_df_init(void) +static +void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **df_attr = amd_uncore_df_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; union cpuid_0x80000022_ebx ebx; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_NB)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.cid = topology_die_id(cpu); + + if (pmu_version >= 2) { + ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); + info.split.num_pmcs = ebx.split.num_df_pmc; + } + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **df_attr = amd_uncore_df_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, the Northbridge counters are repurposed * as Data Fabric counters. The PMUs are exported based on family as * either NB or DF. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? 
"amd_df" : "amd_nb", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_NB; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; - pmu->id = amd_uncore_df_id; if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; *df_attr++ = &format_attr_umask12.attr; - ebx.full = cpuid_ebx(EXT_PERFMON_DEBUG_FEATURES); - pmu->num_counters = ebx.split.num_df_pmc; } else if (boot_cpu_data.x86 >= 0x17) { *df_attr = &format_attr_event14.attr; } pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -600,25 +655,22 @@ static int amd_uncore_df_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; + uncore->num_pmus = 1; - return 0; -} +done: + uncore->init_done = true; -static int amd_uncore_l3_id(unsigned int cpu) -{ - return get_llc_id(cpu); + return amd_uncore_ctx_init(uncore, cpu); } static int amd_uncore_l3_event_init(struct perf_event *event) @@ -660,27 +712,52 @@ static int amd_uncore_l3_event_init(struct perf_event *event) return 0; } -static int amd_uncore_l3_init(void) +static +void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) { - struct attribute **l3_attr = amd_uncore_l3_format_attr; - struct amd_uncore_pmu *pmu = &pmus[num_pmus]; - int ret; + union amd_uncore_info info; if (!boot_cpu_has(X86_FEATURE_PERFCTR_LLC)) - return 0; + return; + + info.split.aux_data = 0; + info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.cid = get_llc_id(cpu); + + if (boot_cpu_data.x86 >= 0x17) + info.split.num_pmcs = NUM_COUNTERS_L3; + + *per_cpu_ptr(uncore->info, cpu) = info; +} + +static +int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) +{ + struct attribute **l3_attr = amd_uncore_l3_format_attr; + struct amd_uncore_pmu *pmu; + + /* Run just once */ + if (uncore->init_done) + return amd_uncore_ctx_init(uncore, cpu); + + /* No grouping, single instance for a system */ + uncore->pmus = kzalloc(sizeof(*uncore->pmus), GFP_KERNEL); + if (!uncore->pmus) { + uncore->num_pmus = 0; + goto done; + } /* * For Family 17h and above, L3 cache counters are available instead * of L2 cache counters. The PMUs are exported based on family as * either L2 or L3. */ + pmu = &uncore->pmus[0]; strscpy(pmu->name, boot_cpu_data.x86 >= 0x17 ? "amd_l3" : "amd_l2", sizeof(pmu->name)); - - pmu->num_counters = NUM_COUNTERS_L2; + pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; - pmu->id = amd_uncore_l3_id; if (boot_cpu_data.x86 >= 0x17) { *l3_attr++ = &format_attr_event8.attr; @@ -688,12 +765,11 @@ static int amd_uncore_l3_init(void) *l3_attr++ = boot_cpu_data.x86 >= 0x19 ? 
&format_attr_threadmask2.attr : &format_attr_threadmask8.attr; - pmu->num_counters = NUM_COUNTERS_L3; } pmu->ctx = alloc_percpu(struct amd_uncore_ctx *); if (!pmu->ctx) - return -ENOMEM; + goto done; pmu->pmu = (struct pmu) { .task_ctx_nr = perf_invalid_context, @@ -710,43 +786,45 @@ static int amd_uncore_l3_init(void) .module = THIS_MODULE, }; - ret = perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1); - if (ret) { + if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) { free_percpu(pmu->ctx); pmu->ctx = NULL; - return ret; + goto done; } - pr_info("%d %s %s counters detected\n", pmu->num_counters, - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON" : "", + pr_info("%d %s%s counters detected\n", pmu->num_counters, + boot_cpu_data.x86_vendor == X86_VENDOR_HYGON ? "HYGON " : "", pmu->pmu.name); - num_pmus++; - - return 0; -} - -static void uncore_free(void) -{ - struct amd_uncore_pmu *pmu; - int i; + uncore->num_pmus = 1; - for (i = 0; i < num_pmus; i++) { - pmu = &pmus[i]; - if (!pmu->ctx) - continue; - - perf_pmu_unregister(&pmu->pmu); - free_percpu(pmu->ctx); - pmu->ctx = NULL; - } +done: + uncore->init_done = true; - num_pmus = 0; + return amd_uncore_ctx_init(uncore, cpu); } +static struct amd_uncore uncores[UNCORE_TYPE_MAX] = { + /* UNCORE_TYPE_DF */ + { + .scan = amd_uncore_df_ctx_scan, + .init = amd_uncore_df_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, + /* UNCORE_TYPE_L3 */ + { + .scan = amd_uncore_l3_ctx_scan, + .init = amd_uncore_l3_ctx_init, + .move = amd_uncore_ctx_move, + .free = amd_uncore_ctx_free, + }, +}; + static int __init amd_uncore_init(void) { - int ret; + struct amd_uncore *uncore; + int ret, i; if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) @@ -758,20 +836,27 @@ static int __init amd_uncore_init(void) if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) pmu_version = 2; - ret = amd_uncore_df_init(); - if (ret) - goto fail; + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; - ret = amd_uncore_l3_init(); - if (ret) - goto fail; + BUG_ON(!uncore->scan); + BUG_ON(!uncore->init); + BUG_ON(!uncore->move); + BUG_ON(!uncore->free); + + uncore->info = alloc_percpu(union amd_uncore_info); + if (!uncore->info) { + ret = -ENOMEM; + goto fail; + } + }; /* * Install callbacks. Core will call them for each online cpu. 
*/ if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", - amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) + NULL, amd_uncore_cpu_dead)) goto fail; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, @@ -790,17 +875,48 @@ fail_start: fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); fail: - uncore_free(); + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (uncore->info) { + free_percpu(uncore->info); + uncore->info = NULL; + } + } return ret; } static void __exit amd_uncore_exit(void) { + struct amd_uncore *uncore; + struct amd_uncore_pmu *pmu; + int i, j; + cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE); cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); - uncore_free(); + + for (i = 0; i < UNCORE_TYPE_MAX; i++) { + uncore = &uncores[i]; + if (!uncore->info) + continue; + + free_percpu(uncore->info); + uncore->info = NULL; + + for (j = 0; j < uncore->num_pmus; j++) { + pmu = &uncore->pmus[j]; + if (!pmu->ctx) + continue; + + perf_pmu_unregister(&pmu->pmu); + free_percpu(pmu->ctx); + pmu->ctx = NULL; + } + + kfree(uncore->pmus); + uncore->pmus = NULL; + } } module_init(amd_uncore_init); -- cgit v1.2.3 From 7ef0343855dc23a979a53b3143540f93f3e5bef8 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:13 +0530 Subject: perf/x86/amd/uncore: Use rdmsr if rdpmc is unavailable Not all uncore PMUs may support the use of the RDPMC instruction for reading counters. In such cases, read the count from the corresponding PERF_CTR register using the RDMSR instruction. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/e9d994e32a3fcb39fa59fcf43ab4260d11aba097.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index ff1d09cc07ad..2fe623923034 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -96,7 +96,16 @@ static void amd_uncore_read(struct perf_event *event) */ prev = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new); + + /* + * Some uncore PMUs do not have RDPMC assignments. In such cases, + * read counts directly from the corresponding PERF_CTR. + */ + if (hwc->event_base_rdpmc < 0) + rdmsrl(hwc->event_base, new); + else + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); delta >>= COUNTER_SHIFT; @@ -164,6 +173,9 @@ out: hwc->event_base_rdpmc = pmu->rdpmc_base + hwc->idx; hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + if (pmu->rdpmc_base < 0) + hwc->event_base_rdpmc = -1; + if (flags & PERF_EF_START) event->pmu->start(event, PERF_EF_RELOAD); -- cgit v1.2.3 From 83a43c622123e714b0317a57176b336187f5deb3 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 5 Oct 2023 10:53:14 +0530 Subject: perf/x86/amd/uncore: Add group exclusivity In some cases, it may be necessary to restrict opening PMU events to a subset of CPUs. E.g. Unified Memory Controller (UMC) PMUs are specific to each active memory channel and the MSR address space for the PERF_CTL and PERF_CTR registers is reused on each socket. Thus, opening events for a specific UMC PMU should be restricted to CPUs belonging to the same socket as that of the UMC. The "cpumask" of the PMU should also reflect this accordingly. 
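[ Note: a sketch of the group check this enables during context setup, as described in the continuation below; the helper name is hypothetical, while amd_uncore_ctx_gid() and the group field are added by this patch: ]

	/* A CPU only gets a context for a group-restricted PMU (e.g. a UMC)
	 * when its group id (the socket id for UMCs) matches the PMU's. */
	static bool amd_uncore_grp_allowed_sketch(struct amd_uncore *uncore,
						  struct amd_uncore_pmu *pmu,
						  unsigned int cpu)
	{
		return amd_uncore_ctx_gid(uncore, cpu) == pmu->group;
	}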
Uncore PMUs which require this can use the new group attribute in struct amd_uncore_pmu to set a valid group ID during the scan() phase. Later, during init(), an uncore context for a CPU will be unavailable if the group ID does not match. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/937d6d71010a48ea4e069f4904b3116a5f99ecdf.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 2fe623923034..318982951dd2 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -27,6 +27,7 @@ #define COUNTER_SHIFT 16 #define UNCORE_NAME_LEN 16 +#define UNCORE_GROUP_MAX 256 #undef pr_fmt #define pr_fmt(fmt) "amd_uncore: " fmt @@ -45,6 +46,7 @@ struct amd_uncore_pmu { int num_counters; int rdpmc_base; u32 msr_base; + int group; cpumask_t active_mask; struct pmu pmu; struct amd_uncore_ctx * __percpu *ctx; @@ -61,6 +63,7 @@ union amd_uncore_info { struct { u64 aux_data:32; /* auxiliary data */ u64 num_pmcs:8; /* number of counters */ + u64 gid:8; /* group id */ u64 cid:8; /* context id */ } split; u64 full; @@ -371,6 +374,13 @@ int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu) return info->split.cid; } +static __always_inline +int amd_uncore_ctx_gid(struct amd_uncore *uncore, unsigned int cpu) +{ + union amd_uncore_info *info = per_cpu_ptr(uncore->info, cpu); + return info->split.gid; +} + static __always_inline int amd_uncore_ctx_num_pmcs(struct amd_uncore *uncore, unsigned int cpu) { @@ -409,18 +419,23 @@ static int amd_uncore_ctx_init(struct amd_uncore *uncore, unsigned int cpu) { struct amd_uncore_ctx *curr, *prev; struct amd_uncore_pmu *pmu; - int node, cid, i, j; + int node, cid, gid, i, j; if (!uncore->init_done || !uncore->num_pmus) return 0; cid = amd_uncore_ctx_cid(uncore, cpu); + gid = amd_uncore_ctx_gid(uncore, cpu); for (i = 0; i < uncore->num_pmus; i++) { pmu = &uncore->pmus[i]; *per_cpu_ptr(pmu->ctx, cpu) = NULL; curr = NULL; + /* Check for group exclusivity */ + if (gid != pmu->group) + continue; + /* Find a sibling context */ for_each_online_cpu(j) { if (cpu == j) @@ -603,6 +618,7 @@ void amd_uncore_df_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_NB; + info.split.gid = 0; info.split.cid = topology_die_id(cpu); if (pmu_version >= 2) { @@ -641,6 +657,7 @@ int amd_uncore_df_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F15H_NB_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_NB; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (pmu_version >= 2) { *df_attr++ = &format_attr_event14v2.attr; @@ -734,6 +751,7 @@ void amd_uncore_l3_ctx_scan(struct amd_uncore *uncore, unsigned int cpu) info.split.aux_data = 0; info.split.num_pmcs = NUM_COUNTERS_L2; + info.split.gid = 0; info.split.cid = get_llc_id(cpu); if (boot_cpu_data.x86 >= 0x17) @@ -770,6 +788,7 @@ int amd_uncore_l3_ctx_init(struct amd_uncore *uncore, unsigned int cpu) pmu->num_counters = amd_uncore_ctx_num_pmcs(uncore, cpu); pmu->msr_base = MSR_F16H_L2I_PERF_CTL; pmu->rdpmc_base = RDPMC_BASE_LLC; + pmu->group = amd_uncore_ctx_gid(uncore, cpu); if (boot_cpu_data.x86 >= 0x17) { *l3_attr++ = &format_attr_event8.attr; -- cgit v1.2.3 From 25e56847821f7375bdee7dae1027c7917d07ce4b Mon Sep 17 00:00:00 2001 From: Sandipan Das 
Date: Thu, 5 Oct 2023 10:53:15 +0530 Subject: perf/x86/amd/uncore: Add memory controller support Unified Memory Controller (UMC) events were introduced with Zen 4 as a part of the Performance Monitoring Version 2 (PerfMonV2) enhancements. An event is specified using the EventSelect bits, and the RdWrMask bits can be used for additional filtering of read and write requests. As of now, a maximum of 12 channels of DDR5 are available on each socket and each channel is controlled by a dedicated UMC. Each UMC, in turn, has its own set of performance monitoring counters. Since the MSR address space for the UMC PERF_CTL and PERF_CTR registers is reused across sockets, uncore groups are created on the basis of socket IDs. Hence, group exclusivity is mandatory while opening events so that events for a UMC can only be opened on CPUs which are on the same socket as the corresponding memory channel. For each socket, the total number of available UMC counters and active memory channels are determined from CPUID leaf 0x80000022 EBX and ECX, respectively. Usually, on Zen 4, each UMC has four counters. MSR assignments are determined on the basis of active UMCs. E.g. if UMCs 1, 4 and 9 are active for a given socket, then * UMC 1 gets MSRs 0xc0010800 to 0xc0010807 as PERF_CTLs and PERF_CTRs * UMC 4 gets MSRs 0xc0010808 to 0xc001080f as PERF_CTLs and PERF_CTRs * UMC 9 gets MSRs 0xc0010810 to 0xc0010817 as PERF_CTLs and PERF_CTRs If there are sockets without any online CPUs when the amd_uncore driver is loaded, UMCs for such sockets will not be discoverable since the mechanism relies on executing the CPUID instruction on an online CPU from the socket. Signed-off-by: Sandipan Das Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/b25f391205c22733493abec1ed850b71784edc5f.1696425185.git.sandipan.das@amd.com --- arch/x86/events/amd/uncore.c | 156 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) (limited to 'arch/x86/events') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index 318982951dd2..9b444ce24108 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -55,6 +55,7 @@ struct amd_uncore_pmu { enum { UNCORE_TYPE_DF, UNCORE_TYPE_L3, + UNCORE_TYPE_UMC, UNCORE_TYPE_MAX }; @@ -286,7 +287,7 @@ static struct device_attribute format_attr_##_var = \ DEFINE_UNCORE_FORMAT_ATTR(event12, event, "config:0-7,32-35"); DEFINE_UNCORE_FORMAT_ATTR(event14, event, "config:0-7,32-35,59-60"); /* F17h+ DF */ DEFINE_UNCORE_FORMAT_ATTR(event14v2, event, "config:0-7,32-37"); /* PerfMonV2 DF */ -DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3 */ +DEFINE_UNCORE_FORMAT_ATTR(event8, event, "config:0-7"); /* F17h+ L3, PerfMonV2 UMC */ DEFINE_UNCORE_FORMAT_ATTR(umask8, umask, "config:8-15"); DEFINE_UNCORE_FORMAT_ATTR(umask12, umask, "config:8-15,24-27"); /* PerfMonV2 DF */ DEFINE_UNCORE_FORMAT_ATTR(coreid, coreid, "config:42-44"); /* F19h L3 */ @@ -296,6 +297,7 @@ DEFINE_UNCORE_FORMAT_ATTR(threadmask2, threadmask, "config:56-57"); /* F19h L DEFINE_UNCORE_FORMAT_ATTR(enallslices, enallslices, "config:46"); /* F19h L3 */ DEFINE_UNCORE_FORMAT_ATTR(enallcores, enallcores, "config:47"); /* F19h L3 */ DEFINE_UNCORE_FORMAT_ATTR(sliceid, sliceid, "config:48-50"); /* F19h L3 */ +DEFINE_UNCORE_FORMAT_ATTR(rdwrmask, rdwrmask, "config:8-9"); /* PerfMonV2 UMC */ /* Common DF and NB attributes */ static struct attribute *amd_uncore_df_format_attr[] = { @@ -312,6 +314,13 @@ static struct attribute *amd_uncore_l3_format_attr[]
 	NULL,
 };
 
+/* Common UMC attributes */
+static struct attribute *amd_uncore_umc_format_attr[] = {
+	&format_attr_event8.attr,	/* event */
+	&format_attr_rdwrmask.attr,	/* rdwrmask */
+	NULL,
+};
+
 /* F17h unique L3 attributes */
 static struct attribute *amd_f17h_uncore_l3_format_attr[] = {
 	&format_attr_slicemask.attr,	/* slicemask */
@@ -349,6 +358,11 @@ static struct attribute_group amd_f19h_uncore_l3_format_group = {
 	.is_visible = amd_f19h_uncore_is_visible,
 };
 
+static struct attribute_group amd_uncore_umc_format_group = {
+	.name = "format",
+	.attrs = amd_uncore_umc_format_attr,
+};
+
 static const struct attribute_group *amd_uncore_df_attr_groups[] = {
 	&amd_uncore_attr_group,
 	&amd_uncore_df_format_group,
@@ -367,6 +381,12 @@ static const struct attribute_group *amd_uncore_l3_attr_update[] = {
 	NULL,
 };
 
+static const struct attribute_group *amd_uncore_umc_attr_groups[] = {
+	&amd_uncore_attr_group,
+	&amd_uncore_umc_format_group,
+	NULL,
+};
+
 static __always_inline
 int amd_uncore_ctx_cid(struct amd_uncore *uncore, unsigned int cpu)
 {
@@ -835,6 +855,133 @@ done:
 	return amd_uncore_ctx_init(uncore, cpu);
 }
 
+static int amd_uncore_umc_event_init(struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	int ret = amd_uncore_event_init(event);
+
+	if (ret)
+		return ret;
+
+	hwc->config = event->attr.config & AMD64_PERFMON_V2_RAW_EVENT_MASK_UMC;
+
+	return 0;
+}
+
+static void amd_uncore_umc_start(struct perf_event *event, int flags)
+{
+	struct hw_perf_event *hwc = &event->hw;
+
+	if (flags & PERF_EF_RELOAD)
+		wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count));
+
+	hwc->state = 0;
+	wrmsrl(hwc->config_base, (hwc->config | AMD64_PERFMON_V2_ENABLE_UMC));
+	perf_event_update_userpage(event);
+}
+
+static
+void amd_uncore_umc_ctx_scan(struct amd_uncore *uncore, unsigned int cpu)
+{
+	union cpuid_0x80000022_ebx ebx;
+	union amd_uncore_info info;
+	unsigned int eax, ecx, edx;
+
+	if (pmu_version < 2)
+		return;
+
+	cpuid(EXT_PERFMON_DEBUG_FEATURES, &eax, &ebx.full, &ecx, &edx);
+	info.split.aux_data = ecx;	/* stash active mask */
+	info.split.num_pmcs = ebx.split.num_umc_pmc;
+	info.split.gid = topology_die_id(cpu);
+	info.split.cid = topology_die_id(cpu);
+	*per_cpu_ptr(uncore->info, cpu) = info;
+}
+
+static
+int amd_uncore_umc_ctx_init(struct amd_uncore *uncore, unsigned int cpu)
+{
+	DECLARE_BITMAP(gmask, UNCORE_GROUP_MAX) = { 0 };
+	u8 group_num_pmus[UNCORE_GROUP_MAX] = { 0 };
+	u8 group_num_pmcs[UNCORE_GROUP_MAX] = { 0 };
+	union amd_uncore_info info;
+	struct amd_uncore_pmu *pmu;
+	int index = 0, gid, i;
+
+	if (pmu_version < 2)
+		return 0;
+
+	/* Run just once */
+	if (uncore->init_done)
+		return amd_uncore_ctx_init(uncore, cpu);
+
+	/* Find unique groups */
+	for_each_online_cpu(i) {
+		info = *per_cpu_ptr(uncore->info, i);
+		gid = info.split.gid;
+		if (test_bit(gid, gmask))
+			continue;
+
+		__set_bit(gid, gmask);
+		group_num_pmus[gid] = hweight32(info.split.aux_data);
+		group_num_pmcs[gid] = info.split.num_pmcs;
+		uncore->num_pmus += group_num_pmus[gid];
+	}
+
+	uncore->pmus = kzalloc(sizeof(*uncore->pmus) * uncore->num_pmus,
+			       GFP_KERNEL);
+	if (!uncore->pmus) {
+		uncore->num_pmus = 0;
+		goto done;
+	}
+
+	for_each_set_bit(gid, gmask, UNCORE_GROUP_MAX) {
+		for (i = 0; i < group_num_pmus[gid]; i++) {
+			pmu = &uncore->pmus[index];
+			snprintf(pmu->name, sizeof(pmu->name), "amd_umc_%d", index);
+			pmu->num_counters = group_num_pmcs[gid] / group_num_pmus[gid];
+			pmu->msr_base = MSR_F19H_UMC_PERF_CTL + i * pmu->num_counters * 2;
+			pmu->rdpmc_base = -1;
+			pmu->group = gid;
+
+			pmu->ctx = alloc_percpu(struct amd_uncore_ctx *);
+			if (!pmu->ctx)
+				goto done;
+
+			pmu->pmu = (struct pmu) {
+				.task_ctx_nr	= perf_invalid_context,
+				.attr_groups	= amd_uncore_umc_attr_groups,
+				.name		= pmu->name,
+				.event_init	= amd_uncore_umc_event_init,
+				.add		= amd_uncore_add,
+				.del		= amd_uncore_del,
+				.start		= amd_uncore_umc_start,
+				.stop		= amd_uncore_stop,
+				.read		= amd_uncore_read,
+				.capabilities	= PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT,
+				.module		= THIS_MODULE,
+			};
+
+			if (perf_pmu_register(&pmu->pmu, pmu->pmu.name, -1)) {
+				free_percpu(pmu->ctx);
+				pmu->ctx = NULL;
+				goto done;
+			}
+
+			pr_info("%d %s counters detected\n", pmu->num_counters,
+				pmu->pmu.name);
+
+			index++;
+		}
+	}
+
+done:
+	uncore->num_pmus = index;
+	uncore->init_done = true;
+
+	return amd_uncore_ctx_init(uncore, cpu);
+}
+
 static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 	/* UNCORE_TYPE_DF */
 	{
@@ -850,6 +997,13 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 		.move = amd_uncore_ctx_move,
 		.free = amd_uncore_ctx_free,
 	},
+	/* UNCORE_TYPE_UMC */
+	{
+		.scan = amd_uncore_umc_ctx_scan,
+		.init = amd_uncore_umc_ctx_init,
+		.move = amd_uncore_ctx_move,
+		.free = amd_uncore_ctx_free,
+	},
 };
 
 static int __init amd_uncore_init(void)
--
cgit v1.2.3
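A quick way to sanity-check the MSR layout described in the changelog above
is to replay the msr_base arithmetic from amd_uncore_umc_ctx_init() in a
standalone program. The sketch below is not kernel code; it only assumes the
MSR_F19H_UMC_PERF_CTL base value (0xc0010800) and the counters-times-two
stride visible in the patch:

    #include <stdio.h>
    #include <stdint.h>

    #define MSR_F19H_UMC_PERF_CTL 0xc0010800ULL /* base from the patch above */

    int main(void)
    {
        /* Example from the changelog: UMCs 1, 4 and 9 are active. */
        uint32_t active_mask = (1u << 1) | (1u << 4) | (1u << 9);
        unsigned int num_counters = 4; /* typical for Zen 4 */
        unsigned int index = 0;        /* position among *active* UMCs */

        for (unsigned int umc = 0; umc < 32; umc++) {
            if (!(active_mask & (1u << umc)))
                continue;

            /* Mirrors: msr_base = MSR_F19H_UMC_PERF_CTL + i * num_counters * 2 */
            uint64_t base = MSR_F19H_UMC_PERF_CTL +
                            (uint64_t)index * num_counters * 2;

            printf("UMC %u -> MSRs 0x%llx to 0x%llx\n", umc,
                   (unsigned long long)base,
                   (unsigned long long)(base + num_counters * 2 - 1));
            index++;
        }
        return 0;
    }

For the mask (1 << 1) | (1 << 4) | (1 << 9), this prints exactly the three
MSR ranges listed in the changelog, confirming that assignments follow the
order of active UMCs rather than the raw UMC numbers.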
From 7543365739a4ff61d40ad53ab68c17d2e7dfb0c9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter
Date: Fri, 13 Oct 2023 10:18:12 +0300
Subject: perf/x86/amd/uncore: Fix uninitialized return value in amd_uncore_init()

Some of the error paths in this function don't initialize the error code
before returning. Return -ENODEV by default.

Fixes: d6389d3ccc13 ("perf/x86/amd/uncore: Refactor uncore management")
Signed-off-by: Dan Carpenter
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/cec62eba-c4b8-4cb7-9671-58894dd4b974@moroto.mountain
---
 arch/x86/events/amd/uncore.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86/events')

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index 9b444ce24108..a389828f378c 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -1009,7 +1009,8 @@ static struct amd_uncore uncores[UNCORE_TYPE_MAX] = {
 static int __init amd_uncore_init(void)
 {
 	struct amd_uncore *uncore;
-	int ret, i;
+	int ret = -ENODEV;
+	int i;
 
 	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
 	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
--
cgit v1.2.3
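The defect fixed above is easiest to see in reduced form. The following
standalone sketch (hypothetical code, not taken from the driver) shows why
an early goto path with no prior assignment returns indeterminate stack
contents, and how initializing ret at declaration makes such paths fail
deterministically:

    #include <stdio.h>
    #include <errno.h>

    /* Hypothetical helper: returns 0 when "probing" fails. */
    int probe_something(void) { return 0; }

    int init_buggy(void)
    {
        int ret;                /* indeterminate until assigned */

        if (!probe_something())
            goto fail;          /* returns uninitialized ret (undefined) */

        ret = 0;
    fail:
        return ret;
    }

    int init_fixed(void)
    {
        int ret = -ENODEV;      /* every early exit now reports -ENODEV */

        if (!probe_something())
            goto fail;

        ret = 0;
    fail:
        return ret;
    }

    int main(void)
    {
        /* init_buggy() is shown only for reading; calling it is undefined. */
        printf("fixed path returns %d\n", init_fixed());
        return 0;
    }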
From 744940f1921c8feb90e3c4bcc1e153fdd6e10fe2 Mon Sep 17 00:00:00 2001
From: Sandipan Das
Date: Mon, 16 Oct 2023 11:37:43 +0530
Subject: perf/x86/amd/uncore: Pass through error code for initialization
 failures, instead of -ENODEV

Pass through the appropriate error code when the registration of hotplug
callbacks fails during initialization, instead of returning a blanket
-ENODEV.

[ mingo: Updated the changelog. ]

Reported-by: Dan Carpenter
Signed-off-by: Sandipan Das
Signed-off-by: Ingo Molnar
Link: https://lore.kernel.org/r/20231016060743.332051-1-sandipan.das@amd.com
---
 arch/x86/events/amd/uncore.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'arch/x86/events')

diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c
index a389828f378c..4429eac00185 100644
--- a/arch/x86/events/amd/uncore.c
+++ b/arch/x86/events/amd/uncore.c
@@ -1040,20 +1040,25 @@ static int __init amd_uncore_init(void)
 	/*
 	 * Install callbacks. Core will call them for each online cpu.
 	 */
-	if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
-			      "perf/x86/amd/uncore:prepare",
-			      NULL, amd_uncore_cpu_dead))
+	ret = cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP,
+				"perf/x86/amd/uncore:prepare",
+				NULL, amd_uncore_cpu_dead);
+	if (ret)
 		goto fail;
 
-	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
-			      "perf/x86/amd/uncore:starting",
-			      amd_uncore_cpu_starting, NULL))
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING,
+				"perf/x86/amd/uncore:starting",
+				amd_uncore_cpu_starting, NULL);
+	if (ret)
 		goto fail_prep;
+
-	if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
-			      "perf/x86/amd/uncore:online",
-			      amd_uncore_cpu_online,
-			      amd_uncore_cpu_down_prepare))
+	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_ONLINE,
+				"perf/x86/amd/uncore:online",
+				amd_uncore_cpu_online,
+				amd_uncore_cpu_down_prepare);
+	if (ret)
 		goto fail_start;
+
 	return 0;
 
 fail_start:
--
cgit v1.2.3
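To observe from userspace the enumeration the UMC patch relies on, CPUID
leaf 0x80000022 (EXT_PERFMON_DEBUG_FEATURES) can be queried directly. Below
is a minimal sketch, assuming the kernel's cpuid_0x80000022_ebx layout with
the UMC counter count in EBX bits 21:16; as the changelog notes, CPUID must
be executed on a CPU of the socket being probed, so pin the program
accordingly (e.g. with taskset):

    #include <stdio.h>
    #include <cpuid.h>

    #define EXT_PERFMON_DEBUG_FEATURES 0x80000022

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid_count(EXT_PERFMON_DEBUG_FEATURES, 0,
                               &eax, &ebx, &ecx, &edx)) {
            fprintf(stderr, "CPUID leaf 0x80000022 not available\n");
            return 1;
        }

        /* Assumed layout: EBX[21:16] = number of UMC counters per channel. */
        unsigned int num_umc_pmc = (ebx >> 16) & 0x3f;
        /* ECX = mask of active memory channels (one bit per UMC). */
        unsigned int active = __builtin_popcount(ecx);

        printf("UMC counters per channel: %u\n", num_umc_pmc);
        printf("active memory channels  : %u (mask 0x%x)\n", active, ecx);
        return 0;
    }

On a Zen 4 system with four counters per UMC and all twelve channels
populated, this would be expected to report 4 and 12 respectively, matching
what the driver derives in amd_uncore_umc_ctx_scan().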