diff options
Diffstat (limited to 'arch/x86/events/amd/core.c')
-rw-r--r-- | arch/x86/events/amd/core.c | 65 |
1 files changed, 51 insertions, 14 deletions
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 985ef3b47919..920e3a640cad 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -432,8 +432,10 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, * be removed on one CPU at a time AND PMU is disabled * when we come here */ - for (i = 0; i < x86_pmu.num_counters; i++) { - if (cmpxchg(nb->owners + i, event, NULL) == event) + for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { + struct perf_event *tmp = event; + + if (try_cmpxchg(nb->owners + i, &tmp, NULL)) break; } } @@ -499,7 +501,7 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev * because of successive calls to x86_schedule_events() from * hw_perf_group_sched_in() without hw_perf_enable() */ - for_each_set_bit(idx, c->idxmsk, x86_pmu.num_counters) { + for_each_set_bit(idx, c->idxmsk, x86_pmu_max_num_counters(NULL)) { if (new == -1 || hwc->idx == idx) /* assign free slot, prefer hwc->idx */ old = cmpxchg(nb->owners + idx, NULL, event); @@ -542,7 +544,7 @@ static struct amd_nb *amd_alloc_nb(int cpu) /* * initialize all possible NB constraints */ - for (i = 0; i < x86_pmu.num_counters; i++) { + for_each_set_bit(i, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { __set_bit(i, nb->event_constraints[i].idxmsk); nb->event_constraints[i].weight = 1; } @@ -647,7 +649,7 @@ static void amd_pmu_cpu_dead(int cpu) } } -static inline void amd_pmu_set_global_ctl(u64 ctl) +static __always_inline void amd_pmu_set_global_ctl(u64 ctl) { wrmsrl(MSR_AMD64_PERF_CNTR_GLOBAL_CTL, ctl); } @@ -735,7 +737,7 @@ static void amd_pmu_check_overflow(void) * counters are always enabled when this function is called and * ARCH_PERFMON_EVENTSEL_INT is always set. */ - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -755,7 +757,7 @@ static void amd_pmu_enable_all(int added) amd_brs_enable_all(); - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { /* only activate events which are marked as active */ if (!test_bit(idx, cpuc->active_mask)) continue; @@ -907,6 +909,37 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) return amd_pmu_adjust_nmi_window(handled); } +/* + * AMD-specific callback invoked through perf_snapshot_branch_stack static + * call, defined in include/linux/perf_event.h. See its definition for API + * details. It's up to caller to provide enough space in *entries* to fit all + * LBR records, otherwise returned result will be truncated to *cnt* entries. + */ +static int amd_pmu_v2_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt) +{ + struct cpu_hw_events *cpuc; + unsigned long flags; + + /* + * The sequence of steps to freeze LBR should be completely inlined + * and contain no branches to minimize contamination of LBR snapshot + */ + local_irq_save(flags); + amd_pmu_core_disable_all(); + __amd_pmu_lbr_disable(); + + cpuc = this_cpu_ptr(&cpu_hw_events); + + amd_pmu_lbr_read(); + cnt = min(cnt, x86_pmu.lbr_nr); + memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt); + + amd_pmu_v2_enable_all(0); + local_irq_restore(flags); + + return cnt; +} + static int amd_pmu_v2_handle_irq(struct pt_regs *regs) { struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events); @@ -947,7 +980,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) /* Clear any reserved bits set by buggy microcode */ status &= amd_pmu_global_cntr_mask; - for (idx = 0; idx < x86_pmu.num_counters; idx++) { + for_each_set_bit(idx, x86_pmu.cntr_mask, X86_PMC_IDX_MAX) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -1282,7 +1315,7 @@ static __initconst const struct x86_pmu amd_pmu = { .addr_offset = amd_pmu_addr_offset, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = AMD64_NUM_COUNTERS, + .cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS - 1, 0), .add = amd_pmu_add_event, .del = amd_pmu_del_event, .cntval_bits = 48, @@ -1381,7 +1414,7 @@ static int __init amd_core_pmu_init(void) */ x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; - x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + x86_pmu.cntr_mask64 = GENMASK_ULL(AMD64_NUM_COUNTERS_CORE - 1, 0); /* Check for Performance Monitoring v2 support */ if (boot_cpu_has(X86_FEATURE_PERFMON_V2)) { @@ -1391,9 +1424,9 @@ static int __init amd_core_pmu_init(void) x86_pmu.version = 2; /* Find the number of available Core PMCs */ - x86_pmu.num_counters = ebx.split.num_core_pmc; + x86_pmu.cntr_mask64 = GENMASK_ULL(ebx.split.num_core_pmc - 1, 0); - amd_pmu_global_cntr_mask = (1ULL << x86_pmu.num_counters) - 1; + amd_pmu_global_cntr_mask = x86_pmu.cntr_mask64; /* Update PMC handling functions */ x86_pmu.enable_all = amd_pmu_v2_enable_all; @@ -1421,12 +1454,12 @@ static int __init amd_core_pmu_init(void) * even numbered counter that has a consecutive adjacent odd * numbered counter following it. */ - for (i = 0; i < x86_pmu.num_counters - 1; i += 2) + for (i = 0; i < x86_pmu_max_num_counters(NULL) - 1; i += 2) even_ctr_mask |= BIT_ULL(i); pair_constraint = (struct event_constraint) __EVENT_CONSTRAINT(0, even_ctr_mask, 0, - x86_pmu.num_counters / 2, 0, + x86_pmu_max_num_counters(NULL) / 2, 0, PERF_X86_EVENT_PAIR); x86_pmu.get_event_constraints = amd_get_event_constraints_f17h; @@ -1443,6 +1476,10 @@ static int __init amd_core_pmu_init(void) static_call_update(amd_pmu_branch_reset, amd_pmu_lbr_reset); static_call_update(amd_pmu_branch_add, amd_pmu_lbr_add); static_call_update(amd_pmu_branch_del, amd_pmu_lbr_del); + + /* Only support branch_stack snapshot on perfmon v2 */ + if (x86_pmu.handle_irq == amd_pmu_v2_handle_irq) + static_call_update(perf_snapshot_branch_stack, amd_pmu_v2_snapshot_branch_stack); } else if (!amd_brs_init()) { /* * BRS requires special event constraints and flushing on ctxsw. |