summaryrefslogtreecommitdiffstats
path: root/arch/powerpc/perf/isa207-common.c
diff options
context:
space:
mode:
authorKajol Jain <kjain@linux.ibm.com>2021-02-09 15:22:34 +0530
committerMichael Ellerman <mpe@ellerman.id.au>2021-02-11 23:35:36 +1100
commit82d2c16b350f72aa21ac2a6860c542aa4b43a51e (patch)
tree211576b43194d8c00ed547d6c2a7be3f1bf21740 /arch/powerpc/perf/isa207-common.c
parentb3abe590c80e0ba55b6fce48762232d90dbc37a5 (diff)
downloadlinux-stable-82d2c16b350f72aa21ac2a6860c542aa4b43a51e.tar.gz
linux-stable-82d2c16b350f72aa21ac2a6860c542aa4b43a51e.tar.bz2
linux-stable-82d2c16b350f72aa21ac2a6860c542aa4b43a51e.zip
powerpc/perf: Adds support for programming of Thresholding in P10
Thresholding, a performance monitoring unit feature, can be used to identify marked instructions which take more than expected cycles between start event and end event. Threshold compare (thresh_cmp) bits are programmed in MMCRA register. In Power9, thresh_cmp bits were part of the event code. But in case of P10, thresh_cmp are not part of event code due to inclusion of MMCR3 bits. Patch here adds an option to use attr.config1 variable to be used to pass thresh_cmp value to be programmed in MMCRA register. A new ppmu flag called PPMU_HAS_ATTR_CONFIG1 has been added and this flag is used to notify the use of attr.config1 variable. Patch has extended the parameter list of 'compute_mmcr', to include power_pmu's 'flags' element and parameter list of get_constraint to include attr.config1 value. It also extend parameter list of power_check_constraints inorder to pass perf_event list. As stated by commit ef0e3b650f8d ("powerpc/perf: Fix Threshold Event Counter Multiplier width for P10"), constraint bits for thresh_cmp is also needed to be increased to 11 bits, which is handled as part of this patch. We added bit number 53 as part of constraint bits of thresh_cmp for power10 to make it an 11 bit field. Updated layout for p10: /* * Layout of constraint bits: * * 60 56 52 48 44 40 36 32 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ fab_match ] [ thresh_cmp ] [ thresh_ctl ] [ ] * | | * [ thresh_cmp bits for p10] thresh_sel -* * * 28 24 20 16 12 8 4 0 * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | * [ ] | [ ] | [ sample ] [ ] [6] [5] [4] [3] [2] [1] * | | | | | * BHRB IFM -* | | |*radix_scope | Count of events for each PMC. * EBB -* | | p1, p2, p3, p4, p5, p6. * L1 I/D qualifier -* | * nc - number of counters -* * * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints * we want the low bit of each field to be added to any existing value. * * Everything else is a value field. */ Result: command#: cat /sys/devices/cpu/format/thresh_cmp config1:0-17 ex. usage: command#: perf record -I --weight -d -e cpu/event=0x67340101EC,thresh_cmp=500/ ./ebizzy -S 2 -t 1 -s 4096 1826636 records/s real 2.00 s user 2.00 s sys 0.00 s [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.038 MB perf.data (61 samples) ] Signed-off-by: Kajol Jain <kjain@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20210209095234.837356-1-kjain@linux.ibm.com
Diffstat (limited to 'arch/powerpc/perf/isa207-common.c')
-rw-r--r--arch/powerpc/perf/isa207-common.c67
1 files changed, 59 insertions, 8 deletions
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 6ab5b272090a..e4f577da33d8 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -108,12 +108,57 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
*mmcra |= MMCRA_SDAR_MODE_TLB;
}
+static u64 p10_thresh_cmp_val(u64 value)
+{
+ int exp = 0;
+ u64 result = value;
+
+ if (!value)
+ return value;
+
+ /*
+ * Incase of P10, thresh_cmp value is not part of raw event code
+ * and provided via attr.config1 parameter. To program threshold in MMCRA,
+ * take a 18 bit number N and shift right 2 places and increment
+ * the exponent E by 1 until the upper 10 bits of N are zero.
+ * Write E to the threshold exponent and write the lower 8 bits of N
+ * to the threshold mantissa.
+ * The max threshold that can be written is 261120.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+ if (value > 261120)
+ value = 261120;
+ while ((64 - __builtin_clzl(value)) > 8) {
+ exp++;
+ value >>= 2;
+ }
+
+ /*
+ * Note that it is invalid to write a mantissa with the
+ * upper 2 bits of mantissa being zero, unless the
+ * exponent is also zero.
+ */
+ if (!(value & 0xC0) && exp)
+ result = 0;
+ else
+ result = (exp << 8) | value;
+ }
+ return result;
+}
+
static u64 thresh_cmp_val(u64 value)
{
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ value = p10_thresh_cmp_val(value);
+
+ /*
+ * Since location of threshold compare bits in MMCRA
+ * is different for p8, using different shift value.
+ */
if (cpu_has_feature(CPU_FTR_ARCH_300))
return value << p9_MMCRA_THR_CMP_SHIFT;
-
- return value << MMCRA_THR_CMP_SHIFT;
+ else
+ return value << MMCRA_THR_CMP_SHIFT;
}
static unsigned long combine_from_event(u64 event)
@@ -141,13 +186,13 @@ static bool is_thresh_cmp_valid(u64 event)
{
unsigned int cmp, exp;
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ return p10_thresh_cmp_val(event) != 0;
+
/*
* Check the mantissa upper two bits are not zero, unless the
* exponent is also zero. See the THRESH_CMP_MANTISSA doc.
- * Power10: thresh_cmp is replaced by l2_l3 event select.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- return false;
cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
exp = cmp >> 7;
@@ -256,7 +301,7 @@ void isa207_get_mem_weight(u64 *weight)
*weight = mantissa << (2 * exp);
}
-int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
+int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp, u64 event_config1)
{
unsigned int unit, pmc, cache, ebb;
unsigned long mask, value;
@@ -355,9 +400,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp)
}
if (cpu_has_feature(CPU_FTR_ARCH_31)) {
- if (event_is_threshold(event)) {
+ if (event_is_threshold(event) && is_thresh_cmp_valid(event_config1)) {
mask |= CNST_THRESH_CTL_SEL_MASK;
value |= CNST_THRESH_CTL_SEL_VAL(event >> EVENT_THRESH_SHIFT);
+ mask |= p10_CNST_THRESH_CMP_MASK;
+ value |= p10_CNST_THRESH_CMP_VAL(p10_thresh_cmp_val(event_config1));
}
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
@@ -411,7 +458,7 @@ ebb_bhrb:
int isa207_compute_mmcr(u64 event[], int n_ev,
unsigned int hwc[], struct mmcr_regs *mmcr,
- struct perf_event *pevents[])
+ struct perf_event *pevents[], u32 flags)
{
unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
unsigned long mmcr3;
@@ -504,6 +551,10 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
val = (event[i] >> EVENT_THR_CMP_SHIFT) &
EVENT_THR_CMP_MASK;
mmcra |= thresh_cmp_val(val);
+ } else if (flags & PPMU_HAS_ATTR_CONFIG1) {
+ val = (pevents[i]->attr.config1 >> p10_EVENT_THR_CMP_SHIFT) &
+ p10_EVENT_THR_CMP_MASK;
+ mmcra |= thresh_cmp_val(val);
}
}