summaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2016-06-08 09:29:23 +0200
committerIngo Molnar <mingo@kernel.org>2016-06-08 09:29:23 +0200
commitaa3a655b159f11b1afe0dcdac5fb5b172f02b778 (patch)
tree577058fb95c7f2e2aacb3566874e75e17fcec9f8 /tools/perf/util
parent616d1c1b98ac79f30216a57a170dd7cea19b3df3 (diff)
parent7db91f251056f90fec4121f028680ab3153a0f3c (diff)
downloadlinux-stable-aa3a655b159f11b1afe0dcdac5fb5b172f02b778.tar.gz
linux-stable-aa3a655b159f11b1afe0dcdac5fb5b172f02b778.tar.bz2
linux-stable-aa3a655b159f11b1afe0dcdac5fb5b172f02b778.zip
Merge tag 'perf-core-for-mingo-20160606' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Tooling support for TopDown counters, recently added to the kernel (Andi Kleen) - Show call graphs in 'perf script' when 1st event doesn't have it but some other has (He Kuang) - Fix terminal cleanup when handling invalid .perfconfig files in 'perf top' (Taeung Song) Build fixes: - Respect CROSS_COMPILE for the linker in libapi (Lucas Stach) Infrastructure changes: - Fix perf_evlist__alloc_mmap() failure path (Wang Nan) - Provide way to extract integer value from format_field (Arnaldo Carvalho de Melo) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/config.c22
-rw-r--r--tools/perf/util/evlist.c5
-rw-r--r--tools/perf/util/evsel.c25
-rw-r--r--tools/perf/util/evsel.h2
-rw-r--r--tools/perf/util/group.h7
-rw-r--r--tools/perf/util/parse-events.l1
-rw-r--r--tools/perf/util/stat-shadow.c162
-rw-r--r--tools/perf/util/stat.c5
-rw-r--r--tools/perf/util/stat.h5
9 files changed, 213 insertions, 21 deletions
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index dad7d8272168..c73f1c4d1ca9 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -275,7 +275,8 @@ static int perf_parse_file(config_fn_t fn, void *data)
break;
}
}
- die("bad config file line %d in %s", config_linenr, config_file_name);
+ pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+ return -1;
}
static int parse_unit_factor(const char *end, unsigned long *val)
@@ -479,16 +480,15 @@ static int perf_config_global(void)
int perf_config(config_fn_t fn, void *data)
{
- int ret = 0, found = 0;
+ int ret = -1;
const char *home = NULL;
/* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
if (config_exclusive_filename)
return perf_config_from_file(fn, config_exclusive_filename, data);
if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
- ret += perf_config_from_file(fn, perf_etc_perfconfig(),
- data);
- found += 1;
+ if (perf_config_from_file(fn, perf_etc_perfconfig(), data) < 0)
+ goto out;
}
home = getenv("HOME");
@@ -514,14 +514,12 @@ int perf_config(config_fn_t fn, void *data)
if (!st.st_size)
goto out_free;
- ret += perf_config_from_file(fn, user_config, data);
- found += 1;
+ ret = perf_config_from_file(fn, user_config, data);
+
out_free:
free(user_config);
}
out:
- if (found == 0)
- return -1;
return ret;
}
@@ -609,8 +607,12 @@ static int collect_config(const char *var, const char *value,
struct perf_config_section *section = NULL;
struct perf_config_item *item = NULL;
struct perf_config_set *set = perf_config_set;
- struct list_head *sections = &set->sections;
+ struct list_head *sections;
+
+ if (set == NULL)
+ return -1;
+ sections = &set->sections;
key = ptr = strdup(var);
if (!key) {
pr_debug("%s: strdup failed\n", __func__);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index e0f30946ed1a..1b918aa075d6 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -946,9 +946,12 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
if (cpu_map__empty(evlist->cpus))
evlist->nr_mmaps = thread_map__nr(evlist->threads);
evlist->mmap = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!evlist->mmap)
+ return -ENOMEM;
+
for (i = 0; i < evlist->nr_mmaps; i++)
evlist->mmap[i].fd = -1;
- return evlist->mmap != NULL ? 0 : -ENOMEM;
+ return 0;
}
struct mmap_params {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 18e18f1d435e..9b2e3e624efe 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2251,17 +2251,11 @@ void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
return sample->raw_data + offset;
}
-u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+ bool needs_swap)
{
- struct format_field *field = perf_evsel__field(evsel, name);
- void *ptr;
u64 value;
-
- if (!field)
- return 0;
-
- ptr = sample->raw_data + field->offset;
+ void *ptr = sample->raw_data + field->offset;
switch (field->size) {
case 1:
@@ -2279,7 +2273,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
return 0;
}
- if (!evsel->needs_swap)
+ if (!needs_swap)
return value;
switch (field->size) {
@@ -2296,6 +2290,17 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
return 0;
}
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+
+ if (!field)
+ return 0;
+
+ return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
+}
+
bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
char *msg, size_t msgsize)
{
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 028412b32d5a..828ddd1c8947 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -261,6 +261,8 @@ static inline char *perf_evsel__strval(struct perf_evsel *evsel,
struct format_field;
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
#define perf_evsel__match(evsel, t, c) \
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 000000000000..116debe7a995
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,7 @@
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 01af1ee90a27..3c15b33b2e84 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -260,6 +260,7 @@ cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index aa9efe08762b..8a2bbd2a4d82 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -36,6 +36,11 @@ static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
static bool have_frontend_stalled;
struct stats walltime_nsecs_stats;
@@ -82,6 +87,11 @@ void perf_stat__reset_shadow_stats(void)
sizeof(runtime_transaction_stats));
memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+ memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots));
+ memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired));
+ memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
+ memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
+ memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
}
/*
@@ -105,6 +115,16 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
else if (perf_stat_evsel__is(counter, ELISION_START))
update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+ update_stats(&runtime_topdown_total_slots[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+ update_stats(&runtime_topdown_slots_issued[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+ update_stats(&runtime_topdown_slots_retired[ctx][cpu], count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+ update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu],count[0]);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+ update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
@@ -302,6 +322,107 @@ static void print_ll_cache_misses(int cpu,
out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
}
+/*
+ * High level "TopDown" CPU core pipe line bottleneck break down.
+ *
+ * Basic concept following
+ * Yasin, A Top Down Method for Performance analysis and Counter architecture
+ * ISPASS14
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation in addition means out of order execution that is thrown away
+ * (for example branch mispredictions)
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is an entry in the pipeline each for the pipeline width
+ * (for example a 4-wide pipeline has 4 slots for each cycle)
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ * TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, as possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
+
+static double sanitize_val(double x)
+{
+ if (x < 0 && x >= -0.02)
+ return 0.0;
+ return x;
+}
+
+static double td_total_slots(int ctx, int cpu)
+{
+ return avg_stats(&runtime_topdown_total_slots[ctx][cpu]);
+}
+
+static double td_bad_spec(int ctx, int cpu)
+{
+ double bad_spec = 0;
+ double total_slots;
+ double total;
+
+ total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) -
+ avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) +
+ avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]);
+ total_slots = td_total_slots(ctx, cpu);
+ if (total_slots)
+ bad_spec = total / total_slots;
+ return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int ctx, int cpu)
+{
+ double retiring = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]);
+
+ if (total_slots)
+ retiring = ret_slots / total_slots;
+ return retiring;
+}
+
+static double td_fe_bound(int ctx, int cpu)
+{
+ double fe_bound = 0;
+ double total_slots = td_total_slots(ctx, cpu);
+ double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]);
+
+ if (total_slots)
+ fe_bound = fetch_bub / total_slots;
+ return fe_bound;
+}
+
+static double td_be_bound(int ctx, int cpu)
+{
+ double sum = (td_fe_bound(ctx, cpu) +
+ td_bad_spec(ctx, cpu) +
+ td_retiring(ctx, cpu));
+ if (sum == 0)
+ return 0;
+ return sanitize_val(1.0 - sum);
+}
+
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
double avg, int cpu,
struct perf_stat_output_ctx *out)
@@ -309,6 +430,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
void *ctxp = out->ctx;
print_metric_t print_metric = out->print_metric;
double total, ratio = 0.0, total2;
+ const char *color = NULL;
int ctx = evsel_context(evsel);
if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
@@ -452,6 +574,46 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
avg / ratio);
else
print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+ double fe_bound = td_fe_bound(ctx, cpu);
+
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+ double retiring = td_retiring(ctx, cpu);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+ double bad_spec = td_bad_spec(ctx, cpu);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+ double be_bound = td_be_bound(ctx, cpu);
+ const char *name = "backend bound";
+ static int have_recovery_bubbles = -1;
+
+ /* In case the CPU does not support topdown-recovery-bubbles */
+ if (have_recovery_bubbles < 0)
+ have_recovery_bubbles = pmu_have_event("cpu",
+ "topdown-recovery-bubbles");
+ if (!have_recovery_bubbles)
+ name = "backend bound/bad spec";
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ if (td_total_slots(ctx, cpu) > 0)
+ print_metric(ctxp, color, "%8.1f%%", name,
+ be_bound * 100.);
+ else
+ print_metric(ctxp, NULL, NULL, name, 0);
} else if (runtime_nsecs_stats[cpu].n != 0) {
char unit = 'M';
char unit_buf[10];
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ffa1d0653861..c1ba255f2abe 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -79,6 +79,11 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
ID(TRANSACTION_START, cpu/tx-start/),
ID(ELISION_START, cpu/el-start/),
ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
+ ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+ ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+ ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+ ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+ ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
};
#undef ID
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0150e786ccc7..c29bb94c48a4 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -17,6 +17,11 @@ enum perf_stat_evsel_id {
PERF_STAT_EVSEL_ID__TRANSACTION_START,
PERF_STAT_EVSEL_ID__ELISION_START,
PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+ PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
PERF_STAT_EVSEL_ID__MAX,
};