diff options
Diffstat (limited to 'tools/perf/builtin-stat.c')
-rw-r--r-- | tools/perf/builtin-stat.c | 254 |
1 files changed, 182 insertions, 72 deletions
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 265b05157972..9f3e4b257516 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -93,6 +93,7 @@ #include <linux/ctype.h> #include <perf/evlist.h> +#include <internal/threadmap.h> #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" @@ -173,14 +174,13 @@ static struct target target = { #define METRIC_ONLY_LEN 20 -static volatile pid_t child_pid = -1; +static volatile sig_atomic_t child_pid = -1; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; static bool smi_cost = false; static bool smi_reset = false; static int big_num_opt = -1; -static bool group = false; static const char *pre_cmd = NULL; static const char *post_cmd = NULL; static bool sync_run = false; @@ -208,7 +208,7 @@ struct perf_stat { static struct perf_stat perf_stat; #define STAT_RECORD perf_stat.record -static volatile int done = 0; +static volatile sig_atomic_t done = 0; static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, @@ -266,7 +266,7 @@ static void evlist__check_cpu_maps(struct evlist *evlist) evsel__group_desc(leader, buf, sizeof(buf)); pr_warning(" %s\n", buf); - if (verbose) { + if (verbose > 0) { cpu_map__snprint(leader->core.cpus, buf, sizeof(buf)); pr_warning(" %s: %s\n", leader->name, buf); cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf)); @@ -465,15 +465,19 @@ static int read_bpf_map_counters(void) return 0; } -static void read_counters(struct timespec *rs) +static int read_counters(struct timespec *rs) { - struct evsel *counter; - if (!stat_config.stop_read_counter) { if (read_bpf_map_counters() || read_affinity_counters(rs)) - return; + return -1; } + return 0; +} + +static void process_counters(void) +{ + struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -482,6 +486,10 @@ static void read_counters(struct timespec *rs) pr_warning("failed to process counter %s\n", counter->name); counter->err = 0; } + + perf_stat_merge_counters(&stat_config, evsel_list); + perf_stat_process_percore(&stat_config, evsel_list); + perf_stat_process_shadow_stats(&stat_config, evsel_list); } static void process_interval(void) @@ -492,7 +500,10 @@ static void process_interval(void) diff_timespec(&rs, &ts, &ref_time); perf_stat__reset_shadow_per_stat(&rt_stat); - read_counters(&rs); + evlist__reset_aggr_stats(evsel_list); + + if (read_counters(&rs) == 0) + process_counters(); if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) @@ -528,26 +539,14 @@ static int enable_counters(void) return err; } - if (stat_config.initial_delay < 0) { - pr_info(EVLIST_DISABLED_MSG); - return 0; - } - - if (stat_config.initial_delay > 0) { - pr_info(EVLIST_DISABLED_MSG); - usleep(stat_config.initial_delay * USEC_PER_MSEC); - } - /* * We need to enable counters only if: * - we don't have tracee (attaching to task or cpu) * - we have initial delay configured */ - if (!target__none(&target) || stat_config.initial_delay) { + if (!target__none(&target)) { if (!all_counters_use_bpf) evlist__enable(evsel_list); - if (stat_config.initial_delay > 0) - pr_info(EVLIST_ENABLED_MSG); } return 0; } @@ -569,7 +568,7 @@ static void disable_counters(void) } } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 @@ -769,9 +768,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) child_pid = evsel_list->workload.pid; } - if (group) - evlist__set_leader(evsel_list); - if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { if (affinity__setup(&saved_affinity) < 0) return -1; @@ -918,14 +914,27 @@ try_again_reset: return err; } - err = enable_counters(); - if (err) - return -1; + if (stat_config.initial_delay) { + pr_info(EVLIST_DISABLED_MSG); + } else { + err = enable_counters(); + if (err) + return -1; + } /* Exec the command, if any */ if (forks) evlist__start_workload(evsel_list); + if (stat_config.initial_delay > 0) { + usleep(stat_config.initial_delay * USEC_PER_MSEC); + err = enable_counters(); + if (err) + return -1; + + pr_info(EVLIST_ENABLED_MSG); + } + t0 = rdclock(); clock_gettime(CLOCK_MONOTONIC, &ref_time); @@ -963,11 +972,9 @@ try_again_reset: init_stats(&walltime_nsecs_stats); update_stats(&walltime_nsecs_stats, t1 - t0); - if (stat_config.aggr_mode == AGGR_GLOBAL) - evlist__save_aggr_prev_raw_counts(evsel_list); - evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_aggr_stats(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); } else { update_stats(&walltime_nsecs_stats, t1 - t0); @@ -980,7 +987,8 @@ try_again_reset: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); + if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0) + process_counters(); /* * We need to keep evsel_list alive, because it's processed @@ -1023,13 +1031,13 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; - if (stat_config.quiet) + if (quiet) return; evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } -static volatile int signr = -1; +static volatile sig_atomic_t signr = -1; static void skip_signal(int signo) { @@ -1181,8 +1189,6 @@ static struct option stat_options[] = { #endif OPT_BOOLEAN('a', "all-cpus", &target.system_wide, "system-wide collection from all CPUs"), - OPT_BOOLEAN('g', "group", &group, - "put the counters into a counter group"), OPT_BOOLEAN(0, "scale", &stat_config.scale, "Use --no-scale to disable counter scaling for multiplexing"), OPT_INCR('v', "verbose", &verbose, @@ -1273,8 +1279,8 @@ static struct option stat_options[] = { "print summary for interval mode"), OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, "don't print 'summary' for CSV summary output"), - OPT_BOOLEAN(0, "quiet", &stat_config.quiet, - "don't print output (useful with record)"), + OPT_BOOLEAN(0, "quiet", &quiet, + "don't print any output, messages or warnings (useful with record)"), OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", "Only enable events on applying cpu with this type " "for hybrid platform (e.g. core or atom)", @@ -1330,10 +1336,26 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __ return aggr_cpu_id__node(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__global(cpu, /*data=*/NULL); +} + +static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__cpu(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id; + + /* per-process mode - should use global aggr mode */ + if (cpu.cpu == -1) + return get_id(config, cpu); if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); @@ -1366,16 +1388,16 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } -static bool term_percore_set(void) +static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config, + struct perf_cpu cpu) { - struct evsel *counter; - - evlist__for_each_entry(evsel_list, counter) { - if (counter->percore) - return true; - } + return perf_stat__get_aggr(config, perf_stat__get_global, cpu); +} - return false; +static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); } static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) @@ -1390,11 +1412,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NODE: return aggr_cpu_id__node; case AGGR_NONE: - if (term_percore_set()) - return aggr_cpu_id__core; - - return NULL; + return aggr_cpu_id__cpu; case AGGR_GLOBAL: + return aggr_cpu_id__global; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1415,11 +1435,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) { - return perf_stat__get_core_cached; - } - return NULL; + return perf_stat__get_cpu_cached; case AGGR_GLOBAL: + return perf_stat__get_global_cached; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1434,8 +1452,9 @@ static int perf_stat_init_aggr_mode(void) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); if (get_id) { + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, - get_id, /*data=*/NULL); + get_id, /*data=*/NULL, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1443,6 +1462,21 @@ static int perf_stat_init_aggr_mode(void) stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } + if (stat_config.aggr_mode == AGGR_THREAD) { + nr = perf_thread_map__nr(evsel_list->core.threads); + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + /* * The evsel_list->cpus is the base we operate on, * taking the highest cpu number to be the size of @@ -1527,6 +1561,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data) +{ + struct perf_env *env = data; + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + if (cpu.cpu != -1) { + /* + * core_id is relative to socket and die, + * we need a global id. So we set + * socket, die id and core id + */ + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; + id.cpu = cpu; + } + + return id; +} + static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -1535,6 +1589,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused, + void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + id.cpu = (struct perf_cpu){ .cpu = 0 }; + return id; +} + static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1552,12 +1616,24 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1569,8 +1645,10 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: return perf_env__get_node_aggr_by_cpu; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_env__get_global_aggr_by_cpu; + case AGGR_NONE: + return perf_env__get_cpu_aggr_by_cpu; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1590,8 +1668,10 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) return perf_stat__get_core_file; case AGGR_NODE: return perf_stat__get_node_file; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_stat__get_global_file; + case AGGR_NONE: + return perf_stat__get_cpu_file; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1604,11 +1684,29 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; + + if (stat_config.aggr_mode == AGGR_THREAD) { + int nr = perf_thread_map__nr(evsel_list->core.threads); + + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } if (!get_id) return 0; - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env); + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, + get_id, env, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1991,13 +2089,11 @@ static int process_stat_round_event(struct perf_session *session, union perf_event *event) { struct perf_record_stat_round *stat_round = &event->stat_round; - struct evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each_entry(evsel_list, counter) - perf_stat_process_counter(&stat_config, counter); + process_counters(); if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) update_stats(&walltime_nsecs_stats, stat_round->time); @@ -2024,17 +2120,23 @@ int process_stat_config_event(struct perf_session *session, if (perf_cpu_map__empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); - return 0; - } - - if (st->aggr_mode != AGGR_UNSET) + } else if (st->aggr_mode != AGGR_UNSET) { stat_config.aggr_mode = st->aggr_mode; + } if (perf_stat.data.is_pipe) perf_stat_init_aggr_mode(); else perf_stat_init_aggr_mode_file(st); + if (stat_config.aggr_map) { + int nr_aggr = stat_config.aggr_map->nr; + + if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) { + pr_err("cannot allocate aggr counts\n"); + return -1; + } + } return 0; } @@ -2048,7 +2150,7 @@ static int set_maps(struct perf_stat *st) perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); - if (evlist__alloc_stats(evsel_list, true)) + if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true)) return -ENOMEM; st->maps_allocated = true; @@ -2277,7 +2379,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output && !stat_config.quiet) { + if (!output && !quiet) { struct timespec tm; mode = append_file ? "a" : "w"; @@ -2297,6 +2399,14 @@ int cmd_stat(int argc, const char **argv) } } + if (stat_config.interval_clear && !isatty(fileno(output))) { + fprintf(stderr, "--interval-clear does not work with output\n"); + parse_options_usage(stat_usage, stat_options, "o", 1); + parse_options_usage(NULL, stat_options, "log-fd", 0); + parse_options_usage(NULL, stat_options, "interval-clear", 0); + return -1; + } + stat_config.output = output; /* @@ -2383,7 +2493,7 @@ int cmd_stat(int argc, const char **argv) if (iostat_mode == IOSTAT_LIST) { iostat_list(evsel_list, &stat_config); goto out; - } else if (verbose) + } else if (verbose > 0) iostat_list(evsel_list, &stat_config); if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target)) target.system_wide = true; @@ -2495,10 +2605,10 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (evlist__alloc_stats(evsel_list, interval)) + if (perf_stat_init_aggr_mode()) goto out; - if (perf_stat_init_aggr_mode()) + if (evlist__alloc_stats(&stat_config, evsel_list, interval)) goto out; /* |