diff options
Diffstat (limited to 'tools/perf/util')
32 files changed, 620 insertions, 149 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 8dd3102301ea..6d5bbc8b589b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -145,6 +145,8 @@ perf-y += scripting-engines/ perf-$(CONFIG_ZLIB) += zlib.o perf-$(CONFIG_LZMA) += lzma.o +perf-$(CONFIG_ZSTD) += zstd.o + perf-y += demangle-java.o perf-y += demangle-rust.o diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 09762985c713..0b8573fd9b05 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1021,7 +1021,7 @@ static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 float ipc = n_insn / ((double)ch->cycles / (double)ch->num); /* Hide data when there are too many overlaps. */ - if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2) + if (ch->reset >= 0x7fff) return; for (offset = start; offset <= end; offset++) { diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h index 892e92e7e7fc..0cd3369af2a4 100644 --- a/tools/perf/util/compress.h +++ b/tools/perf/util/compress.h @@ -2,6 +2,11 @@ #ifndef PERF_COMPRESS_H #define PERF_COMPRESS_H +#include <stdbool.h> +#ifdef HAVE_ZSTD_SUPPORT +#include <zstd.h> +#endif + #ifdef HAVE_ZLIB_SUPPORT int gzip_decompress_to_file(const char *input, int output_fd); bool gzip_is_compressed(const char *input); @@ -12,4 +17,52 @@ int lzma_decompress_to_file(const char *input, int output_fd); bool lzma_is_compressed(const char *input); #endif +struct zstd_data { +#ifdef HAVE_ZSTD_SUPPORT + ZSTD_CStream *cstream; + ZSTD_DStream *dstream; +#endif +}; + +#ifdef HAVE_ZSTD_SUPPORT + +int zstd_init(struct zstd_data *data, int level); +int zstd_fini(struct zstd_data *data); + +size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size, + void *src, size_t src_size, size_t max_record_size, + size_t process_header(void *record, size_t increment)); + +size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size, + void *dst, size_t 
dst_size); +#else /* !HAVE_ZSTD_SUPPORT */ + +static inline int zstd_init(struct zstd_data *data __maybe_unused, int level __maybe_unused) +{ + return 0; +} + +static inline int zstd_fini(struct zstd_data *data __maybe_unused) +{ + return 0; +} + +static inline +size_t zstd_compress_stream_to_records(struct zstd_data *data __maybe_unused, + void *dst __maybe_unused, size_t dst_size __maybe_unused, + void *src __maybe_unused, size_t src_size __maybe_unused, + size_t max_record_size __maybe_unused, + size_t process_header(void *record, size_t increment) __maybe_unused) +{ + return 0; +} + +static inline size_t zstd_decompress_stream(struct zstd_data *data __maybe_unused, void *src __maybe_unused, + size_t src_size __maybe_unused, void *dst __maybe_unused, + size_t dst_size __maybe_unused) +{ + return 0; +} +#endif + #endif /* PERF_COMPRESS_H */ diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 4f8e2b485c01..271a90b326c4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -62,6 +62,11 @@ struct perf_env { struct cpu_topology_map *cpu; struct cpu_cache_level *caches; int caches_cnt; + u32 comp_ratio; + u32 comp_ver; + u32 comp_type; + u32 comp_level; + u32 comp_mmap_len; struct numa_node *numa_nodes; struct memory_node *memory_nodes; unsigned long long memory_bsize; @@ -80,6 +85,12 @@ struct perf_env { } bpf_progs; }; +enum perf_compress_type { + PERF_COMP_NONE = 0, + PERF_COMP_ZSTD, + PERF_COMP_MAX +}; + struct bpf_prog_info_node; struct btf_node; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index ba7be74fad6e..d1ad6c419724 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -68,6 +68,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE", [PERF_RECORD_TIME_CONV] = "TIME_CONV", [PERF_RECORD_HEADER_FEATURE] = "FEATURE", + [PERF_RECORD_COMPRESSED] = "COMPRESSED", }; static const char *perf_ns__names[] = { diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h 
index 4e908ec1ef64..9e999550f247 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -255,6 +255,7 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_EVENT_UPDATE = 78, PERF_RECORD_TIME_CONV = 79, PERF_RECORD_HEADER_FEATURE = 80, + PERF_RECORD_COMPRESSED = 81, PERF_RECORD_HEADER_MAX }; @@ -627,6 +628,11 @@ struct feature_event { char data[]; }; +struct compressed_event { + struct perf_event_header header; + char data[]; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -660,6 +666,7 @@ union perf_event { struct feature_event feat; struct ksymbol_event ksymbol_event; struct bpf_event bpf_event; + struct compressed_event pack; }; void perf_event__print_totals(void); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 4b6783ff5813..69d0fa8ab16f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1009,7 +1009,8 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, - bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush) + bool auxtrace_overwrite, int nr_cblocks, int affinity, int flush, + int comp_level) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1019,7 +1020,8 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, * Its value is decided by evsel's write_backward. * So &mp should not be passed through const pointer. 
*/ - struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush }; + struct mmap_params mp = { .nr_cblocks = nr_cblocks, .affinity = affinity, .flush = flush, + .comp_level = comp_level }; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist, false); @@ -1051,7 +1053,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1); + return perf_evlist__mmap_ex(evlist, pages, 0, false, 0, PERF_AFFINITY_SYS, 1, 0); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index c9a0f72677fd..49354fe24d5f 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -178,7 +178,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, unsigned int auxtrace_pages, bool auxtrace_overwrite, int nr_cblocks, - int affinity, int flush); + int affinity, int flush, int comp_level); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a10cf4cde920..a6f572a40deb 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -813,6 +813,8 @@ static void apply_config_terms(struct perf_evsel *evsel, break; case PERF_EVSEL__CONFIG_TERM_DRV_CFG: break; + case PERF_EVSEL__CONFIG_TERM_PERCORE: + break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6d190cbf1070..cad54e8ba522 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -50,6 +50,7 @@ enum term_type { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, + PERF_EVSEL__CONFIG_TERM_PERCORE, }; struct 
perf_evsel_config_term { @@ -67,6 +68,7 @@ struct perf_evsel_config_term { bool overwrite; char *branch; unsigned long max_events; + bool percore; } val; bool weak; }; @@ -158,6 +160,7 @@ struct perf_evsel { struct perf_evsel **metric_events; bool collect_stat; bool weak_group; + bool percore; const char *pmu_name; struct { perf_evsel__sb_cb_t *cb; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 2d2af2ac2b1e..847ae51a524b 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1344,6 +1344,30 @@ out: return ret; } +static int write_compressed(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + int ret; + + ret = do_write(ff, &(ff->ph->env.comp_ver), sizeof(ff->ph->env.comp_ver)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_type), sizeof(ff->ph->env.comp_type)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_level), sizeof(ff->ph->env.comp_level)); + if (ret) + return ret; + + ret = do_write(ff, &(ff->ph->env.comp_ratio), sizeof(ff->ph->env.comp_ratio)); + if (ret) + return ret; + + return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1688,6 +1712,13 @@ static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused) } } +static void print_compressed(struct feat_fd *ff, FILE *fp) +{ + fprintf(fp, "# compressed : %s, level = %d, ratio = %d\n", + ff->ph->env.comp_type == PERF_COMP_ZSTD ? 
"Zstd" : "Unknown", + ff->ph->env.comp_level, ff->ph->env.comp_ratio); +} + static void print_pmu_mappings(struct feat_fd *ff, FILE *fp) { const char *delimiter = "# pmu mappings: "; @@ -2667,6 +2698,27 @@ out: return err; } +static int process_compressed(struct feat_fd *ff, + void *data __maybe_unused) +{ + if (do_read_u32(ff, &(ff->ph->env.comp_ver))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_type))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_level))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_ratio))) + return -1; + + if (do_read_u32(ff, &(ff->ph->env.comp_mmap_len))) + return -1; + + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2730,6 +2782,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(DIR_FORMAT, dir_format, false), FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), FEAT_OPR(BPF_BTF, bpf_btf, false), + FEAT_OPR(COMPRESSED, compressed, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 386da49e1bfa..5b3abe4172e2 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -42,6 +42,7 @@ enum { HEADER_DIR_FORMAT, HEADER_BPF_PROG_INFO, HEADER_BPF_BTF, + HEADER_COMPRESSED, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 872fab163585..f4c3c84b090f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -58,6 +58,7 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_NO_IP, INTEL_PT_STATE_ERR_RESYNC, INTEL_PT_STATE_IN_SYNC, + INTEL_PT_STATE_TNT_CONT, INTEL_PT_STATE_TNT, INTEL_PT_STATE_TIP, INTEL_PT_STATE_TIP_PGD, @@ -72,8 +73,9 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) case INTEL_PT_STATE_NO_IP: case 
INTEL_PT_STATE_ERR_RESYNC: case INTEL_PT_STATE_IN_SYNC: - case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: return true; + case INTEL_PT_STATE_TNT: case INTEL_PT_STATE_TIP: case INTEL_PT_STATE_TIP_PGD: case INTEL_PT_STATE_FUP: @@ -888,16 +890,20 @@ static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder) timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; masked_timestamp = timestamp & decoder->period_mask; if (decoder->continuous_period) { - if (masked_timestamp != decoder->last_masked_timestamp) + if (masked_timestamp > decoder->last_masked_timestamp) return 1; } else { timestamp += 1; masked_timestamp = timestamp & decoder->period_mask; - if (masked_timestamp != decoder->last_masked_timestamp) { + if (masked_timestamp > decoder->last_masked_timestamp) { decoder->last_masked_timestamp = masked_timestamp; decoder->continuous_period = true; } } + + if (masked_timestamp < decoder->last_masked_timestamp) + return decoder->period_ticks; + return decoder->period_ticks - (timestamp - masked_timestamp); } @@ -926,7 +932,10 @@ static void intel_pt_sample_insn(struct intel_pt_decoder *decoder) case INTEL_PT_PERIOD_TICKS: timestamp = decoder->timestamp + decoder->timestamp_insn_cnt; masked_timestamp = timestamp & decoder->period_mask; - decoder->last_masked_timestamp = masked_timestamp; + if (masked_timestamp > decoder->last_masked_timestamp) + decoder->last_masked_timestamp = masked_timestamp; + else + decoder->last_masked_timestamp += decoder->period_ticks; break; case INTEL_PT_PERIOD_NONE: case INTEL_PT_PERIOD_MTC: @@ -1254,7 +1263,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) return -ENOENT; } decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; decoder->tnt.payload <<= 1; decoder->state.from_ip = decoder->ip; @@ -1285,7 +1296,9 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) 
if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) { decoder->tnt.count -= 1; - if (!decoder->tnt.count) + if (decoder->tnt.count) + decoder->pkt_state = INTEL_PT_STATE_TNT_CONT; + else decoder->pkt_state = INTEL_PT_STATE_IN_SYNC; if (decoder->tnt.payload & BIT63) { decoder->tnt.payload <<= 1; @@ -1305,8 +1318,11 @@ static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder) return 0; } decoder->ip += intel_pt_insn.length; - if (!decoder->tnt.count) + if (!decoder->tnt.count) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; return -EAGAIN; + } decoder->tnt.payload <<= 1; continue; } @@ -2365,6 +2381,7 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) err = intel_pt_walk_trace(decoder); break; case INTEL_PT_STATE_TNT: + case INTEL_PT_STATE_TNT_CONT: err = intel_pt_walk_tnt(decoder); if (err == -EAGAIN) err = intel_pt_walk_trace(decoder); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 3c520baa198c..28a9541c4835 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1234,8 +1234,9 @@ static char *get_kernel_version(const char *root_dir) if (!file) return NULL; - version[0] = '\0'; tmp = fgets(version, sizeof(version), file); + if (!tmp) + *version = '\0'; fclose(file); name = strstr(version, prefix); diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index ef3d79b2c90b..868c0b0e909c 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -157,6 +157,10 @@ void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __mayb } #ifdef HAVE_AIO_SUPPORT +static int perf_mmap__aio_enabled(struct perf_mmap *map) +{ + return map->aio.nr_cblocks > 0; +} #ifdef HAVE_LIBNUMA_SUPPORT static int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) @@ -198,7 +202,7 @@ static int perf_mmap__aio_bind(struct perf_mmap *map, int idx, int cpu, int affi return 0; } -#else +#else /* !HAVE_LIBNUMA_SUPPORT */ static 
int perf_mmap__aio_alloc(struct perf_mmap *map, int idx) { map->aio.data[idx] = malloc(perf_mmap__mmap_len(map)); @@ -285,81 +289,12 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map) zfree(&map->aio.cblocks); zfree(&map->aio.aiocb); } - -int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), - off_t *off) +#else /* !HAVE_AIO_SUPPORT */ +static int perf_mmap__aio_enabled(struct perf_mmap *map __maybe_unused) { - u64 head = perf_mmap__read_head(md); - unsigned char *data = md->base + page_size; - unsigned long size, size0 = 0; - void *buf; - int rc = 0; - - rc = perf_mmap__read_init(md); - if (rc < 0) - return (rc == -EAGAIN) ? 0 : -1; - - /* - * md->base data is copied into md->data[idx] buffer to - * release space in the kernel buffer as fast as possible, - * thru perf_mmap__consume() below. - * - * That lets the kernel to proceed with storing more - * profiling data into the kernel buffer earlier than other - * per-cpu kernel buffers are handled. - * - * Coping can be done in two steps in case the chunk of - * profiling data crosses the upper bound of the kernel buffer. - * In this case we first move part of data from md->start - * till the upper bound and then the reminder from the - * beginning of the kernel buffer till the end of - * the data chunk. - */ - - size = md->end - md->start; - - if ((md->start & md->mask) + size != (md->end & md->mask)) { - buf = &data[md->start & md->mask]; - size = md->mask + 1 - (md->start & md->mask); - md->start += size; - memcpy(md->aio.data[idx], buf, size); - size0 = size; - } - - buf = &data[md->start & md->mask]; - size = md->end - md->start; - md->start += size; - memcpy(md->aio.data[idx] + size0, buf, size); - - /* - * Increment md->refcount to guard md->data[idx] buffer - * from premature deallocation because md object can be - * released earlier than aio write request started - * on mmap->data[idx] is complete. 
- * - * perf_mmap__put() is done at record__aio_complete() - * after started request completion. - */ - perf_mmap__get(md); - - md->prev = head; - perf_mmap__consume(md); - - rc = push(to, &md->aio.cblocks[idx], md->aio.data[idx], size0 + size, *off); - if (!rc) { - *off += size0 + size; - } else { - /* - * Decrement md->refcount back if aio write - * operation failed to start. - */ - perf_mmap__put(md); - } - - return rc; + return 0; } -#else + static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused, struct mmap_params *mp __maybe_unused) { @@ -374,6 +309,10 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused) void perf_mmap__munmap(struct perf_mmap *map) { perf_mmap__aio_munmap(map); + if (map->data != NULL) { + munmap(map->data, perf_mmap__mmap_len(map)); + map->data = NULL; + } if (map->base != NULL) { munmap(map->base, perf_mmap__mmap_len(map)); map->base = NULL; @@ -442,6 +381,19 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int c map->flush = mp->flush; + map->comp_level = mp->comp_level; + + if (map->comp_level && !perf_mmap__aio_enabled(map)) { + map->data = mmap(NULL, perf_mmap__mmap_len(map), PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (map->data == MAP_FAILED) { + pr_debug2("failed to mmap data buffer, error %d\n", + errno); + map->data = NULL; + return -1; + } + } + if (auxtrace_mmap__mmap(&map->auxtrace_mmap, &mp->auxtrace_mp, map->base, fd)) return -1; @@ -540,7 +492,7 @@ int perf_mmap__push(struct perf_mmap *md, void *to, rc = perf_mmap__read_init(md); if (rc < 0) - return (rc == -EAGAIN) ? 0 : -1; + return (rc == -EAGAIN) ? 
1 : -1; size = md->end - md->start; diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index b82f8c2d55c4..274ce389cd84 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -40,6 +40,8 @@ struct perf_mmap { #endif cpu_set_t affinity_mask; u64 flush; + void *data; + int comp_level; }; /* @@ -71,7 +73,7 @@ enum bkw_mmap_state { }; struct mmap_params { - int prot, mask, nr_cblocks, affinity, flush; + int prot, mask, nr_cblocks, affinity, flush, comp_level; struct auxtrace_mmap_params auxtrace_mp; }; @@ -99,18 +101,6 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, void *to, int push(struct perf_mmap *map, void *to, void *buf, size_t size)); -#ifdef HAVE_AIO_SUPPORT -int perf_mmap__aio_push(struct perf_mmap *md, void *to, int idx, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off), - off_t *off); -#else -static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused, int idx __maybe_unused, - int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused, - off_t *off __maybe_unused) -{ - return 0; -} -#endif size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4432bfe039fd..cf0b9b81c5aa 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -950,6 +950,7 @@ static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = { [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite", [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite", [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config", + [PARSE_EVENTS__TERM_TYPE_PERCORE] = "percore", }; static bool config_term_shrinked; @@ -970,6 +971,7 @@ config_term_avail(int term_type, struct parse_events_error *err) case PARSE_EVENTS__TERM_TYPE_CONFIG2: case PARSE_EVENTS__TERM_TYPE_NAME: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: + case 
PARSE_EVENTS__TERM_TYPE_PERCORE: return true; default: if (!err) @@ -1061,6 +1063,14 @@ do { \ case PARSE_EVENTS__TERM_TYPE_MAX_EVENTS: CHECK_TYPE_VAL(NUM); break; + case PARSE_EVENTS__TERM_TYPE_PERCORE: + CHECK_TYPE_VAL(NUM); + if ((unsigned int)term->val.num > 1) { + err->str = strdup("expected 0 or 1"); + err->idx = term->err_val; + return -EINVAL; + } + break; default: err->str = strdup("unknown term"); err->idx = term->err_term; @@ -1199,6 +1209,10 @@ do { \ case PARSE_EVENTS__TERM_TYPE_DRV_CFG: ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str); break; + case PARSE_EVENTS__TERM_TYPE_PERCORE: + ADD_CONFIG_TERM(PERCORE, percore, + term->val.num ? true : false); + break; default: break; } @@ -1260,6 +1274,18 @@ int parse_events_add_tool(struct parse_events_state *parse_state, return add_event_tool(list, &parse_state->idx, tool_event); } +static bool config_term_percore(struct list_head *config_terms) +{ + struct perf_evsel_config_term *term; + + list_for_each_entry(term, config_terms, list) { + if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE) + return term->val.percore; + } + + return false; +} + int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, char *name, struct list_head *head_config, @@ -1333,6 +1359,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->metric_name = info.metric_name; evsel->pmu_name = name; evsel->use_uncore_alias = use_uncore_alias; + evsel->percore = config_term_percore(&evsel->config_terms); } return evsel ? 
0 : -ENOMEM; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index a052cd6ac63e..f7139e1a2fd3 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -75,6 +75,7 @@ enum { PARSE_EVENTS__TERM_TYPE_NOOVERWRITE, PARSE_EVENTS__TERM_TYPE_OVERWRITE, PARSE_EVENTS__TERM_TYPE_DRV_CFG, + PARSE_EVENTS__TERM_TYPE_PERCORE, __PARSE_EVENTS__TERM_TYPE_NR, }; diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index c54bfe88626c..ca6098874fe2 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -283,6 +283,7 @@ inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); } no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); } overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); } no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); } +percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); } , { return ','; } "/" { BEGIN(INITIAL); return '/'; } {name_minus} { return str(yyscanner, PE_NAME); } diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c index e6599e290f46..08581e276225 100644 --- a/tools/perf/util/parse-regs-options.c +++ b/tools/perf/util/parse-regs-options.c @@ -5,13 +5,14 @@ #include <subcmd/parse-options.h> #include "util/parse-regs-options.h" -int -parse_regs(const struct option *opt, const char *str, int unset) +static int +__parse_regs(const struct option *opt, const char *str, int unset, bool intr) { uint64_t *mode = (uint64_t *)opt->value; const struct sample_reg *r; char *s, *os = NULL, *p; int ret = -1; + uint64_t mask; if (unset) return 0; @@ -22,6 +23,11 @@ parse_regs(const struct option *opt, const char *str, int unset) if (*mode) return -1; + if (intr) + mask = arch__intr_reg_mask(); + else + mask = arch__user_reg_mask(); + /* str may be NULL in case no arg is passed to -I */ if (str) { /* because str is read-only */ @@ -37,19 +43,20 @@ 
parse_regs(const struct option *opt, const char *str, int unset) if (!strcmp(s, "?")) { fprintf(stderr, "available registers: "); for (r = sample_reg_masks; r->name; r++) { - fprintf(stderr, "%s ", r->name); + if (r->mask & mask) + fprintf(stderr, "%s ", r->name); } fputc('\n', stderr); /* just printing available regs */ return -1; } for (r = sample_reg_masks; r->name; r++) { - if (!strcasecmp(s, r->name)) + if ((r->mask & mask) && !strcasecmp(s, r->name)) break; } if (!r->name) { - ui__warning("unknown register %s," - " check man page\n", s); + ui__warning("Unknown register \"%s\", check man page or run \"perf record %s?\"\n", + s, intr ? "-I" : "--user-regs="); goto error; } @@ -65,8 +72,20 @@ parse_regs(const struct option *opt, const char *str, int unset) /* default to all possible regs */ if (*mode == 0) - *mode = PERF_REGS_MASK; + *mode = mask; error: free(os); return ret; } + +int +parse_user_regs(const struct option *opt, const char *str, int unset) +{ + return __parse_regs(opt, str, unset, false); +} + +int +parse_intr_regs(const struct option *opt, const char *str, int unset) +{ + return __parse_regs(opt, str, unset, true); +} diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h index cdefb1acf6be..2b23d25c6394 100644 --- a/tools/perf/util/parse-regs-options.h +++ b/tools/perf/util/parse-regs-options.h @@ -2,5 +2,6 @@ #ifndef _PERF_PARSE_REGS_OPTIONS_H #define _PERF_PARSE_REGS_OPTIONS_H 1 struct option; -int parse_regs(const struct option *opt, const char *str, int unset); +int parse_user_regs(const struct option *opt, const char *str, int unset); +int parse_intr_regs(const struct option *opt, const char *str, int unset); #endif /* _PERF_PARSE_REGS_OPTIONS_H */ diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 2acfcc527cac..2774cec1f15f 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -13,6 +13,16 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, 
return SDT_ARG_SKIP; } +uint64_t __weak arch__intr_reg_mask(void) +{ + return PERF_REGS_MASK; +} + +uint64_t __weak arch__user_reg_mask(void) +{ + return PERF_REGS_MASK; +} + #ifdef HAVE_PERF_REGS_SUPPORT int perf_reg_value(u64 *valp, struct regs_dump *regs, int id) { diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h index c9319f8d17a6..cb9c246c8962 100644 --- a/tools/perf/util/perf_regs.h +++ b/tools/perf/util/perf_regs.h @@ -12,6 +12,7 @@ struct sample_reg { uint64_t mask; }; #define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) } +#define SMPL_REG2(n, b) { .name = #n, .mask = 3ULL << (b) } #define SMPL_REG_END { .name = NULL } extern const struct sample_reg sample_reg_masks[]; @@ -22,6 +23,8 @@ enum { }; int arch_sdt_arg_parse_op(char *old_op, char **new_op); +uint64_t arch__intr_reg_mask(void); +uint64_t arch__user_reg_mask(void); #ifdef HAVE_PERF_REGS_SUPPORT #include <perf_regs.h> diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index bad5f87ae001..2310a1752983 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -29,6 +29,61 @@ #include "stat.h" #include "arch/common.h" +#ifdef HAVE_ZSTD_SUPPORT +static int perf_session__process_compressed_event(struct perf_session *session, + union perf_event *event, u64 file_offset) +{ + void *src; + size_t decomp_size, src_size; + u64 decomp_last_rem = 0; + size_t decomp_len = session->header.env.comp_mmap_len; + struct decomp *decomp, *decomp_last = session->decomp_last; + + decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (decomp == MAP_FAILED) { + pr_err("Couldn't allocate memory for decompression\n"); + return -1; + } + + decomp->file_pos = file_offset; + decomp->head = 0; + + if (decomp_last) { + decomp_last_rem = decomp_last->size - decomp_last->head; + memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem); + decomp->size = decomp_last_rem; + } + + src = (void 
*)event + sizeof(struct compressed_event);
+	src_size = event->pack.header.size - sizeof(struct compressed_event);
+
+	decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
+				&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
+	if (!decomp_size) {
+		munmap(decomp, sizeof(struct decomp) + decomp_len);
+		pr_err("Couldn't decompress data\n");
+		return -1;
+	}
+
+	decomp->size += decomp_size;
+
+	if (session->decomp == NULL) {
+		session->decomp = decomp;
+		session->decomp_last = decomp;
+	} else {
+		session->decomp_last->next = decomp;
+		session->decomp_last = decomp;
+	}
+
+	pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+
+	return 0;
+}
+#else /* !HAVE_ZSTD_SUPPORT */
+#define perf_session__process_compressed_event perf_session__process_compressed_event_stub
+#endif
+
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
 				       struct perf_tool *tool,
@@ -197,6 +252,21 @@ static void perf_session__delete_threads(struct perf_session *session)
 	machine__delete_threads(&session->machines.host);
 }
 
+/* Walk the session->decomp list and munmap() every node found on it. */
+static void perf_session__release_decomp_events(struct perf_session *session)
+{
+	size_t map_len = session->header.env.comp_mmap_len;
+	struct decomp *cur = session->decomp;
+
+	map_len += sizeof(struct decomp);
+	while (cur != NULL) {
+		struct decomp *following = cur->next;	/* read before unmapping */
+
+		munmap(cur, map_len);
+		cur = following;
+	}
+}
+
 void perf_session__delete(struct perf_session *session)
 {
 	if (session == NULL)
@@ -205,6 +275,7 @@ void perf_session__delete(struct perf_session *session)
 	auxtrace_index__free(&session->auxtrace_index);
 	perf_session__destroy_kernel_maps(session);
 	perf_session__delete_threads(session);
+	perf_session__release_decomp_events(session);
 	perf_env__exit(&session->header.env);
 	machines__exit(&session->machines);
 	if (session->data)
@@ -358,6 +429,14 @@ static int process_stat_round_stub(struct perf_session *perf_session __maybe_unu
 	return 0;
 }
 
+/* Fallback for the 'compressed' tool callback: note the record as unhandled. */
+static int perf_session__process_compressed_event_stub(struct perf_session *session __maybe_unused,
+						       union perf_event *event __maybe_unused, u64 file_offset __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
@@ -430,6 +509,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->time_conv = process_event_op2_stub;
 	if (tool->feature == NULL)
 		tool->feature = process_event_op2_stub;
+	if (tool->compressed == NULL)
+		tool->compressed = perf_session__process_compressed_event;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -1373,7 +1454,9 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	int fd = perf_data__fd(session->data);
 	int err;
 
-	dump_event(session->evlist, event, file_offset, &sample);
+	if (event->header.type != PERF_RECORD_COMPRESSED ||
+	    tool->compressed == perf_session__process_compressed_event_stub)
+		dump_event(session->evlist, event, file_offset, &sample);
 
 	/* These events are processed right away */
 	switch (event->header.type) {
@@ -1426,6 +1509,11 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		return tool->time_conv(session, event);
 	case PERF_RECORD_HEADER_FEATURE:
 		return tool->feature(session, event);
+	case PERF_RECORD_COMPRESSED:
+		err = tool->compressed(session, event, file_offset);
+		if (err)
+			dump_event(session->evlist, event, file_offset, &sample);
+		return err;
 	default:
 		return -EINVAL;
 	}
@@ -1708,6 +1796,8 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
 
 volatile int session_done;
 
+static int __perf_session__process_decomp_events(struct perf_session *session);
+
 static int __perf_session__process_pipe_events(struct perf_session *session)
 {
 	struct ordered_events *oe = &session->ordered_events;
@@ -1788,6 +1878,10 @@ more:
 	if (skip > 0)
 		head += skip;
 
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out_err;
+
 	if
(!session_done())
 		goto more;
 done:
@@ -1836,6 +1930,39 @@ fetch_mmaped_event(struct perf_session *session,
 	return event;
 }
 
+/*
+ * Process the events stored in session->decomp_last, resuming at its 'head'
+ * offset.  Returns 0, or -EINVAL if an event is too small or is rejected.
+ */
+static int __perf_session__process_decomp_events(struct perf_session *session)
+{
+	struct decomp *decomp = session->decomp_last;
+	u64 file_pos = 0;
+
+	if (decomp == NULL)
+		return 0;
+
+	while (decomp->head < decomp->size && !session_done()) {
+		union perf_event *event = fetch_mmaped_event(session, decomp->head,
+							     decomp->size, decomp->data);
+		s64 skip = -1;
+		u64 size;
+
+		if (event == NULL)
+			break;
+		size = event->header.size;
+		if (size >= sizeof(struct perf_event_header))
+			skip = perf_session__process_event(session, event, file_pos);
+		if (skip < 0) {
+			pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+				decomp->file_pos + decomp->head, event->header.size, event->header.type);
+			return -EINVAL;
+		}
+		decomp->head += size + skip;	/* a non-zero result widens the step */
+	}
+	return 0;
+}
+
 /*
  * On 64bit we can mmap the data file in one go. No need for tiny mmap
  * slices. On 32bit we use 32MB.
@@ -1945,6 +2072,10 @@ more:
 	head += size;
 	file_pos += size;
 
+	err = __perf_session__process_decomp_events(session);
+	if (err)
+		goto out;
+
 	ui_progress__update(prog, size);
 
 	if (session_done())
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d96eccd7d27f..dd8920b745bc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -8,6 +8,7 @@
 #include "machine.h"
 #include "data.h"
 #include "ordered-events.h"
+#include "util/compress.h"
 #include <linux/kernel.h>
 #include <linux/rbtree.h>
 #include <linux/perf_event.h>
@@ -35,6 +36,19 @@ struct perf_session {
 	struct ordered_events	ordered_events;
 	struct perf_data	*data;
 	struct perf_tool	*tool;
+	u64			bytes_transferred;
+	u64			bytes_compressed;
+	struct zstd_data	zstd_data;
+	struct decomp		*decomp;	/* first buffer of the decompressed-data list */
+	struct decomp		*decomp_last;	/* most recently appended buffer */
+};
+
+struct decomp {	/* one buffer of decompressed event data */
+	struct decomp *next;	/* following buffer in the session's list */
+	u64 file_pos;	/* added to 'head' when an event position is reported */
+	u64 head;	/* offset in data[] of the next event to process */
+	size_t size;	/* bytes of data[] filled so far */
+	char data[];	/* the decompressed bytes */
 };
 
 struct perf_tool;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 3324f23c7efc..4c53bae5644b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config,
 					config->csv_sep);
 		break;
 	case AGGR_NONE:
-		fprintf(config->output, "CPU%*d%s",
-			config->csv_output ? 0 : -4,
-			perf_evsel__cpus(evsel)->map[id], config->csv_sep);
+		if (evsel->percore) {
+			fprintf(config->output, "S%d-C%*d%s",
+				cpu_map__id_to_socket(id),
+				config->csv_output ? 0 : -5,
+				cpu_map__id_to_cpu(id), config->csv_sep);
+		} else {
+			fprintf(config->output, "CPU%*d%s ",
+				config->csv_output ?
0 : -5,
+				perf_evsel__cpus(evsel)->map[id],
+				config->csv_sep);
+		}
 		break;
 	case AGGR_THREAD:
 		fprintf(config->output, "%*s-%*d%s",
@@ -594,6 +602,41 @@ static void aggr_cb(struct perf_stat_config *config,
 	}
 }
 
+/* Collect the data of @counter for aggr_map slot @s and print it. */
+static void print_counter_aggrdata(struct perf_stat_config *config,
+				   struct perf_evsel *counter, int s,
+				   char *prefix, bool metric_only,
+				   bool *first)
+{
+	FILE *output = config->output;
+	int id = config->aggr_map->map[s];
+	struct aggr_data ad;
+	u64 val, ena, run;
+	double uval;
+	int nr;
+
+	ad.id = id;
+	ad.nr = 0;
+	ad.val = ad.ena = ad.run = 0;
+	if (!collect_data(config, counter, aggr_cb, &ad))
+		return;
+
+	val = ad.val;
+	ena = ad.ena;
+	run = ad.run;
+	nr = ad.nr;
+	if (metric_only && *first) {
+		*first = false;
+		aggr_printout(config, counter, id, nr);
+	}
+	if (!metric_only && prefix)
+		fprintf(output, "%s", prefix);
+	uval = val * counter->scale;
+	printout(config, id, nr, counter, uval, prefix, run, ena, 1.0, &rt_stat);
+	if (!metric_only)
+		fputc('\n', output);
+}
+
 static void print_aggr(struct perf_stat_config *config,
 		       struct perf_evlist *evlist,
 		       char *prefix)
@@ -601,9 +644,7 @@ static void print_aggr(struct perf_stat_config *config,
 	bool metric_only = config->metric_only;
 	FILE *output = config->output;
 	struct perf_evsel *counter;
-	int s, id, nr;
-	double uval;
-	u64 ena, run, val;
+	int s;
 	bool first;
 
 	if (!(config->aggr_map || config->aggr_get_id))
@@ -616,33 +657,14 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
*/
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		struct aggr_data ad;
 		if (prefix && metric_only)
 			fprintf(output, "%s", prefix);
 
-		ad.id = id = config->aggr_map->map[s];
 		first = true;
 		evlist__for_each_entry(evlist, counter) {
-			ad.val = ad.ena = ad.run = 0;
-			ad.nr = 0;
-			if (!collect_data(config, counter, aggr_cb, &ad))
-				continue;
-			nr = ad.nr;
-			ena = ad.ena;
-			run = ad.run;
-			val = ad.val;
-			if (first && metric_only) {
-				first = false;
-				aggr_printout(config, counter, id, nr);
-			}
-			if (prefix && !metric_only)
-				fprintf(output, "%s", prefix);
-
-			uval = val * counter->scale;
-			printout(config, id, nr, counter, uval, prefix,
-				 run, ena, 1.0, &rt_stat);
-			if (!metric_only)
-				fputc('\n', output);
+			print_counter_aggrdata(config, counter, s,
+					       prefix, metric_only,
+					       &first);
 		}
 		if (metric_only)
 			fputc('\n', output);
@@ -1089,6 +1111,30 @@ static void print_footer(struct perf_stat_config *config)
 			"the same PMU. Try reorganizing the group.\n");
 }
 
+/*
+ * Print @counter once per aggr_map entry.  Nothing is printed if there is no
+ * aggr_map: the loop below dereferences it, so it alone decides the bail-out.
+ */
+static void print_percore(struct perf_stat_config *config,
+			  struct perf_evsel *counter, char *prefix)
+{
+	bool metric_only = config->metric_only;
+	FILE *output = config->output;
+	int s;
+	bool first = true;
+
+	if (!config->aggr_map)
+		return;
+
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		if (prefix && metric_only)
+			fprintf(output, "%s", prefix);
+		print_counter_aggrdata(config, counter, s, prefix, metric_only, &first);
+	}
+	if (metric_only)
+		fputc('\n', output);
+}
+
 void
 perf_evlist__print_counters(struct perf_evlist *evlist,
 			    struct perf_stat_config *config,
@@ -1139,7 +1185,10 @@ perf_evlist__print_counters(struct perf_evlist *evlist,
 			print_no_aggr_metric(config, evlist, prefix);
 		else {
 			evlist__for_each_entry(evlist, counter) {
-				print_counter(config, counter, prefix);
+				if (counter->percore)
+					print_percore(config, counter, prefix);
+				else
+					print_counter(config, counter, prefix);
 			}
 		}
 		break;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index
2856cc9d5a31..c3115d939b0b 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -277,9 +277,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel if (!evsel->snapshot) perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); - if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu, - &rt_stat); + if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { + perf_stat__update_shadow_stats(evsel, count->val, + cpu, &rt_stat); + } + if (config->aggr_mode == AGGR_THREAD) { if (config->stats) perf_stat__update_shadow_stats(evsel, diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 50678d318185..403045a2bbea 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -15,6 +15,7 @@ #include "map.h" #include "symbol.h" #include "unwind.h" +#include "callchain.h" #include <api/fs/fs.h> @@ -327,7 +328,7 @@ static int thread__prepare_access(struct thread *thread) { int err = 0; - if (symbol_conf.use_callchain) + if (dwarf_callchain_users) err = __thread__prepare_access(thread); return err; diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 250391672f9f..9096a6e3de59 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -28,6 +28,7 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_session *session, union perf_event *event); typedef s64 (*event_op3)(struct perf_session *session, union perf_event *event); +typedef int (*event_op4)(struct perf_session *session, union perf_event *event, u64 data); typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, struct ordered_events *oe); @@ -72,6 +73,7 @@ struct perf_tool { stat, stat_round, feature; + event_op4 compressed; event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; diff --git a/tools/perf/util/unwind-libunwind-local.c 
b/tools/perf/util/unwind-libunwind-local.c index f3c666a84e4d..25e1406b1f8b 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -617,8 +617,6 @@ static unw_accessors_t accessors = { static int _unwind__prepare_access(struct thread *thread) { - if (!dwarf_callchain_users) - return 0; thread->addr_space = unw_create_addr_space(&accessors, 0); if (!thread->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); @@ -631,15 +629,11 @@ static int _unwind__prepare_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread) { - if (!dwarf_callchain_users) - return; unw_flush_cache(thread->addr_space, 0, 0); } static void _unwind__finish_access(struct thread *thread) { - if (!dwarf_callchain_users) - return; unw_destroy_addr_space(thread->addr_space); } diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 9778b3133b77..c0811977d7d5 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -5,6 +5,7 @@ #include "session.h" #include "debug.h" #include "env.h" +#include "callchain.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -24,6 +25,9 @@ int unwind__prepare_access(struct thread *thread, struct map *map, struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops; int err; + if (!dwarf_callchain_users) + return 0; + if (thread->addr_space) { pr_debug("unwind: thread map already set, dso=%s\n", map->dso->name); @@ -65,12 +69,18 @@ out_register: void unwind__flush_access(struct thread *thread) { + if (!dwarf_callchain_users) + return; + if (thread->unwind_libunwind_ops) thread->unwind_libunwind_ops->flush_access(thread); } void unwind__finish_access(struct thread *thread) { + if (!dwarf_callchain_users) + return; + if (thread->unwind_libunwind_ops) thread->unwind_libunwind_ops->finish_access(thread); } diff --git 
a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
new file mode 100644
index 000000000000..23bdb9884576
--- /dev/null
+++ b/tools/perf/util/zstd.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include "util/compress.h"
+#include "util/debug.h"
+
+/*
+ * Set up the zstd streams kept in @data.
+ *
+ * The decompression stream is always created; the compression stream is
+ * created only for a non-zero @level.  Returns 0 on success.  On failure -1
+ * is returned and every stream created so far has been released again.
+ */
+int zstd_init(struct zstd_data *data, int level)
+{
+	size_t ret;
+
+	/* Known state first, so that zstd_fini() never sees a stale pointer. */
+	data->cstream = NULL;
+	data->dstream = ZSTD_createDStream();
+	if (data->dstream == NULL) {
+		pr_err("Couldn't create decompression stream.\n");
+		return -1;
+	}
+
+	ret = ZSTD_initDStream(data->dstream);
+	if (ZSTD_isError(ret)) {
+		pr_err("Failed to initialize decompression stream: %s\n", ZSTD_getErrorName(ret));
+		goto out_fini;
+	}
+
+	if (!level)
+		return 0;
+
+	data->cstream = ZSTD_createCStream();
+	if (data->cstream == NULL) {
+		pr_err("Couldn't create compression stream.\n");
+		goto out_fini;
+	}
+
+	ret = ZSTD_initCStream(data->cstream, level);
+	if (ZSTD_isError(ret)) {
+		pr_err("Failed to initialize compression stream: %s\n", ZSTD_getErrorName(ret));
+		goto out_fini;
+	}
+
+	return 0;
+
+out_fini:
+	zstd_fini(data);
+	return -1;
+}
+
+/* Release whichever streams @data holds and clear the pointers.  Returns 0. */
+int zstd_fini(struct zstd_data *data)
+{
+	if (data->dstream) {
+		ZSTD_freeDStream(data->dstream);
+		data->dstream = NULL;
+	}
+
+	if (data->cstream) {
+		ZSTD_freeCStream(data->cstream);
+		data->cstream = NULL;
+	}
+
+	return 0;
+}
+
+/*
+ * Compress @src into @dst as a sequence of records.
+ *
+ * Each record starts where process_header(record, 0) is called; its return
+ * value is the number of bytes to skip before the payload.  At most
+ * @max_record_size payload bytes are produced per record, after which
+ * process_header(record, payload_size) is called and its return value is
+ * accounted as the payload length.
+ *
+ * Returns the number of bytes accounted in @dst.  When zstd reports an error
+ * the raw input is copied to the current payload position and @src_size is
+ * returned, or 0 if the raw input does not fit into what is left of @dst.
+ *
+ * NOTE(review): the record headers are written by the callback before their
+ * size is known here, so @dst presumably has to be sized by the caller to
+ * hold all headers plus payload -- confirm against the callers.
+ */
+size_t zstd_compress_stream_to_records(struct zstd_data *data, void *dst, size_t dst_size,
+				       void *src, size_t src_size, size_t max_record_size,
+				       size_t process_header(void *record, size_t increment))
+{
+	size_t ret, size, compressed = 0;
+	ZSTD_inBuffer input = { src, src_size, 0 };
+	ZSTD_outBuffer output;
+	void *record;
+
+	while (input.pos < input.size) {
+		record = dst;
+		size = process_header(record, 0);
+		compressed += size;
+		dst += size;
+		dst_size -= size;
+		output = (ZSTD_outBuffer){ dst, (dst_size > max_record_size) ?
+						max_record_size : dst_size, 0 };
+		ret = ZSTD_compressStream(data->cstream, &output, &input);
+		/* Flush only a healthy stream, and do not drop a flush failure. */
+		if (!ZSTD_isError(ret))
+			ret = ZSTD_flushStream(data->cstream, &output);
+		if (ZSTD_isError(ret)) {
+			pr_err("failed to compress %ld bytes: %s\n",
+				(long)src_size, ZSTD_getErrorName(ret));
+			/* Raw fallback, but never past the end of dst. */
+			if (src_size > dst_size)
+				return 0;
+			memcpy(dst, src, src_size);
+			return src_size;
+		}
+		size = output.pos;
+		size = process_header(record, size);
+		compressed += size;
+		dst += size;
+		dst_size -= size;
+	}
+
+	return compressed;
+}
+
+/*
+ * Decompress @src_size bytes at @src into @dst, which holds @dst_size bytes.
+ *
+ * Returns the number of bytes written to @dst.  Decompression stops early,
+ * after logging, when zstd reports an error or when a call neither consumes
+ * input nor produces output (e.g. because @dst is full).
+ */
+size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size,
+			      void *dst, size_t dst_size)
+{
+	size_t ret;
+	ZSTD_inBuffer input = { src, src_size, 0 };
+	ZSTD_outBuffer output = { dst, dst_size, 0 };
+
+	while (input.pos < input.size) {
+		size_t in_pos = input.pos, out_pos = output.pos;
+
+		/*
+		 * zstd appends at output.dst + output.pos and advances pos itself,
+		 * so the buffer description must not be rebased between calls.
+		 */
+		ret = ZSTD_decompressStream(data->dstream, &output, &input);
+		if (ZSTD_isError(ret)) {
+			pr_err("failed to decompress (B): %zu -> %zu : %s\n",
+			       src_size, output.size, ZSTD_getErrorName(ret));
+			break;
+		}
+		if (input.pos == in_pos && output.pos == out_pos) {
+			pr_err("failed to decompress (B): %zu -> %zu : no forward progress\n",
+			       src_size, output.size);
+			break;
+		}
+	}
+
+	return output.pos;
+}