Diffstat (limited to 'tools/perf/builtin-record.c')
-rw-r--r-- | tools/perf/builtin-record.c | 229
1 file changed, 197 insertions(+), 32 deletions(-)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c5e10552776a..e2c3a585a61e 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -133,6 +133,11 @@ static int record__write(struct record *rec, struct perf_mmap *map __maybe_unuse
 	return 0;
 }
 
+static int record__aio_enabled(struct record *rec);
+static int record__comp_enabled(struct record *rec);
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+			    void *src, size_t src_size);
+
 #ifdef HAVE_AIO_SUPPORT
 static int record__aio_write(struct aiocb *cblock, int trace_fd,
 		void *buf, size_t size, off_t off)
@@ -183,9 +188,9 @@ static int record__aio_complete(struct perf_mmap *md, struct aiocb *cblock)
 	if (rem_size == 0) {
 		cblock->aio_fildes = -1;
 		/*
-		 * md->refcount is incremented in perf_mmap__push() for
-		 * every enqueued aio write request so decrement it because
-		 * the request is now complete.
+		 * md->refcount is incremented in record__aio_pushfn() for
+		 * every aio write request started in record__aio_push() so
+		 * decrement it because the request is now complete.
 		 */
 		perf_mmap__put(md);
 		rc = 1;
@@ -240,18 +245,89 @@ static int record__aio_sync(struct perf_mmap *md, bool sync_all)
 	} while (1);
 }
 
-static int record__aio_pushfn(void *to, struct aiocb *cblock, void *bf, size_t size, off_t off)
+struct record_aio {
+	struct record	*rec;
+	void		*data;
+	size_t		size;
+};
+
+static int record__aio_pushfn(struct perf_mmap *map, void *to, void *buf, size_t size)
 {
-	struct record *rec = to;
-	int ret, trace_fd = rec->session->data->file.fd;
+	struct record_aio *aio = to;
 
-	rec->samples++;
+	/*
+	 * map->base data pointed to by buf is copied into a free map->aio.data[]
+	 * buffer to release space in the kernel buffer as fast as possible,
+	 * via the perf_mmap__consume() call made from perf_mmap__push().
+	 *
+	 * That lets the kernel proceed with storing more profiling data into
+	 * the kernel buffer earlier than other per-cpu kernel buffers are handled.
+	 *
+	 * Copying can be done in two steps in case the chunk of profiling data
+	 * crosses the upper bound of the kernel buffer. In this case we first move
+	 * the part of the data from map->start till the upper bound and then the
+	 * remainder from the beginning of the kernel buffer till the end of the
+	 * data chunk.
+	 */
+
+	if (record__comp_enabled(aio->rec)) {
+		size = zstd_compress(aio->rec->session, aio->data + aio->size,
+				     perf_mmap__mmap_len(map) - aio->size,
+				     buf, size);
+	} else {
+		memcpy(aio->data + aio->size, buf, size);
+	}
+
+	if (!aio->size) {
+		/*
+		 * Increment map->refcount to guard the map->aio.data[] buffer
+		 * from premature deallocation, because the map object can be
+		 * released earlier than the aio write request started on the
+		 * map->aio.data[] buffer is complete.
+		 *
+		 * perf_mmap__put() is done at record__aio_complete() after
+		 * the started aio request completes, or at record__aio_push()
+		 * if the request failed to start.
+		 */
+		perf_mmap__get(map);
+	}
+
+	aio->size += size;
+
+	return size;
+}
 
-	ret = record__aio_write(cblock, trace_fd, bf, size, off);
+static int record__aio_push(struct record *rec, struct perf_mmap *map, off_t *off)
+{
+	int ret, idx;
+	int trace_fd = rec->session->data->file.fd;
+	struct record_aio aio = { .rec = rec, .size = 0 };
+
+	/*
+	 * Call record__aio_sync() to wait until a map->aio.data[] buffer
+	 * becomes available after the previous aio write operation.
+	 */
+
+	idx = record__aio_sync(map, false);
+	aio.data = map->aio.data[idx];
+	ret = perf_mmap__push(map, &aio, record__aio_pushfn);
+	if (ret != 0) /* ret > 0 - no data, ret < 0 - error */
+		return ret;
+
+	rec->samples++;
+	ret = record__aio_write(&(map->aio.cblocks[idx]), trace_fd, aio.data, aio.size, *off);
 	if (!ret) {
-		rec->bytes_written += size;
+		*off += aio.size;
+		rec->bytes_written += aio.size;
 		if (switch_output_size(rec))
 			trigger_hit(&switch_output_trigger);
+	} else {
+		/*
+		 * Decrement map->refcount (incremented in record__aio_pushfn())
+		 * if the record__aio_write() operation failed to start; otherwise
+		 * map->refcount is decremented in record__aio_complete() after
+		 * the aio write operation finishes successfully.
+		 */
+		perf_mmap__put(map);
 	}
 
 	return ret;
@@ -273,7 +349,7 @@ static void record__aio_mmap_read_sync(struct record *rec)
 	struct perf_evlist *evlist = rec->evlist;
 	struct perf_mmap *maps = evlist->mmap;
 
-	if (!rec->opts.nr_cblocks)
+	if (!record__aio_enabled(rec))
 		return;
 
 	for (i = 0; i < evlist->nr_mmaps; i++) {
@@ -307,13 +383,8 @@ static int record__aio_parse(const struct option *opt,
 #else /* HAVE_AIO_SUPPORT */
 static int nr_cblocks_max = 0;
 
-static int record__aio_sync(struct perf_mmap *md __maybe_unused, bool sync_all __maybe_unused)
-{
-	return -1;
-}
-
-static int record__aio_pushfn(void *to __maybe_unused, struct aiocb *cblock __maybe_unused,
-		void *bf __maybe_unused, size_t size __maybe_unused, off_t off __maybe_unused)
+static int record__aio_push(struct record *rec __maybe_unused, struct perf_mmap *map __maybe_unused,
+		off_t *off __maybe_unused)
 {
 	return -1;
 }
@@ -372,6 +443,32 @@ static int record__mmap_flush_parse(const struct option *opt,
 	return 0;
 }
 
+#ifdef HAVE_ZSTD_SUPPORT
+static unsigned int comp_level_default = 1;
+
+static int record__parse_comp_level(const struct option *opt, const char *str, int unset)
+{
+	struct record_opts *opts = opt->value;
+
+	if (unset) {
+		opts->comp_level = 0;
+	} else {
+		if (str)
+			opts->comp_level = strtol(str, NULL, 0);
+		if (!opts->comp_level)
+			opts->comp_level = comp_level_default;
+	}
+
+	return 0;
+}
+#endif
+static unsigned int comp_level_max = 22;
+
+static int record__comp_enabled(struct record *rec)
+{
+	return rec->opts.comp_level > 0;
+}
+
 static int process_synthesized_event(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct perf_sample *sample __maybe_unused,
@@ -385,6 +482,11 @@ static int record__pushfn(struct perf_mmap *map, void *to, void *bf, size_t size
 {
 	struct record *rec = to;
 
+	if (record__comp_enabled(rec)) {
+		size = zstd_compress(rec->session, map->data, perf_mmap__mmap_len(map), bf, size);
+		bf = map->data;
+	}
+
 	rec->samples++;
 	return record__write(rec, map, bf, size);
 }
@@ -582,7 +684,7 @@ static int record__mmap_evlist(struct record *rec,
 				 opts->auxtrace_mmap_pages,
 				 opts->auxtrace_snapshot_mode,
 				 opts->nr_cblocks, opts->affinity,
-				 opts->mmap_flush) < 0) {
+				 opts->mmap_flush, opts->comp_level) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -771,6 +873,37 @@ static void record__adjust_affinity(struct record *rec, struct perf_mmap *map)
 	}
 }
 
+static size_t process_comp_header(void *record, size_t increment)
+{
+	struct compressed_event *event = record;
+	size_t size = sizeof(*event);
+
+	if (increment) {
+		event->header.size += increment;
+		return increment;
+	}
+
+	event->header.type = PERF_RECORD_COMPRESSED;
+	event->header.size = size;
+
+	return size;
+}
+
+static size_t zstd_compress(struct perf_session *session, void *dst, size_t dst_size,
+			    void *src, size_t src_size)
+{
+	size_t compressed;
+	size_t max_record_size = PERF_SAMPLE_MAX_SIZE - sizeof(struct compressed_event) - 1;
+
+	compressed = zstd_compress_stream_to_records(&session->zstd_data, dst, dst_size, src, src_size,
+						     max_record_size, process_comp_header);
+
+	session->bytes_transferred += src_size;
+	session->bytes_compressed += compressed;
+
+	return compressed;
+}
+
 static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
 				    bool overwrite, bool synch)
 {
@@ -779,7 +912,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evli
 	int rc = 0;
 	struct perf_mmap *maps;
 	int trace_fd = rec->data.file.fd;
-	off_t off;
+	off_t off = 0;
 
 	if (!evlist)
 		return 0;
@@ -805,20 +938,14 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evli
 			map->flush = 1;
 		}
 		if (!record__aio_enabled(rec)) {
-			if (perf_mmap__push(map, rec, record__pushfn) != 0) {
+			if (perf_mmap__push(map, rec, record__pushfn) < 0) {
 				if (synch)
 					map->flush = flush;
 				rc = -1;
 				goto out;
 			}
 		} else {
-			int idx;
-			/*
-			 * Call record__aio_sync() to wait till map->data buffer
-			 * becomes available after previous aio write request.
-			 */
-			idx = record__aio_sync(map, false);
-			if (perf_mmap__aio_push(map, rec, idx, record__aio_pushfn, &off) != 0) {
+			if (record__aio_push(rec, map, &off) < 0) {
 				record__aio_set_pos(trace_fd, off);
 				if (synch)
 					map->flush = flush;
@@ -888,6 +1015,8 @@ static void record__init_features(struct record *rec)
 		perf_header__clear_feat(&session->header, HEADER_CLOCKID);
 
 	perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
+	if (!record__comp_enabled(rec))
+		perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
 
 	perf_header__clear_feat(&session->header, HEADER_STAT);
 }
@@ -1186,6 +1315,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	bool disabled = false, draining = false;
 	struct perf_evlist *sb_evlist = NULL;
 	int fd;
+	float ratio = 0;
 
 	atexit(record__sig_exit);
 	signal(SIGCHLD, sig_handler);
@@ -1215,6 +1345,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	fd = perf_data__fd(data);
 	rec->session = session;
 
+	if (zstd_init(&session->zstd_data, rec->opts.comp_level) < 0) {
+		pr_err("Compression initialization failed.\n");
+		return -1;
+	}
+
+	session->header.env.comp_type = PERF_COMP_ZSTD;
+	session->header.env.comp_level = rec->opts.comp_level;
+
 	record__init_features(rec);
 
 	if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
@@ -1244,6 +1382,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		err = -1;
 		goto out_child;
 	}
+	session->header.env.comp_mmap_len = session->evlist->mmap_len;
 
 	err = bpf__apply_obj_config();
 	if (err) {
@@ -1491,6 +1630,11 @@ out_child:
 	record__mmap_read_all(rec, true);
 	record__aio_mmap_read_sync(rec);
 
+	if (rec->session->bytes_transferred && rec->session->bytes_compressed) {
+		ratio = (float)rec->session->bytes_transferred/(float)rec->session->bytes_compressed;
+		session->header.env.comp_ratio = ratio + 0.5;
+	}
+
 	if (forks) {
 		int exit_status;
 
@@ -1537,12 +1681,19 @@ out_child:
 		else
 			samples[0] = '\0';
 
-		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+		fprintf(stderr,	"[ perf record: Captured and wrote %.3f MB %s%s%s",
 			perf_data__size(data) / 1024.0 / 1024.0,
 			data->path, postfix, samples);
+		if (ratio) {
+			fprintf(stderr,	", compressed (original %.3f MB, ratio is %.3f)",
+				rec->session->bytes_transferred / 1024.0 / 1024.0,
+				ratio);
+		}
+		fprintf(stderr, " ]\n");
 	}
 
 out_delete_session:
+	zstd_fini(&session->zstd_data);
 	perf_session__delete(session);
 
 	if (!opts->no_bpf_event)
@@ -2017,10 +2168,10 @@ static struct option __record_options[] = {
 		    "use per-thread mmaps"),
 	OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
 		    "sample selected machine registers on interrupt,"
-		    " use -I ? to list register names", parse_regs),
+		    " use '-I?' to list register names", parse_intr_regs),
 	OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
 		    "sample selected machine registers on interrupt,"
-		    " use -I ? to list register names", parse_regs),
+		    " use '--user-regs=?' to list register names", parse_user_regs),
 	OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
 		    "Record running/enabled time of read (:S) events"),
 	OPT_CALLBACK('k', "clockid", &record.opts,
@@ -2068,6 +2219,11 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "affinity", &record.opts, "node|cpu",
 		     "Set affinity mask of trace reading thread to NUMA node cpu mask or cpu of processed mmap buffer",
 		     record__parse_affinity),
+#ifdef HAVE_ZSTD_SUPPORT
+	OPT_CALLBACK_OPTARG('z', "compression-level", &record.opts, &comp_level_default,
+			    "n", "Compress records using the specified level (default: 1 - fastest compression, 22 - greatest compression)",
+			    record__parse_comp_level),
+#endif
 	OPT_END()
 };
 
@@ -2127,6 +2283,12 @@ int cmd_record(int argc, const char **argv)
 			  "cgroup monitoring only available in system-wide mode");
 
 	}
+
+	if (rec->opts.comp_level != 0) {
+		pr_debug("Compression enabled, disabling build id collection at the end of the session.\n");
+		rec->no_buildid = true;
+	}
+
 	if (rec->opts.record_switch_events &&
 	    !perf_can_record_switch_events()) {
 		ui__error("kernel does not support recording context switch events\n");
@@ -2272,12 +2434,15 @@ int cmd_record(int argc, const char **argv)
 	if (rec->opts.nr_cblocks > nr_cblocks_max)
 		rec->opts.nr_cblocks = nr_cblocks_max;
-	if (verbose > 0)
-		pr_info("nr_cblocks: %d\n", rec->opts.nr_cblocks);
+	pr_debug("nr_cblocks: %d\n", rec->opts.nr_cblocks);
 
 	pr_debug("affinity: %s\n", affinity_tags[rec->opts.affinity]);
 	pr_debug("mmap flush: %d\n", rec->opts.mmap_flush);
 
+	if (rec->opts.comp_level > comp_level_max)
+		rec->opts.comp_level = comp_level_max;
+	pr_debug("comp level: %d\n", rec->opts.comp_level);
+
 	err = __cmd_record(&record, argc, argv);
 out:
 	perf_evlist__delete(rec->evlist);
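
The two-step copy described in record__aio_pushfn() is the usual way to drain a chunk that wraps past the upper bound of a ring buffer. A minimal standalone sketch of the scheme, with hypothetical names (ring_copy(), RING_SIZE) rather than the actual perf helpers:

	#include <stddef.h>
	#include <string.h>

	#define RING_SIZE 4096	/* hypothetical; perf uses the mmap'ed buffer length */

	/* Copy len bytes starting at offset start, wrapping at RING_SIZE. */
	static void ring_copy(const unsigned char *ring, size_t start, size_t len,
			      unsigned char *dst)
	{
		size_t tail = RING_SIZE - start;	/* bytes up to the upper bound */

		if (len <= tail) {
			memcpy(dst, ring + start, len);		/* contiguous: one step */
		} else {
			memcpy(dst, ring + start, tail);	/* step 1: start..bound */
			memcpy(dst + tail, ring, len - tail);	/* step 2: wrapped remainder */
		}
	}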
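
The structure of process_comp_header() suggests a two-phase calling convention for zstd_compress_stream_to_records(): the callback is invoked once with increment == 0 to emit a PERF_RECORD_COMPRESSED header and reserve its size, then again with the number of payload bytes so header.size grows to cover the whole record. A sketch of that contract under assumed types; struct hdr, write_hdr() and frame_record() are illustrative stand-ins, and the memcpy() stands in for the actual compression step:

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	struct hdr { uint32_t type; uint16_t misc; uint16_t size; };

	typedef size_t (*header_cb)(void *record, size_t increment);

	static size_t write_hdr(void *record, size_t increment)
	{
		struct hdr *h = record;

		if (increment) {	/* phase 2: extend size over the payload */
			h->size += increment;
			return increment;
		}
		h->type = 0x51;		/* illustrative type value only */
		h->size = sizeof(*h);	/* phase 1: header emitted, payload pending */
		return sizeof(*h);
	}

	static size_t frame_record(void *dst, const void *src, size_t n, header_cb cb)
	{
		size_t skip = cb(dst, 0);		/* reserve and fill the header */

		memcpy((char *)dst + skip, src, n);	/* compression would go here */
		return skip + cb(dst, n);		/* patch header.size; total bytes */
	}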
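
With a perf built against libzstd (HAVE_ZSTD_SUPPORT), the new option is spelled -z/--compression-level. Because its argument is optional (OPT_CALLBACK_OPTARG), a level must be attached to the flag (-z2 or --compression-level=2), while a bare -z falls back to comp_level_default. An illustrative invocation follows; the figures are made up, only the message shape follows the fprintf format in the patch:

	$ perf record -z2 -- sleep 5
	[ perf record: Captured and wrote 0.118 MB perf.data, compressed (original 0.237 MB, ratio is 2.013) ]

The comp_ratio stored in the header env is the float ratio rounded to the nearest whole number, which is what the 'ratio + 0.5' assignment to the integer field achieves.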