summaryrefslogtreecommitdiffstats
path: root/tools/perf/util
diff options
context:
space:
mode:
authorAlexey Budankov <alexey.budankov@linux.intel.com>2018-11-06 12:04:58 +0300
committerArnaldo Carvalho de Melo <acme@redhat.com>2018-12-17 14:55:08 -0300
commitd3d1af6f011a553a00d2bda90b2700c0d56bd8f7 (patch)
tree8f3deb8dcd8b095b3e876fee7d5825c851b10c1c /tools/perf/util
parent0b77383134f3dbb461189a9c4f3b46b20152045d (diff)
downloadlinux-d3d1af6f011a553a00d2bda90b2700c0d56bd8f7.tar.gz
linux-d3d1af6f011a553a00d2bda90b2700c0d56bd8f7.tar.bz2
linux-d3d1af6f011a553a00d2bda90b2700c0d56bd8f7.zip
perf record: Enable asynchronous trace writing
The trace file offset is read once before mmaps iterating loop and written back after all performance data is enqueued for aio writing. The trace file offset is incremented linearly after every successful aio write operation. record__aio_sync() blocks till completion of the started AIO operation and then proceeds. record__aio_mmap_read_sync() implements a barrier for all incomplete aio write requests. Signed-off-by: Alexey Budankov <alexey.budankov@linux.intel.com> Reviewed-by: Jiri Olsa <jolsa@redhat.com> Acked-by: Namhyung Kim <namhyung@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/ce2d45e9-d236-871c-7c8f-1bed2d37e8ac@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util')
-rw-r--r--tools/perf/util/evlist.c6
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/mmap.c77
-rw-r--r--tools/perf/util/mmap.h14
4 files changed, 94 insertions, 5 deletions
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6f010b9f0a81..e90575192209 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1018,7 +1018,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
*/
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite)
+ bool auxtrace_overwrite, int nr_cblocks)
{
struct perf_evsel *evsel;
const struct cpu_map *cpus = evlist->cpus;
@@ -1028,7 +1028,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
* Its value is decided by evsel's write_backward.
* So &mp should not be passed through const pointer.
*/
- struct mmap_params mp = { .nr_cblocks = 0 };
+ struct mmap_params mp = { .nr_cblocks = nr_cblocks };
if (!evlist->mmap)
evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
@@ -1060,7 +1060,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
{
- return perf_evlist__mmap_ex(evlist, pages, 0, false);
+ return perf_evlist__mmap_ex(evlist, pages, 0, false, 0);
}
int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index d108d167eb36..868294491194 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -162,7 +162,7 @@ unsigned long perf_event_mlock_kb_in_pages(void);
int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
unsigned int auxtrace_pages,
- bool auxtrace_overwrite);
+ bool auxtrace_overwrite, int nr_cblocks);
int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
void perf_evlist__munmap(struct perf_evlist *evlist);
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 47cdc3ad6546..61aa381d05d0 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -158,7 +158,8 @@ static int perf_mmap__aio_mmap(struct perf_mmap *map, struct mmap_params *mp)
{
int delta_max;
- if (mp->nr_cblocks) {
+ map->aio.nr_cblocks = mp->nr_cblocks;
+ if (map->aio.nr_cblocks) {
map->aio.data = malloc(perf_mmap__mmap_len(map));
if (!map->aio.data) {
pr_debug2("failed to allocate data buffer, error %m\n");
@@ -187,6 +188,80 @@ static void perf_mmap__aio_munmap(struct perf_mmap *map)
if (map->aio.data)
zfree(&map->aio.data);
}
+
+int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
+ off_t *off)
+{
+ u64 head = perf_mmap__read_head(md);
+ unsigned char *data = md->base + page_size;
+ unsigned long size, size0 = 0;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ /*
+ * md->base data is copied into md->data buffer to
+ * release space in the kernel buffer as fast as possible,
+ * thru perf_mmap__consume() below.
+ *
+ * That lets the kernel to proceed with storing more
+ * profiling data into the kernel buffer earlier than other
+ * per-cpu kernel buffers are handled.
+ *
+ * Coping can be done in two steps in case the chunk of
+ * profiling data crosses the upper bound of the kernel buffer.
+ * In this case we first move part of data from md->start
+ * till the upper bound and then the reminder from the
+ * beginning of the kernel buffer till the end of
+ * the data chunk.
+ */
+
+ size = md->end - md->start;
+
+ if ((md->start & md->mask) + size != (md->end & md->mask)) {
+ buf = &data[md->start & md->mask];
+ size = md->mask + 1 - (md->start & md->mask);
+ md->start += size;
+ memcpy(md->aio.data, buf, size);
+ size0 = size;
+ }
+
+ buf = &data[md->start & md->mask];
+ size = md->end - md->start;
+ md->start += size;
+ memcpy(md->aio.data + size0, buf, size);
+
+ /*
+ * Increment md->refcount to guard md->data buffer
+ * from premature deallocation because md object can be
+ * released earlier than aio write request started
+ * on mmap->data is complete.
+ *
+ * perf_mmap__put() is done at record__aio_complete()
+ * after started request completion.
+ */
+ perf_mmap__get(md);
+
+ md->prev = head;
+ perf_mmap__consume(md);
+
+ rc = push(to, &md->aio.cblock, md->aio.data, size0 + size, *off);
+ if (!rc) {
+ *off += size0 + size;
+ } else {
+ /*
+ * Decrement md->refcount back if aio write
+ * operation failed to start.
+ */
+ perf_mmap__put(md);
+ }
+
+ return rc;
+}
#else
static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
struct mmap_params *mp __maybe_unused)
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index 3f10ad030c5e..b99213ba11b5 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -12,6 +12,7 @@
#include "auxtrace.h"
#include "event.h"
+struct aiocb;
/**
* struct perf_mmap - perf's ring buffer mmap details
*
@@ -33,6 +34,7 @@ struct perf_mmap {
struct {
void *data;
struct aiocb cblock;
+ int nr_cblocks;
} aio;
#endif
};
@@ -94,6 +96,18 @@ union perf_event *perf_mmap__read_event(struct perf_mmap *map);
int perf_mmap__push(struct perf_mmap *md, void *to,
int push(struct perf_mmap *map, void *to, void *buf, size_t size));
+#ifdef HAVE_AIO_SUPPORT
+int perf_mmap__aio_push(struct perf_mmap *md, void *to,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off),
+ off_t *off);
+#else
+static inline int perf_mmap__aio_push(struct perf_mmap *md __maybe_unused, void *to __maybe_unused,
+ int push(void *to, struct aiocb *cblock, void *buf, size_t size, off_t off) __maybe_unused,
+ off_t *off __maybe_unused)
+{
+ return 0;
+}
+#endif
size_t perf_mmap__mmap_len(struct perf_mmap *map);