summaryrefslogtreecommitdiffstats
path: root/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
diff options
context:
space:
mode:
authorGerman Gomez <german.gomez@arm.com>2023-01-20 14:37:00 +0000
committerArnaldo Carvalho de Melo <acme@redhat.com>2023-01-22 18:17:45 -0300
commita7fe9a443b6064c68f86a2ee09bdfa7736660ef3 (patch)
treef51359d0c2dde108b84345db0150fe256e1aa11a /tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
parent2e2f7ceecc19fcac31bc194485e96a3b67b7d65e (diff)
downloadlinux-stable-a7fe9a443b6064c68f86a2ee09bdfa7736660ef3.tar.gz
linux-stable-a7fe9a443b6064c68f86a2ee09bdfa7736660ef3.tar.bz2
linux-stable-a7fe9a443b6064c68f86a2ee09bdfa7736660ef3.zip
perf cs_etm: Set the time field in the synthetic samples
If virtual timestamps are detected, set the sample time field accordingly; otherwise warn the user that the samples will not include accurate time data. | Test notes (FEAT_TRF platform) | | $ ./perf record -e cs_etm//u -a -- sleep 4 | $ ./perf script --fields +time | perf 422 [000] 163.375100: 1 branches:uH: 0 [unknown] ([unknown]) | perf 422 [000] 163.375100: 1 branches:uH: ffffb8009544 ioctl+0x14 (/lib/aarch64-linux-gnu/libc-2.27.so) | perf 422 [000] 163.375100: 1 branches:uH: aaaaab6bebf4 perf_evsel__run_ioctl+0x90 (/home/german/linux/tools/perf/perf) | [...] | perf 422 [000] 167.393100: 1 branches:uH: aaaaab6bda00 __xyarray__entry+0x74 (/home/german/linux/tools/perf/perf) | perf 422 [000] 167.393099: 1 branches:uH: aaaaab6bda0c __xyarray__entry+0x80 (/home/german/linux/tools/perf/perf) | perf 422 [000] 167.393099: 1 branches:uH: ffffb8009538 ioctl+0x8 (/lib/aarch64-linux-gnu/libc-2.27.so) | | The time from the first sample to the last sample is 4 seconds. Now that times are converted to nanoseconds, also try to estimate the timestamps more accurately by dividing by some fixed value for instructions per ns. This prevents long ranges from being estimated further in the past than would be realistic. 
Signed-off-by: German Gomez <german.gomez@arm.com> Acked-by: Suzuki Poulouse <suzuki.poulose@arm.com> Tested-by: Tanmay Jagdale <tanmay@marvell.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Bharat Bhushan <bbhushan2@marvell.com> Cc: George Cherian <gcherian@marvell.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: John Garry <john.g.garry@oracle.com> Cc: Leo Yan <leo.yan@linaro.org> Cc: Linu Cherian <lcherian@marvell.com> Cc: Mark Rutland <mark.rutland@arm.com> Cc: Mathieu Poirier <mathieu.poirier@linaro.org> Cc: Mike Leach <mike.leach@linaro.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Sunil Kovvuri Goutham <sgoutham@marvell.com> Cc: Will Deacon <will@kernel.org> Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230120143702.4035046-8-james.clark@arm.com Signed-off-by: James Clark <james.clark@arm.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools/perf/util/cs-etm-decoder/cs-etm-decoder.c')
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c47
1 files changed, 39 insertions, 8 deletions
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 31fa3b45134a..440fe844ed17 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -30,6 +30,15 @@
#endif
#endif
+/*
+ * Assume a maximum of 0.1ns elapsed per instruction. This would be the
+ * case with a theoretical 10GHz core executing 1 instruction per cycle.
+ * Used to estimate the sample time for synthesized instructions because
+ * Coresight only emits a timestamp for a range of instructions rather
+ * than per instruction.
+ *
+ * The value is the divisor applied to an instruction count to obtain an
+ * estimated number of elapsed nanoseconds (10 instructions == 1ns).
+ *
+ * NOTE(review): this is declared without 'static', so in C the symbol has
+ * external linkage. It looks like it is only used within this file -
+ * confirm, and consider making it 'static const' to keep it file-local.
+ */
+const u32 INSTR_PER_NS = 10;
+
struct cs_etm_decoder {
void *data;
void (*packet_printer)(const char *msg);
@@ -112,6 +121,20 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
return 1;
}
+/*
+ * Calculate the number of nanoseconds elapsed.
+ *
+ * instr_count is updated in place with the remainder of the instructions
+ * which didn't make up a whole nanosecond.
+ *
+ * Returns the original *instr_count divided by INSTR_PER_NS (integer
+ * division). On return *instr_count holds the original value modulo
+ * INSTR_PER_NS. For example, with INSTR_PER_NS == 10, a count of 25
+ * returns 2 and leaves 5 in *instr_count.
+ */
+static u32 cs_etm_decoder__dec_instr_count_to_ns(u32 *instr_count)
+{
+ /* Snapshot the count first: the modulo below overwrites *instr_count. */
+ const u32 instr_copy = *instr_count;
+
+ *instr_count %= INSTR_PER_NS;
+ return instr_copy / INSTR_PER_NS;
+}
+
static int cs_etm_decoder__gen_etmv3_config(struct cs_etm_trace_params *params,
ocsd_etmv3_cfg *config)
{
@@ -267,8 +290,8 @@ cs_etm_decoder__do_soft_timestamp(struct cs_etm_queue *etmq,
packet_queue->cs_timestamp = packet_queue->next_cs_timestamp;
/* Estimate the timestamp for the next range packet */
- packet_queue->next_cs_timestamp += packet_queue->instr_count;
- packet_queue->instr_count = 0;
+ packet_queue->next_cs_timestamp +=
+ cs_etm_decoder__dec_instr_count_to_ns(&packet_queue->instr_count);
/* Tell the front end which traceid_queue needs attention */
cs_etm__etmq_set_traceid_queue_timestamp(etmq, trace_chan_id);
@@ -283,6 +306,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
const ocsd_trc_index_t indx)
{
struct cs_etm_packet_queue *packet_queue;
+ u64 converted_timestamp;
/* First get the packet queue for this traceID */
packet_queue = cs_etm__etmq_get_packet_queue(etmq, trace_chan_id);
@@ -290,17 +314,23 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
return OCSD_RESP_FATAL_SYS_ERR;
/*
+ * Coresight timestamps are raw timer values which need to be scaled to ns. Assume
+ * 0 is a bad value so don't try to convert it.
+ */
+ converted_timestamp = elem->timestamp ?
+ cs_etm__convert_sample_time(etmq, elem->timestamp) : 0;
+
+ /*
* We've seen a timestamp packet before - simply record the new value.
* Function do_soft_timestamp() will report the value to the front end,
* hence asking the decoder to keep decoding rather than stopping.
*/
if (packet_queue->cs_timestamp) {
- packet_queue->next_cs_timestamp = elem->timestamp;
+ packet_queue->next_cs_timestamp = converted_timestamp;
return OCSD_RESP_CONT;
}
-
- if (!elem->timestamp) {
+ if (!converted_timestamp) {
/*
* Zero timestamps can be seen due to misconfiguration or hardware bugs.
* Warn once, and don't try to subtract instr_count as it would result in an
@@ -312,7 +342,7 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
". Decoding may be improved by prepending 'Z' to your current --itrace arguments.\n",
indx);
- } else if (packet_queue->instr_count > elem->timestamp) {
+ } else if (packet_queue->instr_count / INSTR_PER_NS > converted_timestamp) {
/*
* Sanity check that the elem->timestamp - packet_queue->instr_count would not
* result in an underflow. Warn and clamp at 0 if it would.
@@ -327,9 +357,10 @@ cs_etm_decoder__do_hard_timestamp(struct cs_etm_queue *etmq,
* which instructions started by subtracting the number of instructions
* executed to the timestamp.
*/
- packet_queue->cs_timestamp = elem->timestamp - packet_queue->instr_count;
+ packet_queue->cs_timestamp = converted_timestamp -
+ (packet_queue->instr_count / INSTR_PER_NS);
}
- packet_queue->next_cs_timestamp = elem->timestamp;
+ packet_queue->next_cs_timestamp = converted_timestamp;
packet_queue->instr_count = 0;
/* Tell the front end which traceid_queue needs attention */