diff options
author | Kan Liang <kan.liang@linux.intel.com> | 2022-07-18 09:43:10 -0700 |
---|---|---|
committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2022-07-25 12:28:00 -0300 |
commit | bc2373a58aacb241bf135b9a7452a1499d0d457e (patch) | |
tree | e4f7f6694618cea87bd00a6f2a5251b463b91a0d | |
parent | 9fe9b252c7c022df8e503435e778f15c04dfa3bf (diff) | |
download | linux-stable-bc2373a58aacb241bf135b9a7452a1499d0d457e.tar.gz linux-stable-bc2373a58aacb241bf135b9a7452a1499d0d457e.tar.bz2 linux-stable-bc2373a58aacb241bf135b9a7452a1499d0d457e.zip |
perf tsc: Add arch TSC frequency information
The TSC frequency information is required for the event metrics with the
literal, system_tsc_freq. For the newer Intel platform, the TSC
frequency information can be retrieved from the CPUID leaf 0x15. If the
TSC frequency information isn't present the /proc/cpuinfo approach is
used.
Refactor cpuid() for this use. Note, the previous stack pushing/popping
approach was broken on x86-64 that has stack red zones that would be
clobbered.
Committer testing:
Before:
$ perf record sleep 0.0001
[ perf record: Woken up 1 times to write data ]
$ perf report --header-only |& grep cpuid
# cpuid : AuthenticAMD,25,33,0
$
After the patch:
$ perf record sleep 0.0001
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 0.002 MB perf.data (8 samples) ]
$ perf report --header-only |& grep cpuid
# cpuid : AuthenticAMD,25,33,0
$
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kshipra Bopardikar <kshipra.bopardikar@intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220718164312.3994191-2-irogers@google.com
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
-rw-r--r-- | tools/perf/arch/x86/util/cpuid.h | 34 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/header.c | 27 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/tsc.c | 33 | ||||
-rw-r--r-- | tools/perf/util/expr.c | 13 | ||||
-rw-r--r-- | tools/perf/util/tsc.h | 1 |
5 files changed, 92 insertions, 16 deletions
diff --git a/tools/perf/arch/x86/util/cpuid.h b/tools/perf/arch/x86/util/cpuid.h new file mode 100644 index 000000000000..0a3ae0ace7e9 --- /dev/null +++ b/tools/perf/arch/x86/util/cpuid.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef PERF_CPUID_H +#define PERF_CPUID_H 1 + + +static inline void +cpuid(unsigned int op, unsigned int op2, unsigned int *a, unsigned int *b, + unsigned int *c, unsigned int *d) +{ + /* + * Preserve %ebx/%rbx register by either placing it in %rdi or saving it + * on the stack - x86-64 needs to avoid the stack red zone. In PIC + * compilations %ebx contains the address of the global offset + * table. %rbx is occasionally used to address stack variables in + * presence of dynamic allocas. + */ + asm( +#if defined(__x86_64__) + "mov %%rbx, %%rdi\n" + "cpuid\n" + "xchg %%rdi, %%rbx\n" +#else + "pushl %%ebx\n" + "cpuid\n" + "movl %%ebx, %%edi\n" + "popl %%ebx\n" +#endif + : "=a"(*a), "=D"(*b), "=c"(*c), "=d"(*d) + : "a"(op), "2"(op2)); +} + +void get_cpuid_0(char *vendor, unsigned int *lvl); + +#endif diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 578c8c568ffd..a51444a77a5f 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -9,18 +9,17 @@ #include "../../../util/debug.h" #include "../../../util/header.h" +#include "cpuid.h" -static inline void -cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c, - unsigned int *d) +void get_cpuid_0(char *vendor, unsigned int *lvl) { - __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t" - "movl %%ebx, %%esi\n\t.byte 0x5b" - : "=a" (*a), - "=S" (*b), - "=c" (*c), - "=d" (*d) - : "a" (op)); + unsigned int b, c, d; + + cpuid(0, 0, lvl, &b, &c, &d); + strncpy(&vendor[0], (char *)(&b), 4); + strncpy(&vendor[4], (char *)(&d), 4); + strncpy(&vendor[8], (char *)(&c), 4); + vendor[12] = '\0'; } static int @@ -31,14 +30,10 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) int nb; char vendor[16]; - cpuid(0, &lvl, &b, &c, &d); - strncpy(&vendor[0], (char *)(&b), 4); - strncpy(&vendor[4], (char *)(&d), 4); - strncpy(&vendor[8], (char *)(&c), 4); - vendor[12] = '\0'; + get_cpuid_0(vendor, &lvl); if (lvl >= 1) { - cpuid(1, &a, &b, &c, &d); + cpuid(1, 0, &a, &b, &c, &d); family = (a >> 8) & 0xf; /* bits 11 - 8 */ model = (a >> 4) & 0xf; /* Bits 7 - 4 */ diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 559365f8fe52..b69144f22489 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> +#include <string.h> #include "../../../util/tsc.h" +#include "cpuid.h" u64 rdtsc(void) { @@ -11,3 +13,34 @@ u64 rdtsc(void) return low | ((u64)high) << 32; } + +double arch_get_tsc_freq(void) +{ + unsigned int a, b, c, d, lvl; + static bool cached; + static double tsc; + char vendor[16]; + + if (cached) + return tsc; + + cached = true; + get_cpuid_0(vendor, &lvl); + if (!strstr(vendor, "Intel")) + return 0; + + /* + * Don't support Time Stamp Counter and + * Nominal Core Crystal Clock Information Leaf. + */ + if (lvl < 0x15) + return 0; + + cpuid(0x15, 0, &a, &b, &c, &d); + /* TSC frequency is not enumerated */ + if (!a || !b || !c) + return 0; + + tsc = (double)c * (double)b / (double)a; + return tsc; +} diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index 675f318ce7c1..c15a9852fa41 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -12,6 +12,7 @@ #include "expr-bison.h" #include "expr-flex.h" #include "smt.h" +#include "tsc.h" #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> @@ -402,6 +403,13 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } +#if !defined(__i386__) && !defined(__x86_64__) +double arch_get_tsc_freq(void) +{ + return 0.0; +} +#endif + double expr__get_literal(const char *literal) { static struct cpu_topology *topology; @@ -417,6 +425,11 @@ double expr__get_literal(const char *literal) goto out; } + if (!strcasecmp("#system_tsc_freq", literal)) { + result = arch_get_tsc_freq(); + goto out; + } + /* * Assume that topology strings are consistent, such as CPUs "0-1" * wouldn't be listed as "0,1", and so after deduplication the number of diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 7d83a31732a7..88fd1c4c1cb8 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,6 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); +double arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); |