diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-09 20:06:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-09-09 20:06:17 -0700 |
commit | 535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f (patch) | |
tree | a42e088342dac365cfa13f30d987118f2a5d259e /tools/perf/dlfilters | |
parent | fd3a5940e66d059d375bdb9e2d7d06c56f630d7e (diff) | |
parent | 45fc4628c15ab2cb7b2f53354b21db63f0a41f81 (diff) | |
download | linux-stable-535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f.tar.gz linux-stable-535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f.tar.bz2 linux-stable-535a265d7f0dd50d8c3a4f8b4f3a452d56bd160f.zip |
Merge tag 'perf-tools-for-v6.6-1-2023-09-05' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf tools updates from Arnaldo Carvalho de Melo:
"perf tools maintainership:
- Add git information for perf-tools and perf-tools-next trees and
branches to the MAINTAINERS file. That is where development now
takes place and myself and Namhyung Kim have write access, more
people to come as we emulate other maintainer groups.
perf record:
- Record kernel data maps when 'perf record --data' is used, so that
global variables can be resolved and used in tools that do data
profiling.
perf trace:
- Remove the old, experimental support for BPF events in which a .c
file was passed as an event: "perf trace -e hello.c" to then get
compiled and loaded.
The only known usage for that, that shipped with the kernel as an
example for such events, augmented the raw_syscalls tracepoints and
was converted to a libbpf skeleton, reusing all the user space
components and the BPF code connected to the syscalls.
In the end just the way to glue the BPF part and the user space
type beautifiers changed, now being performed by libbpf skeletons.
The next step is to use BTF to do pretty printing of all syscall
types, as discussed with Alan Maguire and others.
Now, on a perf built with BUILD_BPF_SKEL=1 we get most if not all
path/filenames/strings, some of the networking data structures,
perf_event_attr, etc, i.e. systemwide tracing of nanosleep calls
and perf_event_open syscalls while 'perf stat' runs 'sleep' for 5
seconds:
# perf trace -a -e *nanosleep,perf* perf stat -e cycles,instructions sleep 5
0.000 ( 9.034 ms): perf/327641 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0 (PERF_COUNT_HW_CPU_CYCLES), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 327642 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
9.039 ( 0.006 ms): perf/327641 perf_event_open(attr_uptr: { type: 0 (PERF_TYPE_HARDWARE), size: 136, config: 0x1 (PERF_COUNT_HW_INSTRUCTIONS), sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 327642 (perf-exec), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
? ( ): gpm/991 ... [continued]: clock_nanosleep()) = 0
10.133 ( ): sleep/327642 clock_nanosleep(rqtp: { .tv_sec: 5, .tv_nsec: 0 }, rmtp: 0x7ffd36f83ed0) ...
? ( ): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0
30.276 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ...
223.215 (1000.430 ms): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) = 0
30.276 (2000.394 ms): gpm/991 ... [continued]: clock_nanosleep()) = 0
1230.814 ( ): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) ...
1230.814 (1000.404 ms): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0
2030.886 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ...
2237.709 (1000.153 ms): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) = 0
? ( ): crond/1172 ... [continued]: clock_nanosleep()) = 0
3242.699 ( ): pool-gsd-smart/3051 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f6e7fffec90) ...
2030.886 (2000.385 ms): gpm/991 ... [continued]: clock_nanosleep()) = 0
3728.078 ( ): crond/1172 clock_nanosleep(rqtp: { .tv_sec: 60, .tv_nsec: 0 }, rmtp: 0x7ffe0971dcf0) ...
3242.699 (1000.158 ms): pool-gsd-smart/3051 ... [continued]: clock_nanosleep()) = 0
4031.409 ( ): gpm/991 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffcc6f73710) ...
10.133 (5000.375 ms): sleep/327642 ... [continued]: clock_nanosleep()) = 0
Performance counter stats for 'sleep 5':
2,617,347 cycles
1,855,997 instructions # 0.71 insn per cycle
5.002282128 seconds time elapsed
0.000855000 seconds user
0.000852000 seconds sys
perf annotate:
- Building with binutils' libopcode now is opt-in (BUILD_NONDISTRO=1)
for licensing reasons, and we missed a build test on
tools/perf/tests makefile.
Since we now default to NDEBUG=1, we ended up segfaulting when
building with BUILD_NONDISTRO=1 because a needed initialization
routine was being "error checked" via an assert.
Fix it by explicitly checking the result and aborting instead if it
fails.
We better back propagate the error, but at least 'perf annotate' on
samples collected for a BPF program is back working when perf is
built with BUILD_NONDISTRO=1.
perf report/top:
- Add back TUI hierarchy mode header, that is seen when using 'perf
report/top --hierarchy'.
- Fix the number of entries for 'e' key in the TUI that was
preventing navigation of lines when expanding an entry.
perf report/script:
- Support cross platform register handling, allowing a perf.data file
collected on one architecture to have registers sampled correctly
displayed when analysis tools such as 'perf report' and 'perf
script' are used on a different architecture.
- Fix handling of event attributes in pipe mode, i.e. when one uses:
perf record -o - | perf report -i -
When no perf.data files are used.
- Handle files generated via pipe mode with a version of perf and
then read also via pipe mode with a different version of perf,
where the event attr record may have changed, use the record size
field to properly support this version mismatch.
perf probe:
- Accessing global variables from uprobes isn't supported, make the
error message state that instead of stating that some minimal
kernel version is needed to have that feature. This seems just a
tool limitation, the kernel probably has all that is needed.
perf tests:
- Fix a reference count related leak in the dlfilter v0 API where the
result of a thread__find_symbol_fb() is not matched with an
addr_location__exit() to drop the reference counts of the resolved
components (machine, thread, map, symbol, etc). Add a dlfilter test
to make sure that doesn't regresses.
- Lots of fixes for the 'perf test' written in shell script related
to problems found with the shellcheck utility.
- Fixes for 'perf test' shell scripts testing features enabled when
perf is built with BUILD_BPF_SKEL=1, such as 'perf stat' bpf
counters.
- Add perf record sample filtering test, things like the following
example, that gets implemented as a BPF filter attached to the
event:
# perf record -e task-clock -c 10000 --filter 'ip < 0xffffffff00000000'
- Improve the way the task_analyzer test checks if libtraceevent is
linked, using 'perf version --build-options' instead of the more
expensinve 'perf record -e "sched:sched_switch"'.
- Add support for riscv in the mmap-basic test. (This went as well
via the RiscV tree, same contents).
libperf:
- Implement riscv mmap support (This went as well via the RiscV tree,
same contents).
perf script:
- New tool that converts perf.data files to the firefox profiler
format so that one can use the visualizer at
https://profiler.firefox.com/. Done by Anup Sharma as part of this
year's Google Summer of Code.
One can generate the output and upload it to the web interface but
Anup also automated everything:
perf script gecko -F 99 -a sleep 60
- Support syscall name parsing on arm64.
- Print "cgroup" field on the same line as "comm".
perf bench:
- Add new 'uprobe' benchmark to measure the overhead of uprobes
with/without BPF programs attached to it.
- breakpoints are not available on power9, skip that test.
perf stat:
- Add #num_cpus_online literal to be used in 'perf stat' metrics, and
add this extra 'perf test' check that exemplifies its purpose:
TEST_ASSERT_VAL("#num_cpus_online",
expr__parse(&num_cpus_online, ctx, "#num_cpus_online") == 0);
TEST_ASSERT_VAL("#num_cpus", expr__parse(&num_cpus, ctx, "#num_cpus") == 0);
TEST_ASSERT_VAL("#num_cpus >= #num_cpus_online", num_cpus >= num_cpus_online);
Miscellaneous:
- Improve tool startup time by lazily reading PMU, JSON, sysfs data.
- Improve error reporting in the parsing of events, passing YYLTYPE
to error routines, so that the output can show were the parsing
error was found.
- Add 'perf test' entries to check the parsing of events
improvements.
- Fix various leak for things detected by -fsanitize=address, mostly
things that would be freed at tool exit, including:
- Free evsel->filter on the destructor.
- Allow tools to register a thread->priv destructor and use it in
'perf trace'.
- Free evsel->priv in 'perf trace'.
- Free string returned by synthesize_perf_probe_point() when the
caller fails to do all it needs.
- Adjust various compiler options to not consider errors some
warnings when building with broken headers found in things like
python, flex, bison, as we otherwise build with -Werror. Some for
gcc, some for clang, some for some specific version of those, some
for some specific version of flex or bison, or some specific
combination of these components, bah.
- Allow customization of clang options for BPF target, this helps
building on gentoo where there are other oddities where BPF targets
gets passed some compiler options intended for the native build, so
building with WERROR=0 helps while these oddities are fixed.
- Dont pass ERR_PTR() values to perf_session__delete() in 'perf top'
and 'perf lock', fixing some segfaults when handling some odd
failures.
- Add LTO build option.
- Fix format of unordered lists in the perf docs
(tools/perf/Documentation)
- Overhaul the bison files, using constructs such as YYNOMEM.
- Remove unused tokens from the bison .y files.
- Add more comments to various structs.
- A few LoongArch enablement patches.
Vendor events (JSON):
- Add JSON metrics for Yitian 710 DDR (aarch64). Things like:
EventName, BriefDescription
visible_window_limit_reached_rd, "At least one entry in read queue reaches the visible window limit.",
visible_window_limit_reached_wr, "At least one entry in write queue reaches the visible window limit.",
op_is_dqsosc_mpc , "A DQS Oscillator MPC command to DRAM.",
op_is_dqsosc_mrr , "A DQS Oscillator MRR command to DRAM.",
op_is_tcr_mrr , "A Temperature Compensated Refresh(TCR) MRR command to DRAM.",
- Add AmpereOne metrics (aarch64).
- Update N2 and V2 metrics (aarch64) and events using Arm telemetry
repo.
- Update scale units and descriptions of common topdown metrics on
aarch64. Things like:
- "MetricExpr": "stall_slot_frontend / (#slots * cpu_cycles)",
- "BriefDescription": "Frontend bound L1 topdown metric",
+ "MetricExpr": "100 * (stall_slot_frontend / (#slots * cpu_cycles))",
+ "BriefDescription": "This metric is the percentage of total slots that were stalled due to resource constraints in the frontend of the processor.",
- Update events for intel: meteorlake to 1.04, sapphirerapids to
1.15, Icelake+ metric constraints.
- Update files for the power10 platform"
* tag 'perf-tools-for-v6.6-1-2023-09-05' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (217 commits)
perf parse-events: Fix driver config term
perf parse-events: Fixes relating to no_value terms
perf parse-events: Fix propagation of term's no_value when cloning
perf parse-events: Name the two term enums
perf list: Don't print Unit for "default_core"
perf vendor events intel: Fix modifier in tma_info_system_mem_parallel_reads for skylake
perf dlfilter: Avoid leak in v0 API test use of resolve_address()
perf metric: Add #num_cpus_online literal
perf pmu: Remove str from perf_pmu_alias
perf parse-events: Make common term list to strbuf helper
perf parse-events: Minor help message improvements
perf pmu: Avoid uninitialized use of alias->str
perf jevents: Use "default_core" for events with no Unit
perf test stat_bpf_counters_cgrp: Enhance perf stat cgroup BPF counter test
perf test shell stat_bpf_counters: Fix test on Intel
perf test shell record_bpf_filter: Skip 6.2 kernel
libperf: Get rid of attr.id field
perf tools: Convert to perf_record_header_attr_id()
libperf: Add perf_record_header_attr_id()
perf tools: Handle old data in PERF_RECORD_ATTR
...
Diffstat (limited to 'tools/perf/dlfilters')
-rw-r--r-- | tools/perf/dlfilters/dlfilter-test-api-v0.c | 26 | ||||
-rw-r--r-- | tools/perf/dlfilters/dlfilter-test-api-v2.c | 377 |
2 files changed, 402 insertions, 1 deletions
diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index b1f51efd67d6..72f263d49121 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -254,6 +254,30 @@ static int check_addr_al(void *ctx) return 0; } +static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_al address_al; + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + address_al.size = sizeof(address_al); + if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al)) + return test_fail("resolve_address() failed"); + + CHECK(address_al.sym && al->sym); + CHECK(!strcmp(address_al.sym, al->sym)); + CHECK(address_al.addr == al->addr); + CHECK(address_al.sym_start == al->sym_start); + CHECK(address_al.sym_end == al->sym_end); + CHECK(address_al.dso && al->dso); + CHECK(!strcmp(address_al.dso, al->dso)); + + return 0; +} + static int check_attr(void *ctx) { struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx); @@ -290,7 +314,7 @@ static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void if (early && !d->do_early) return 0; - if (check_al(ctx) || check_addr_al(ctx)) + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) return -1; if (early) diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c new file mode 100644 index 000000000000..38e593d92920 --- /dev/null +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -0,0 +1,377 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test v2 API for perf --dlfilter shared object + * Copyright (c) 2023, Intel Corporation. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> + +/* + * Copy v2 API instead of including current API + */ +#include <linux/perf_event.h> +#include <linux/types.h> + +/* + * The following macro can be used to determine if this header defines + * perf_dlfilter_sample machine_pid and vcpu. + */ +#define PERF_DLFILTER_HAS_MACHINE_PID + +/* Definitions for perf_dlfilter_sample flags */ +enum { + PERF_DLFILTER_FLAG_BRANCH = 1ULL << 0, + PERF_DLFILTER_FLAG_CALL = 1ULL << 1, + PERF_DLFILTER_FLAG_RETURN = 1ULL << 2, + PERF_DLFILTER_FLAG_CONDITIONAL = 1ULL << 3, + PERF_DLFILTER_FLAG_SYSCALLRET = 1ULL << 4, + PERF_DLFILTER_FLAG_ASYNC = 1ULL << 5, + PERF_DLFILTER_FLAG_INTERRUPT = 1ULL << 6, + PERF_DLFILTER_FLAG_TX_ABORT = 1ULL << 7, + PERF_DLFILTER_FLAG_TRACE_BEGIN = 1ULL << 8, + PERF_DLFILTER_FLAG_TRACE_END = 1ULL << 9, + PERF_DLFILTER_FLAG_IN_TX = 1ULL << 10, + PERF_DLFILTER_FLAG_VMENTRY = 1ULL << 11, + PERF_DLFILTER_FLAG_VMEXIT = 1ULL << 12, +}; + +/* + * perf sample event information (as per perf script and <linux/perf_event.h>) + */ +struct perf_dlfilter_sample { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u16 ins_lat; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u16 p_stage_cyc; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u64 ip; + __s32 pid; + __s32 tid; + __u64 time; + __u64 addr; + __u64 id; + __u64 stream_id; + __u64 period; + __u64 weight; /* Refer PERF_SAMPLE_WEIGHT_TYPE in <linux/perf_event.h> */ + __u64 transaction; /* Refer PERF_SAMPLE_TRANSACTION in <linux/perf_event.h> */ + __u64 insn_cnt; /* For instructions-per-cycle (IPC) */ + __u64 cyc_cnt; /* For instructions-per-cycle (IPC) */ + __s32 cpu; + __u32 flags; /* Refer PERF_DLFILTER_FLAG_* above */ + __u64 data_src; /* Refer PERF_SAMPLE_DATA_SRC in <linux/perf_event.h> */ + __u64 phys_addr; /* Refer PERF_SAMPLE_PHYS_ADDR in <linux/perf_event.h> */ + __u64 data_page_size; /* Refer PERF_SAMPLE_DATA_PAGE_SIZE in <linux/perf_event.h> */ + __u64 code_page_size; /* Refer PERF_SAMPLE_CODE_PAGE_SIZE in <linux/perf_event.h> */ + __u64 cgroup; /* Refer PERF_SAMPLE_CGROUP in <linux/perf_event.h> */ + __u8 cpumode; /* Refer CPUMODE_MASK etc in <linux/perf_event.h> */ + __u8 addr_correlates_sym; /* True => resolve_addr() can be called */ + __u16 misc; /* Refer perf_event_header in <linux/perf_event.h> */ + __u32 raw_size; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ + const void *raw_data; /* Refer PERF_SAMPLE_RAW in <linux/perf_event.h> */ + __u64 brstack_nr; /* Number of brstack entries */ + const struct perf_branch_entry *brstack; /* Refer <linux/perf_event.h> */ + __u64 raw_callchain_nr; /* Number of raw_callchain entries */ + const __u64 *raw_callchain; /* Refer <linux/perf_event.h> */ + const char *event; + __s32 machine_pid; + __s32 vcpu; +}; + +/* + * Address location (as per perf script) + */ +struct perf_dlfilter_al { + __u32 size; /* Size of this structure (for compatibility checking) */ + __u32 symoff; + const char *sym; + __u64 addr; /* Mapped address (from dso) */ + __u64 sym_start; + __u64 sym_end; + const char *dso; + __u8 sym_binding; /* STB_LOCAL, STB_GLOBAL or STB_WEAK, refer <elf.h> */ + __u8 is_64_bit; /* Only valid if dso is not NULL */ + __u8 is_kernel_ip; /* True if in kernel space */ + __u32 buildid_size; + __u8 *buildid; + /* Below members are only populated by resolve_ip() */ + __u8 filtered; /* True if this sample event will be filtered out */ + const char *comm; + void *priv; /* Private data (v2 API) */ +}; + +struct perf_dlfilter_fns { + /* Return information about ip */ + const struct perf_dlfilter_al *(*resolve_ip)(void *ctx); + /* Return information about addr (if addr_correlates_sym) */ + const struct perf_dlfilter_al *(*resolve_addr)(void *ctx); + /* Return arguments from --dlarg option */ + char **(*args)(void *ctx, int *dlargc); + /* + * Return information about address (al->size must be set before + * calling). Returns 0 on success, -1 otherwise. Call al_cleanup() + * when 'al' data is no longer needed. + */ + __s32 (*resolve_address)(void *ctx, __u64 address, struct perf_dlfilter_al *al); + /* Return instruction bytes and length */ + const __u8 *(*insn)(void *ctx, __u32 *length); + /* Return source file name and line number */ + const char *(*srcline)(void *ctx, __u32 *line_number); + /* Return perf_event_attr, refer <linux/perf_event.h> */ + struct perf_event_attr *(*attr)(void *ctx); + /* Read object code, return numbers of bytes read */ + __s32 (*object_code)(void *ctx, __u64 ip, void *buf, __u32 len); + /* + * If present (i.e. must check al_cleanup != NULL), call after + * resolve_address() to free any associated resources. (v2 API) + */ + void (*al_cleanup)(void *ctx, struct perf_dlfilter_al *al); + /* Reserved */ + void *(*reserved[119])(void *); +}; + +struct perf_dlfilter_fns perf_dlfilter_fns; + +static int verbose; + +#define pr_debug(fmt, ...) do { \ + if (verbose > 0) \ + fprintf(stderr, fmt, ##__VA_ARGS__); \ + } while (0) + +static int test_fail(const char *msg) +{ + pr_debug("%s\n", msg); + return -1; +} + +#define CHECK(x) do { \ + if (!(x)) \ + return test_fail("Check '" #x "' failed\n"); \ + } while (0) + +struct filter_data { + __u64 ip; + __u64 addr; + int do_early; + int early_filter_cnt; + int filter_cnt; +}; + +static struct filter_data *filt_dat; + +int start(void **data, void *ctx) +{ + int dlargc; + char **dlargv; + struct filter_data *d; + static bool called; + + verbose = 1; + + CHECK(!filt_dat && !called); + called = true; + + d = calloc(1, sizeof(*d)); + if (!d) + test_fail("Failed to allocate memory"); + filt_dat = d; + *data = d; + + dlargv = perf_dlfilter_fns.args(ctx, &dlargc); + + CHECK(dlargc == 6); + CHECK(!strcmp(dlargv[0], "first")); + verbose = strtol(dlargv[1], NULL, 0); + d->ip = strtoull(dlargv[2], NULL, 0); + d->addr = strtoull(dlargv[3], NULL, 0); + d->do_early = strtol(dlargv[4], NULL, 0); + CHECK(!strcmp(dlargv[5], "last")); + + pr_debug("%s API\n", __func__); + + return 0; +} + +#define CHECK_SAMPLE(x) do { \ + if (sample->x != expected.x) \ + return test_fail("'" #x "' not expected value\n"); \ + } while (0) + +static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_sample expected = { + .ip = d->ip, + .pid = 12345, + .tid = 12346, + .time = 1234567890, + .addr = d->addr, + .id = 99, + .stream_id = 101, + .period = 543212345, + .cpu = 31, + .cpumode = PERF_RECORD_MISC_USER, + .addr_correlates_sym = 1, + .misc = PERF_RECORD_MISC_USER, + }; + + CHECK(sample->size >= sizeof(struct perf_dlfilter_sample)); + + CHECK_SAMPLE(ip); + CHECK_SAMPLE(pid); + CHECK_SAMPLE(tid); + CHECK_SAMPLE(time); + CHECK_SAMPLE(addr); + CHECK_SAMPLE(id); + CHECK_SAMPLE(stream_id); + CHECK_SAMPLE(period); + CHECK_SAMPLE(cpu); + CHECK_SAMPLE(cpumode); + CHECK_SAMPLE(addr_correlates_sym); + CHECK_SAMPLE(misc); + + CHECK(!sample->raw_data); + CHECK_SAMPLE(brstack_nr); + CHECK(!sample->brstack); + CHECK_SAMPLE(raw_callchain_nr); + CHECK(!sample->raw_callchain); + +#define EVENT_NAME "branches:" + CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); + + return 0; +} + +static int check_al(void *ctx) +{ + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + CHECK(al->sym && !strcmp("foo", al->sym)); + CHECK(!al->symoff); + + return 0; +} + +static int check_addr_al(void *ctx) +{ + const struct perf_dlfilter_al *addr_al; + + addr_al = perf_dlfilter_fns.resolve_addr(ctx); + if (!addr_al) + return test_fail("resolve_addr() failed"); + + CHECK(addr_al->sym && !strcmp("bar", addr_al->sym)); + CHECK(!addr_al->symoff); + + return 0; +} + +static int check_address_al(void *ctx, const struct perf_dlfilter_sample *sample) +{ + struct perf_dlfilter_al address_al; + const struct perf_dlfilter_al *al; + + al = perf_dlfilter_fns.resolve_ip(ctx); + if (!al) + return test_fail("resolve_ip() failed"); + + address_al.size = sizeof(address_al); + if (perf_dlfilter_fns.resolve_address(ctx, sample->ip, &address_al)) + return test_fail("resolve_address() failed"); + + CHECK(address_al.sym && al->sym); + CHECK(!strcmp(address_al.sym, al->sym)); + CHECK(address_al.addr == al->addr); + CHECK(address_al.sym_start == al->sym_start); + CHECK(address_al.sym_end == al->sym_end); + CHECK(address_al.dso && al->dso); + CHECK(!strcmp(address_al.dso, al->dso)); + + /* al_cleanup() is v2 API so may not be present */ + if (perf_dlfilter_fns.al_cleanup) + perf_dlfilter_fns.al_cleanup(ctx, &address_al); + + return 0; +} + +static int check_attr(void *ctx) +{ + struct perf_event_attr *attr = perf_dlfilter_fns.attr(ctx); + + CHECK(attr); + CHECK(attr->type == PERF_TYPE_HARDWARE); + CHECK(attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + + return 0; +} + +static int do_checks(void *data, const struct perf_dlfilter_sample *sample, void *ctx, bool early) +{ + struct filter_data *d = data; + + CHECK(data && filt_dat == data); + + if (early) { + CHECK(!d->early_filter_cnt); + d->early_filter_cnt += 1; + } else { + CHECK(!d->filter_cnt); + CHECK(d->early_filter_cnt); + CHECK(d->do_early != 2); + d->filter_cnt += 1; + } + + if (check_sample(data, sample)) + return -1; + + if (check_attr(ctx)) + return -1; + + if (early && !d->do_early) + return 0; + + if (check_al(ctx) || check_addr_al(ctx) || check_address_al(ctx, sample)) + return -1; + + if (early) + return d->do_early == 2; + + return 1; +} + +int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + pr_debug("%s API\n", __func__); + + return do_checks(data, sample, ctx, true); +} + +int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx) +{ + pr_debug("%s API\n", __func__); + + return do_checks(data, sample, ctx, false); +} + +int stop(void *data, void *ctx) +{ + static bool called; + + pr_debug("%s API\n", __func__); + + CHECK(data && filt_dat == data && !called); + called = true; + + free(data); + filt_dat = NULL; + return 0; +} + +const char *filter_description(const char **long_description) +{ + *long_description = "Filter used by the 'dlfilter C API' perf test"; + return "dlfilter to test v2 C API"; +} |