summaryrefslogtreecommitdiffstats
path: root/tools/perf/util
Commit message (Collapse)AuthorAgeFilesLines
...
* | perf data: Explicitly set byte order for integer typesWang Nan2016-03-031-0/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | After babeltrace commit 5cec03e402aa ("ir: copy variants and sequences when setting a field path"), 'perf data convert' gets incorrect result if there's bpf output data. For example: # perf data convert --to-ctf ./out.ctf # babeltrace ./out.ctf [10:44:31.186045346] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E7DD1, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0xC028E32F, [1] = 0x815D0100, [2] = 0x1000000 ] } [10:44:31.286101003] (+0.100055657) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF8105B609, perf_tid = 23819, perf_pid = 23819, perf_id = 518, raw_len = 3, raw_data = [ [0] = 0x35D9F1EB, [1] = 0x15D81, [2] = 0x2 ] } The expected result of the first sample should be: raw_data = [ [0] = 0x2FE328C0, [1] = 0x15D81, [2] = 0x1 ] } however, 'perf data convert' output big endian value to resuling CTF file. The reason is a internal change (or a bug?) of babeltrace. Before this patch, at the first add_bpf_output_values(), byte order of all integer type is uncertain (is 0, neither 1234 (le) nor 4321 (be)). It would be fixed by: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... ->bt_ctf_trace_add_stream_class ->bt_ctf_field_type_structure_set_byte_order ->bt_ctf_field_type_integer_set_byte_order during creating the stream. However, the babeltrace commit mentioned above duplicates types in sequence to prevent potential conflict in following call stack and link the newly allocated type into the 'raw_data' sequence: perf_evlist__deliver_sample -> process_sample_event -> ctf_stream ... -> bt_ctf_trace_add_stream_class -> bt_ctf_stream_class_resolve_types ... -> bt_ctf_field_type_sequence_copy ->bt_ctf_field_type_integer_copy This happens before byte order setting, so only the newly allocated type is initialized, the byte order of original type perf choose to create the first raw_data is still uncertain. Byte order in CTF output is not related to byte order in perf.data. Setting it to anything other than BT_CTF_BYTE_ORDER_NATIVE solves this problem (only BT_CTF_BYTE_ORDER_NATIVE needs to be fixed). To reduce behavior changing, set byte order according to compiling options. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Jérémie Galarneau <jeremie.galarneau@efficios.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf data: Support converting data from bpf_perf_event_output()Wang Nan2016-03-031-1/+111
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | bpf_perf_event_output() outputs data through sample->raw_data. This patch adds support to convert those data into CTF. A python script then can be used to process output data from BPF programs. Test result: # cat ./test_bpf_output_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; static inline int __attribute__((always_inline)) func(void *ctx, int type) { struct { u64 ktime; int type; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output\n"; int err; output_data.type = type; output_data.ktime = ktime_get_ns(); err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data)); return 0; } SEC("func_begin=sys_nanosleep") int func_begin(void *ctx) {return func(ctx, 1);} SEC("func_end=sys_nanosleep%return") int func_end(void *ctx) { return func(ctx, 2);} char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # ./perf record -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output_2.c/map:channel.event=evt/ \ usleep 100000 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data (2 samples) ] # ./perf script usleep 14942 92503.198504: evt: ffffffff810e0ba1 sys_nanosleep (/lib/modules/4.3.0.... usleep 14942 92503.298562: evt: ffffffff810585e9 kretprobe_trampoline_holder (/lib.... # ./perf data convert --to-ctf ./out.ctf [ perf data convert: Converted 'perf.data' into CTF data './out.ctf' ] [ perf data convert: Converted and wrote 0.000 MB (2 samples) ] # babeltrace ./out.ctf [01:41:43.198504134] (+?.?????????) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810E0BA1, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x32C0C07B, [1] = 0x5421, [2] = 0x1 ] } [01:41:43.298562257] (+0.100058123) evt: { cpu_id = 0 }, { perf_ip = 0xFFFFFFFF810585E9, perf_tid = 14942, perf_pid = 14942, perf_id = 1044, raw_len = 3, raw_data = [ [0] = 0x38B77FAA, [1] = 0x5421, [2] = 0x2 ] } # cat ./test_bpf_output_2.py from babeltrace import TraceCollection tc = TraceCollection() tc.add_trace('./out.ctf', 'ctf') d = {1:[], 2:[]} for event in tc.events: if not event.name.startswith('evt'): continue raw_data = event['raw_data'] (time, type) = ((raw_data[0] + (raw_data[1] << 32)), raw_data[2]) d[type].append(time) print(list(map(lambda i: d[2][i] - d[1][i], range(len(d[1]))))); # python3 ./test_bpf_output_2.py [100056879] Committer note: Make sure you have python3-devel installed, not python-devel, which may be for python2, which will lead to some "PyInstance_Type" errors. Also make sure that you use the right libbabeltrace, because it is shipped in Fedora, for instance, but an older version. To build libbabeltrace's python binding one also needs to use: ./configure --enable-python-bindings And then set PYTHONPATH=/usr/local/lib64/python3.4/site-packages/. Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-9-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Fix locale handling in pmu parsingJiri Olsa2016-03-031-0/+13
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Ingo reported regression on display format of big numbers, which is missing separators (in default perf stat output). triton:~/tip> perf stat -a sleep 1 ... 127008602 cycles # 0.011 GHz 279538533 stalled-cycles-frontend # 220.09% frontend cycles idle 119213269 instructions # 0.94 insn per cycle This is caused by recent change: perf stat: Check existence of frontend/backed stalled cycles that added call to pmu_have_event, that subsequently calls perf_pmu__parse_scale, which has a bug in locale handling. The lc string returned from setlocale, that we use to store old locale value, may be allocated in static storage. Getting a dynamic copy to make it survive another setlocale call. $ perf stat ls ... 2,360,602 cycles # 3.080 GHz 2,703,090 instructions # 1.15 insn per cycle 546,031 branches # 712.511 M/sec Committer note: Since the patch introducing the regression didn't made to perf/core, move it to just before where the regression was introduced, so that we don't break bisection for this feature. Reported-by: Ingo Molnar <mingo@redhat.com> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20160303095348.GA24511@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Fix python extension buildJiri Olsa2016-02-291-0/+4
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The util/python-ext-sources file contains source files required to build the python extension relative to $(srctree)/tools/perf, Such a file path $(FILE).c is handed over to the python extension build system, which builds the final object in the $(PYTHON_EXTBUILD)/tmp/$(FILE).o path. After the build is done all files from $(PYTHON_EXTBUILD)lib/ are carried as the result binaries. Above system fails when we add source file relative to ../lib, which we do for: ../lib/bitmap.c ../lib/find_bit.c ../lib/hweight.c ../lib/rbtree.c All above objects will be built like: $(PYTHON_EXTBUILD)/tmp/../lib/bitmap.c $(PYTHON_EXTBUILD)/tmp/../lib/find_bit.c $(PYTHON_EXTBUILD)/tmp/../lib/hweight.c $(PYTHON_EXTBUILD)/tmp/../lib/rbtree.c which accidentally happens to be final library path: $(PYTHON_EXTBUILD)/lib/ Changing setup.py to pass full paths of source files to Extension build class and thus keep all built objects under $(PYTHON_EXTBUILD)tmp directory. Reported-by: Jeff Bastian <jbastian@redhat.com> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Josh Boyer <jwboyer@fedoraproject.org> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/20160227201350.GB28494@krava.redhat.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Only set filter for tracepoints eventsWang Nan2016-02-261-0/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | perf_evlist__set_filter() tries to set filter to every evsel linked in the evlist. However, since filters can only be applied to tracepoints, checking type of evsel before calling perf_evsel__set_filter() would be better. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-6-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf config: Bring perf_default_config to the very beginning at main()Wang Nan2016-02-263-5/+7
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Before this patch each subcommand calls perf_config() by themself, reading the default configuration together with subcommand specific options. If a subcommand doesn't have it own options, it needs to call 'perf_config(perf_default_config, NULL)' to ensure .perfconfig is loaded. This patch brings perf_config(perf_default_config, NULL) to the very start of main(), so subcommands don't need to do it. After this patch, 'llvm.clang-path' works for 'perf trace'. Signed-off-by: Wang Nan <wangnan0@huawei.com> Suggested-and-Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456479154-136027-4-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf report: Update column width of dynamic entriesNamhyung Kim2016-02-262-3/+16
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The column width of dynamic entries is updated when comparing hist entries. However some unique entries can miss the chance to update. So move the update to output resort stage to make sure every entry will get called before display. To do that, abuse ->sort callback to update the width when the third argument is NULL. When resorting entries in normal path, it never be NULL so it should be fine IMHO. Before: # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP <-- here After: # Overhead ptr / bytes_req / gfp_flags # .............. ..................................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP_NOMEMALLOC Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456512767-1164-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Fix dynamic entry display in hierarchyNamhyung Kim2016-02-262-1/+4
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When dynamic sort key is used it might not show pretty printed output. This is because the trace output was not set only for the first dynamic sort key. During hierarchy_insert_entry() it missed to pass the trace_output to dynamic entries. Also even if it did, only first entry will have it. Subsequent entries might set it during collapsing stage but it's not guaranteed. Before: $ perf report --hierarchy --stdio -s ptr,bytes_req,gfp_flags -g none # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% 66080 10.42% 0xffff8803f766be00 8.33% 96 8.33% 66080 2.08% 512 2.08% 67280 After: # # Overhead ptr / bytes_req / gfp_flags # .............. .......................................... # 37.50% 0xffff8803f7669400 37.50% 448 37.50% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 10.42% 0xffff8803f766be00 8.33% 96 8.33% GFP_ATOMIC|GFP_NOWARN|GFP_NOMEMALLOC 2.08% 512 2.08% GFP_KERNEL|GFP_NOWARN|GFP_REPEAT|GFP Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456512767-1164-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf report: Fix indentation of dynamic entries in hierarchyNamhyung Kim2016-02-262-0/+20
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When dynamic entries are used in the hierarchy mode with multiple events, the output might not be aligned properly. In the hierarchy mode, the each sort column is indented using total number of sort keys. So it keeps track of number of sort keys when adding them. However a dynamic sort key can be added more than once when multiple events have same field names. This results in unnecessarily long indentation in the output. For example perf kmem records following events: $ perf evlist --trace-fields -i perf.data.kmem kmem:kmalloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmalloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kfree: trace_fields: call_site,ptr kmem:kmem_cache_alloc: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags kmem:kmem_cache_alloc_node: trace_fields: call_site,ptr,bytes_req,bytes_alloc,gfp_flags,node kmem:kmem_cache_free: trace_fields: call_site,ptr kmem:mm_page_alloc: trace_fields: page,order,gfp_flags,migratetype kmem:mm_page_free: trace_fields: page,order As you can see, many field names shared between kmem events. So adding 'ptr' dynamic sort key alone will set nr_sort_keys to 6. And this adds many unnecessary spaces between columns. Before: $ perf report -i perf.data.kmem --hierarchy -s ptr -g none --stdio ... # Overhead ptr # ....................... ................................... # 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 After: # Overhead ptr # ........ .................... # 99.89% 0xffff8803ffb79720 0.06% 0xffff8803d228a000 0.03% 0xffff8803f7678f00 0.00% 0xffff880401dc5280 0.00% 0xffff880406172380 0.00% 0xffff8803ffac3a00 0.00% 0xffff8803ffac1600 Signed-off-by: Namhyung Kim <namhyung@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456512767-1164-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Fix comparing of dynamic entriesNamhyung Kim2016-02-261-0/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When hist_entry__cmp() and hist_entry__collapse() are called, they should check if the dynamic entry is comparing matching hists only. Otherwise it might access different hists resulting in incorrect output. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456512767-1164-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists browser: Show message for percent limitNamhyung Kim2016-02-262-0/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Like the stdio, it should show messages about omitted hierarchy entries. Please refer the previous commit for more details. As it needs to check an entry is omitted or not multiple times, add the has_no_entry field in the hist entry. Suggested-and-Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456488800-28124-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Add more helper functions for the hierarchy modeNamhyung Kim2016-02-262-0/+28
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The hists__overhead_width() is to calculate width occupied by the overhead (and others) columns before the sort columns. The hist_entry__has_hiearchy_children() is to check whether an entry has lower entries (children) in the hierarchy to be shown in the output. This means the children should not be filtered out and above the percent limit. These two functions will be used to show information when all children of an entry is omitted by the percent limit (or filter). Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456488800-28124-1-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf script: Exception handling when the print fmt is emptyTaeung Song2016-02-252-0/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | After collecting samples for events 'syscalls:', perf-script with python script doesn't occasionally work generating a segmentation fault. The reason is that the print fmt is empty and a value of event->print_fmt.args is NULL, so dereferencing the null pointer results in a segmentation fault i.e.: # perf record -e syscalls:* # perf script -g python # perf script -s perf-script.py in trace_begin syscalls__sys_enter_brk 3 79841.832099154 3777 test.sh syscall_nr=12, brk=0 ... (omitted) ... Segmentation fault (core dumped) For example, a format of sys_enter_getuid() hasn't print fmt as below. # cat /sys/kernel/debug/tracing/events/syscalls/sys_enter_getuid/format name: sys_enter_getuid ID: 188 format: field:unsigned short common_type; offset:0; size:2; signed:0; field:unsigned char common_flags; offset:2; size:1; signed:0; field:unsigned char common_preempt_count; offset:3; size:1; signed:0; field:int common_pid; offset:4; size:4; signed:1; field:int syscall_nr; offset:8; size:4; signed:1; print fmt: "" So add exception handling to avoid this problem. Signed-off-by: Taeung Song <treeze.taeung@gmail.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456413179-12331-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Fix parsing of pmu events with empty list of modifiersArnaldo Carvalho de Melo2016-02-251-3/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | In 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") I removed the unconditional "'/' '/'" for pmu events such as "intel_pt//" but forgot to use opt_event_config where it expected some event_config, oops. Fix it. Noticed when trying to use: # perf record -e intel_pt// -a sleep 1 event syntax error: 'intel_pt//' \___ parser error Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events # Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Fixes: 1d55e8ef340d ("perf tools: Introduce opt_event_config nonterminal") Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Support decaying in hierarchy modeNamhyung Kim2016-02-241-8/+34
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | In the hierarchy mode, hist entries should decay their children too. Also update hists__delete_entry() to be able to free child entries. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-18-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf ui/stdio: Align column header for hierarchy outputNamhyung Kim2016-02-242-0/+11
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The hierarchy output mode is to group entries so the existing columns won't fit to the new output. Treat all sort keys as a single column and separate headers by "/". # Overhead Command / Shared Object # ........... ................................ # 15.11% swapper 14.97% [kernel.vmlinux] 0.09% [libahci] 0.05% [iwlwifi] ... Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-11-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf ui/stdio: Implement hierarchy output modeNamhyung Kim2016-02-241-0/+2
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The hierarchy output mode is to group entries for each level so that user can see higher level picture more easily. It also helps to find out which component is most costly. The output will look like below: 15.11% swapper 14.97% [kernel.vmlinux] 0.09% [libahci] 0.05% [iwlwifi] 10.29% irq/33-iwlwifi 6.45% [kernel.vmlinux] 1.41% [mac80211] 1.15% [iwldvm] 1.14% [iwlwifi] 0.14% [cfg80211] 4.81% firefox 3.92% libxul.so 0.34% [kernel.vmlinux] Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-10-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Count number of sort keysNamhyung Kim2016-02-241-0/+1
| | | | | | | | | | | | | | | | | | | | | | | | | | | | It'll be used for hierarchy output mode to indent entries properly. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Resort after filtering hierarchyNamhyung Kim2016-02-241-0/+54
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | In hierarchy mode, a filter can affect periods of entries in upper hierarchy. So it needs to resort the hists after filter. For example, let's look at following example: Overhead Command / Shared Object / Symbol ------------ -------------------------------- 30.00% perf 20.00% perf 10.00% main 5.00% pr_debug 5.00% memcpy 10.00% [kernel.vmlinux] 8.00% memset 2.00% cpu_idle If we apply simbol filter for 'mem' it should look like this 13.00% perf 8.00% [kernel.vmlinux] 8.00% memset 5.00% perf 5.00% memcpy Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-8-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Support filtering in hierarchy modeNamhyung Kim2016-02-241-8/+93
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The hists__filter_hierarchy() function implements filtering in hierarchy mode. Now we have hist_entry__filter() so use it for entries in the hierarchy. It returns 3 kind of values. A negative value means that it's not filtered by this type. It marks current entry as filtered tentatively so if a lower level entry removes the filter it also removes the all parent so that we can find the entry in the output. Zero means it's filtered out by this type. A positive value means it's not filtered so it removes the filter and shows in the output. In these cases, it moves to next entry since lower level entry won't match by this type of filter anymore. Thus all children will be filtered or not together. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Introduce hist_entry__filter()Namhyung Kim2016-02-243-0/+116
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The hist_entry__filter() function is to filter hist entries using sort key related info. This is needed to support hierarchy mode since each hist entry will be associated with a hpp fmt which has a sort key. So each entry should compare to only matching type of filters. To do that, add the ->se_filter callback field to struct sort_entry. This callback takes 'type' argument which determines whether it's matching sort key or not. It returns -1 for non-matching type, 0 for filtered entry and 1 for not filtered entries. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-6-git-send-email-namhyung@kernel.org [ 'socket' is reserved in sys/socket.h, so replace it with 'sk' ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Add helper functions for hierarchy modeNamhyung Kim2016-02-242-0/+72
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The rb_hierarchy_{next,prev,last} functions are to traverse all hist entries in a hierarchy. They will be used by various function which supports hierarchy output. As the rb_hierarchy_next() is used to traverse the whole hierarchy, it sometime needs to visit entries regardless of current folding state. So add enum hierarchy_move_dir and pass it to __rb_hierarchy_next() for those cases. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Resort hist entries with hierarchyNamhyung Kim2016-02-241-3/+91
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | For hierarchical output, each entry must be sorted in their rbtree (hroot) properly. Add hists__hierarchy_output_resort() to do the job. Note that those hierarchy entries share the period counts, it'd be important to update the hists->stats only once (for leaves). Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf hists: Basic support of hierarchical report viewNamhyung Kim2016-02-243-2/+128
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | In the hierarchical view, entries will be grouped and sorted on the first key, and then on the second key, and so on. Add the he->hroot_{in,out} fields to keep the lower level entries. Actually this can share space, in a union, with callchain's 'sorted_root' since the hroots are only used by non-leaf entries and callchain is only used by leaf entries. It also adds the 'parent_he' and 'depth' fields which can be used by browsers. This patch only implements collapsing part which creates internal entries for each sort key. These need to be sorted by output_sort stage and to be displayed properly in the later patch(es). Signed-off-by: Namhyung Kim <namhyung@kernel.org> Acked-by: Pekka Enberg <penberg@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Add helper functions for some sort keysNamhyung Kim2016-02-242-0/+36
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | The 'trace', 'srcline' and 'srcfile' sort keys updates hist entry's field later. With the hierarchy mode, those fields are passed to a matching entry so it needs to identify the sort keys. Signed-off-by: Namhyung Kim <namhyung@kernel.org> Cc: Andi Kleen <andi@firstfloor.org> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1456326830-30456-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Make binary data printer code in trace_event public availableWang Nan2016-02-243-27/+105
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Move code printing binray data from trace_event() to utils.c and allows passing different printer. Further commits will use this logic to print bpf output event. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456312845-111583-2-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf script: Display data_src valuesJiri Olsa2016-02-242-0/+17
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Adding support to display data_src values, for events with data_src data in sample. Example: $ perf script ... rcuos/3 32 [002] ... 68501042 Local RAM hit|SNP None or Hit|TLB L1 or L2 hit|LCK No ... rcuos/3 32 [002] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [002] ... 68100242 LFB hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [000] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... swapper 0 [000] ... 50100142 L1 hit|SNP None|TLB L2 miss|LCK No ... rcuos/3 32 [002] ... 68100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK No ... plugin-containe 16538 [000] ... 6a100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK Yes ... gkrellm 1736 [000] ... 68100242 LFB hit|SNP None|TLB L1 or L2 hit|LCK No ... gkrellm 1736 [000] ... 6a100142 L1 hit|SNP None|TLB L1 or L2 hit|LCK Yes ... ^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ data_src value data_src translation Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-14-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Change perf_mem__lck_scnprintf to return nb of displayed bytesJiri Olsa2016-02-242-6/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Moving strncat call into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Change perf_mem__snp_scnprintf to return nb of displayed bytesJiri Olsa2016-02-242-5/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-12-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Change perf_mem__lvl_scnprintf to return nb of displayed bytesJiri Olsa2016-02-242-7/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Change perf_mem__tlb_scnprintf to return nb of displayed bytesJiri Olsa2016-02-242-7/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Moving strncat/strcpy calls into scnprintf to easily track number of displayed bytes. It will be used in following patch. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-10-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce perf_mem__lck_scnprintf functionJiri Olsa2016-02-243-12/+20
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Move meminfo's lck display function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-9-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce perf_mem__snp_scnprintf functionJiri Olsa2016-02-243-30/+36
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Move meminfo's snp display function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-8-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce perf_mem__lvl_scnprintf functionJiri Olsa2016-02-243-49/+55
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Move meminfo's lvl display function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-7-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce perf_mem__tlb_scnprintf functionJiri Olsa2016-02-243-42/+52
| | | | | | | | | | | | | | | | | | | | | | | | | | | | Move meminfo's tlb display function into mem-events.c object, so it could be reused later from script code. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf mem: Introduce perf_mem_events__name functionJiri Olsa2016-02-242-0/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Wrap perf_mem_events[].name into perf_mem_events__name() so we could alter the events name if needed. This will be handy when changing latency settings for loads event in following patch. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf mem record: Check for memory events supportJiri Olsa2016-02-242-3/+35
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Check if current kernel support available memory events and display the status within -e list option: $ perf mem record -e list ldlat-loads : available ldlat-stores : available Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1456303616-26926-2-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Remove strbuf_{remove,splice}()Arnaldo Carvalho de Melo2016-02-232-26/+0
| | | | | | | | | | | | | | | | | | | | | | No users, nuke them. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/n/tip-kfv2wo8xann8t97wdalttcx7@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Dont stop PMU parsing on alias parse errorAndi Kleen2016-02-231-7/+8
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | When an error happens during alias parsing currently the complete parsing of all attributes of the PMU is stopped. This is breaks old perf on a newer kernel that may have not-yet-know alias attributes (such as .scale or .per-pkg). Continue when some attribute is unparseable. This is IMHO a stable candidate and should be backported to older versions to avoid problems with newer kernels. v2: Print warnings when something goes wrong. v3: Change warning to debug output Signed-off-by: Andi Kleen <ak@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: stable@vger.kernel.org # v3.6+ Link: http://lkml.kernel.org/r/1455749095-18358-1-git-send-email-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Use ARRAY_SIZE in mem sort display functionsJiri Olsa2016-02-231-6/+3
| | | | | | | | | | | | | | | | | | | | | | | | | | There's no need to define extra macros for that. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1455525293-8671-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf mem: Add -e record optionJiri Olsa2016-02-232-3/+47
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Adding -e option for perf mem record command, to be able to specify memory event directly. Get list of available events: $ perf mem record -e list ldlat-loads ldlat-stores Monitor ldlat-loads: $ perf mem record -e ldlat-loads true Committer notes: Further testing: # perf mem record -e ldlat-loads true [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (10 samples) ] # perf evlist cpu/mem-loads,ldlat=30/P # Signed-off-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1455525293-8671-6-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Add monitored events arrayJiri Olsa2016-02-233-0/+30
| | | | | | | | | | | | | | | | | | | | | | | | | | | | It will ease up configuration of memory events and addition of other memory events in following patches. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1455525293-8671-5-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce cl_offset functionJiri Olsa2016-02-231-0/+6
| | | | | | | | | | | | | | | | | | | | | | | | | | It'll be used in following patches. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1455525293-8671-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Make cl_address globalJiri Olsa2016-02-232-6/+5
| | | | | | | | | | | | | | | | | | | | | | | | | | It'll be used in following patches. Signed-off-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Stephane Eranian <eranian@google.com> Link: http://lkml.kernel.org/r/1455525293-8671-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Introduce bpf-output eventWang Nan2016-02-224-3/+16
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adds a helper to enable a BPF program to output data to a perf ring buffer through a new type of perf event, PERF_COUNT_SW_BPF_OUTPUT. This patch enables perf to create events of that type. Now a perf user can use the following cmdline to receive output data from BPF programs: # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script perf 1560 [004] 347747.086295: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086300: evt: ffffffff811fd201 sys_write ... perf 1560 [004] 347747.086315: evt: ffffffff811fd201 sys_write ... ... Test result: # cat test_bpf_output.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; #define SEC(NAME) __attribute__((section(NAME), used)) static u64 (*ktime_get_ns)(void) = (void *)BPF_FUNC_ktime_get_ns; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_output)(void *, struct bpf_map_def *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(u32), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { struct { u64 ktime; int cpuid; } __attribute__((packed)) output_data; char error_data[] = "Error: failed to output: %d\n"; output_data.cpuid = get_smp_processor_id(); output_data.ktime = ktime_get_ns(); int err = perf_event_output(ctx, &channel, get_smp_processor_id(), &output_data, sizeof(output_data)); if (err) trace_printk(error_data, sizeof(error_data), err); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************ END ***************************/ # perf record -a -e bpf-output/no-inherit,name=evt/ \ -e ./test_bpf_output.c/map:channel.event=evt/ ls / # perf script | grep ls ls 2242 [003] 347851.557563: evt: ffffffff811fd201 sys_write ... ls 2242 [003] 347851.557571: evt: ffffffff811fd201 sys_write ... Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-11-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Apply tracepoint event definition options to BPF scriptWang Nan2016-02-222-7/+52
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | Users can pass options to tracepoints defined in the BPF script. For example: # perf record -e ./test.c/no-inherit/ bash # dd if=/dev/zero of=/dev/null count=10000 # exit [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.022 MB perf.data (139 samples) ] (no-inherit works, only the sys_read issued by bash are captured, at least 10000 sys_read issued by dd are skipped.) test.c: #define SEC(NAME) __attribute__((section(NAME), used)) SEC("func=sys_read") int bpf_func__sys_read(void *ctx) { return 1; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; no-inherit is applied to the kprobe event defined in test.c. Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-10-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Enable indices setting syntax for BPF mapWang Nan2016-02-223-3/+100
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This patch introduces a new syntax to perf event parser: # perf record -e './test_bpf_map_3.c/map:channel.value[0,1,2,3...5]=101/' usleep 2 By utilizing the basic facilities in bpf-loader.c which allow setting different slots in a BPF map separately, the newly introduced syntax allows perf to control specific elements in a BPF map. Test result: # cat ./test_bpf_map_3.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(unsigned char), .max_entries = 100, }; SEC("func=hrtimer_nanosleep rqtp->tv_nsec") int func(void *ctx, int err, long nsec) { char fmt[] = "%ld\n"; long usec = nsec * 0x10624dd3 >> 38; // nsec / 1000 int key = (int)usec; unsigned char *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), (unsigned char)*pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_3.c/map:channel.value[0,1,2,3...5]=101/' usleep 2 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 # ./perf record -e './test_bpf_map_3.c/map:channel.value[0...9,20...29]=102,map:channel.value[10...19]=103/' usleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # ./perf record -e './test_bpf_map_3.c/map:channel.value[0...9,20...29]=102,map:channel.value[10...19]=103/' usleep 15 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 usleep-655 [006] d... 2745434.122814: : 102 usleep-904 [006] d... 2745439.916264: : 103 # ./perf record -e './test_bpf_map_3.c/map:channel.value[all]=104/' usleep 99 # cat /sys/kernel/debug/tracing/trace | grep usleep usleep-405 [004] d... 2745423.547822: : 101 usleep-655 [006] d... 2745434.122814: : 102 usleep-904 [006] d... 2745439.916264: : 103 usleep-1537 [003] d... 2745538.053737: : 104 Error case: # ./perf record -e './test_bpf_map_3.c/map:channel.value[10...1000]=104/' usleep 99 event syntax error: '..annel.value[10...1000]=104/' \___ Index too large Hint: Valid config terms: map:[<arraymap>].value<indices>=[value] map:[<eventmap>].event<indices>=[event] where <indices> is something like [0,3...5] or [all] (add -v to see detail) Run 'perf list' for a list of valid events Usage: perf record [<options>] [<command>] or: perf record [<options>] -- <command> [<options>] -e, --event <event> event selector. use 'perf list' to list available events Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-9-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Support setting different slots in a BPF map separatelyWang Nan2016-02-224-9/+137
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | This patch introduces basic facilities to support config different slots in a BPF map one by one. array.nr_ranges and array.ranges are introduced into 'struct parse_events_term', where ranges is an array of indices range (start, length) which will be configured by this config term. nr_ranges is the size of the array. The array is passed to 'struct bpf_map_priv'. To indicate the new type of configuration, BPF_MAP_KEY_RANGES is added as a new key type. bpf_map_config_foreach_key() is extended to iterate over those indices instead of all possible keys. Code in this commit will be enabled by following commit which enables the indices syntax for array configuration. Signed-off-by: Wang Nan <wangnan0@huawei.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-8-git-send-email-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf tools: Enable passing event to BPF objectWang Nan2016-02-226-13/+190
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | A new syntax is added to the parser so that the user can access predefined perf events in BPF objects. After this patch, BPF programs for perf are finally able to utilize bpf_perf_event_read() introduced in commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU counter"). Test result: # cat test_bpf_map_2.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; static int (*get_smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id; static int (*perf_event_read)(struct bpf_map_def *, int) = (void *)BPF_FUNC_perf_event_read; struct bpf_map_def SEC("maps") pmu_map = { .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = __NR_CPUS__, }; SEC("func_write=sys_write") int func_write(void *ctx) { unsigned long long val; char fmt[] = "sys_write: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } SEC("func_write_return=sys_write%return") int func_write_return(void *ctx) { unsigned long long val = 0; char fmt[] = "sys_write_return: pmu=%llu\n"; val = perf_event_read(&pmu_map, get_smp_processor_id()); trace_printk(fmt, sizeof(fmt), val); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ Normal case: # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.013 MB perf.data (7 samples) ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17066 [000] d... 938449.863301: : sys_write: pmu=1157327 ls-17066 [000] dN.. 938449.863342: : sys_write_return: pmu=1225218 ls-17066 [000] d... 938449.863349: : sys_write: pmu=1241922 ls-17066 [000] dN.. 938449.863369: : sys_write_return: pmu=1267445 Normal case (system wide): # echo "" > /sys/kernel/debug/tracing/trace # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -a ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.811 MB perf.data (120 samples) ] # cat /sys/kernel/debug/tracing/trace | grep -v '18446744073709551594' | grep -v perf | head -n 20 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | gmain-30828 [002] d... 2740551.068992: : sys_write: pmu=84373 gmain-30828 [002] d... 2740551.068992: : sys_write_return: pmu=87696 gmain-30828 [002] d... 2740551.068996: : sys_write: pmu=100658 gmain-30828 [002] d... 2740551.068997: : sys_write_return: pmu=102572 Error case 1: # perf record -e './test_bpf_map_2.c' ls / [SNIP] [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.014 MB perf.data ] # cat /sys/kernel/debug/tracing/trace | grep ls ls-17115 [007] d... 2724279.665625: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665651: : sys_write_return: pmu=18446744073709551614 ls-17115 [007] d... 2724279.665658: : sys_write: pmu=18446744073709551614 ls-17115 [007] dN.. 2724279.665677: : sys_write_return: pmu=18446744073709551614 (18446744073709551614 is 0xfffffffffffffffe (-2)) Error case 2: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=evt/' -a event syntax error: '..ps:pmu_map.event=evt/' \___ Event not found for map setting Hint: Valid config terms: map:[<arraymap>].value=[value] map:[<eventmap>].event=[event] [SNIP] Error case 3: # ls /proc/2348/task/ 2348 2505 2506 2507 2508 # perf record -i -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' -p 2348 ERROR: Apply config to BPF failed: Cannot set event to BPF map in multi-thread tracing Error case 4: # perf record -e cycles -e './test_bpf_map_2.c/map:pmu_map.event=cycles/' ls / ERROR: Apply config to BPF failed: Doesn't support inherit event (Hint: use -i to turn off inherit) Error case 5: # perf record -i -e raw_syscalls:sys_enter -e './test_bpf_map_2.c/map:pmu_map.event=raw_syscalls:sys_enter/' ls ERROR: Apply config to BPF failed: Can only put raw, hardware and BPF output event into a BPF map Error case 6: # perf record -i -e './test_bpf_map_2.c/map:pmu_map.event=123/' ls / event syntax error: '.._map.event=123/' \___ Incorrect value type for map [SNIP] Signed-off-by: Wang Nan <wangnan0@huawei.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-7-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
* | perf record: Apply config to BPF objects before recordingWang Nan2016-02-222-0/+199
| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | bpf__apply_obj_config() is introduced as the core API to apply object config options to all BPF objects. This patch also does the real work for setting values for BPF_MAP_TYPE_PERF_ARRAY maps by inserting value stored in map's private field into the BPF map. This patch is required because we are not always able to set all BPF config during parsing. Further patch will set events created by perf to BPF_MAP_TYPE_PERF_EVENT_ARRAY maps, which is not exist until perf_evsel__open(). bpf_map_foreach_key() is introduced to iterate over each key needs to be configured. This function would be extended to support more map types and different key settings. In perf record, before start recording, call bpf__apply_config() to turn on all BPF config options. Test result: # cat ./test_bpf_map_1.c /************************ BEGIN **************************/ #include <uapi/linux/bpf.h> #define SEC(NAME) __attribute__((section(NAME), used)) struct bpf_map_def { unsigned int type; unsigned int key_size; unsigned int value_size; unsigned int max_entries; }; static void *(*map_lookup_elem)(struct bpf_map_def *, void *) = (void *)BPF_FUNC_map_lookup_elem; static int (*trace_printk)(const char *fmt, int fmt_size, ...) = (void *)BPF_FUNC_trace_printk; struct bpf_map_def SEC("maps") channel = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 1, }; SEC("func=sys_nanosleep") int func(void *ctx) { int key = 0; char fmt[] = "%d\n"; int *pval = map_lookup_elem(&channel, &key); if (!pval) return 0; trace_printk(fmt, sizeof(fmt), *pval); return 0; } char _license[] SEC("license") = "GPL"; int _version SEC("version") = LINUX_VERSION_CODE; /************************* END ***************************/ # echo "" > /sys/kernel/debug/tracing/trace # ./perf record -e './test_bpf_map_1.c/map:channel.value=11/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 # ./perf record -e './test_bpf_map_1.c/map:channel.value=101/' usleep 10 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.012 MB perf.data ] # cat /sys/kernel/debug/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 1/1 #P:8 [SNIP] # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | usleep-18593 [007] d... 2394714.395539: : 11 usleep-19000 [006] d... 2394831.057840: : 101 Signed-off-by: Wang Nan <wangnan0@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexei Starovoitov <ast@kernel.org> Cc: Brendan Gregg <brendan.d.gregg@gmail.com> Cc: Cody P Schafer <dev@codyps.com> Cc: He Kuang <hekuang@huawei.com> Cc: Jeremie Galarneau <jeremie.galarneau@efficios.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Kirill Smelkov <kirr@nexedi.com> Cc: Li Zefan <lizefan@huawei.com> Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Zefan Li <lizefan@huawei.com> Cc: pi3orama@163.com Link: http://lkml.kernel.org/r/1456132275-98875-6-git-send-email-wangnan0@huawei.com Signed-off-by: He Kuang <hekuang@huawei.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>