summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoanne Koong <joannekoong@fb.com>2021-11-29 19:06:21 -0800
committerAlexei Starovoitov <ast@kernel.org>2021-11-30 10:56:28 -0800
commitf6e659b7f97c76d0471d12bf274ea2a097cf3c5c (patch)
treeb8130dd0f6a15b2b28121d425169ef8f707382c2
parent4e5070b64b375a9c1f570893cfceeba108382bef (diff)
downloadlinux-f6e659b7f97c76d0471d12bf274ea2a097cf3c5c.tar.gz
linux-f6e659b7f97c76d0471d12bf274ea2a097cf3c5c.tar.bz2
linux-f6e659b7f97c76d0471d12bf274ea2a097cf3c5c.zip
selftests/bpf: Measure bpf_loop verifier performance
This patch tests bpf_loop in pyperf and strobemeta, and measures the verifier performance of replacing the traditional for loop with bpf_loop. The results are as follows: ~strobemeta~ Baseline verification time 6808200 usec stack depth 496 processed 554252 insns (limit 1000000) max_states_per_insn 16 total_states 15878 peak_states 13489 mark_read 3110 #192 verif_scale_strobemeta:OK (unrolled loop) Using bpf_loop verification time 31589 usec stack depth 96+400 processed 1513 insns (limit 1000000) max_states_per_insn 2 total_states 106 peak_states 106 mark_read 60 #193 verif_scale_strobemeta_bpf_loop:OK ~pyperf600~ Baseline verification time 29702486 usec stack depth 368 processed 626838 insns (limit 1000000) max_states_per_insn 7 total_states 30368 peak_states 30279 mark_read 748 #182 verif_scale_pyperf600:OK (unrolled loop) Using bpf_loop verification time 148488 usec stack depth 320+40 processed 10518 insns (limit 1000000) max_states_per_insn 10 total_states 705 peak_states 517 mark_read 38 #183 verif_scale_pyperf600_bpf_loop:OK Using the bpf_loop helper led to approximately a 99% decrease in the verification time and in the number of instructions. Signed-off-by: Joanne Koong <joannekoong@fb.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Andrii Nakryiko <andrii@kernel.org> Link: https://lore.kernel.org/bpf/20211130030622.4131246-4-joannekoong@fb.com
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c12
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h71
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c6
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h75
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c9
5 files changed, 169 insertions, 4 deletions
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 27f5d8ea7964..1fb16f8dad56 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -115,6 +115,12 @@ void test_verif_scale_pyperf600()
scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}
+void test_verif_scale_pyperf600_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
void test_verif_scale_pyperf600_nounroll()
{
/* no unroll at all.
@@ -165,6 +171,12 @@ void test_verif_scale_strobemeta()
scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
}
+void test_verif_scale_strobemeta_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
void test_verif_scale_strobemeta_nounroll1()
{
/* no unroll, tiny loops */
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 2fb7adafb6b6..1ed28882daf3 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -159,6 +159,59 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
+#ifdef USE_BPF_LOOP
+struct process_frame_ctx {
+ int cur_cpu;
+ int32_t *symbol_counter;
+ void *frame_ptr;
+ FrameData *frame;
+ PidData *pidData;
+ Symbol *sym;
+ Event *event;
+ bool done;
+};
+
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+
+static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
+{
+ int zero = 0;
+ void *frame_ptr = ctx->frame_ptr;
+ PidData *pidData = ctx->pidData;
+ FrameData *frame = ctx->frame;
+ int32_t *symbol_counter = ctx->symbol_counter;
+ int cur_cpu = ctx->cur_cpu;
+ Event *event = ctx->event;
+ Symbol *sym = ctx->sym;
+
+ if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
+ int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+ int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+
+ if (!symbol_id) {
+ bpf_map_update_elem(&symbolmap, sym, &zero, 0);
+ symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+ if (!symbol_id) {
+ ctx->done = true;
+ return 1;
+ }
+ }
+ if (*symbol_id == new_symbol_id)
+ (*symbol_counter)++;
+
+ barrier_var(i);
+ if (i >= STACK_MAX_LEN)
+ return 1;
+
+ event->stack[i] = *symbol_id;
+
+ event->stack_len = i + 1;
+ frame_ptr = frame->f_back;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
#ifdef GLOBAL_FUNC
__noinline
#elif defined(SUBPROGS)
@@ -228,11 +281,26 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
+#ifdef USE_BPF_LOOP
+ struct process_frame_ctx ctx = {
+ .cur_cpu = cur_cpu,
+ .symbol_counter = symbol_counter,
+ .frame_ptr = frame_ptr,
+ .frame = &frame,
+ .pidData = pidData,
+ .sym = &sym,
+ .event = event,
+ };
+
+ bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
+ if (ctx.done)
+ return 0;
+#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma clang loop unroll(full)
-#endif
+#endif /* NO_UNROLL */
/* Unwind python stack */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
@@ -251,6 +319,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
frame_ptr = frame.f_back;
}
}
+#endif /* USE_BPF_LOOP */
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
new file mode 100644
index 000000000000..5c2059dc01af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define STACK_MAX_LEN 600
+#define USE_BPF_LOOP
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 60c93aee2f4a..753718595c26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -445,6 +445,48 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
return payload;
}
+#ifdef USE_BPF_LOOP
+enum read_type {
+ READ_INT_VAR,
+ READ_MAP_VAR,
+ READ_STR_VAR,
+};
+
+struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+ void *payload;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+};
+
+static int read_var_callback(__u32 index, struct read_var_ctx *ctx)
+{
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+ return 1;
+ read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
+ break;
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+ ctx->payload = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+ ctx->payload += read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload);
+ break;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
/*
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
@@ -475,11 +517,36 @@ static void *read_strobe_meta(struct task_struct *task,
*/
tls_base = (void *)task;
+#ifdef USE_BPF_LOOP
+ struct read_var_ctx ctx = {
+ .cfg = cfg,
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+ .payload = payload,
+ };
+ int err;
+
+ ctx.type = READ_INT_VAR;
+ err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_INTS)
+ return NULL;
+
+ ctx.type = READ_STR_VAR;
+ err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_STRS)
+ return NULL;
+
+ ctx.type = READ_MAP_VAR;
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
+#else
#ifdef NO_UNROLL
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
@@ -487,7 +554,7 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
payload += read_str_var(cfg, i, tls_base, &value, data, payload);
}
@@ -495,10 +562,12 @@ static void *read_strobe_meta(struct task_struct *task,
#pragma clang loop unroll(disable)
#else
#pragma unroll
-#endif
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
payload = read_map_var(cfg, i, tls_base, &value, data, payload);
}
+#endif /* USE_BPF_LOOP */
+
/*
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
new file mode 100644
index 000000000000..d18b992f0165
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+#define USE_BPF_LOOP
+#include "strobemeta.h"