summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin KaFai Lau <martin.lau@kernel.org>2022-09-29 00:04:03 -0700
committerAlexei Starovoitov <ast@kernel.org>2022-09-29 09:25:47 -0700
commit64696c40d03c01e0ea2e3e9aa1c490a7b6a1b6be (patch)
treebe3119098834c597db3fdfd618f50b29377a1429
parent8526f0d6135f77451566463ace6f0fb8b72cedaa (diff)
downloadlinux-64696c40d03c01e0ea2e3e9aa1c490a7b6a1b6be.tar.gz
linux-64696c40d03c01e0ea2e3e9aa1c490a7b6a1b6be.tar.bz2
linux-64696c40d03c01e0ea2e3e9aa1c490a7b6a1b6be.zip
bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline
The struct_ops prog is to allow using bpf to implement the functions in a struct (eg. kernel module). The current usage is to implement the tcp_congestion. The kernel does not call the tcp-cc's ops (ie. the bpf prog) in a recursive way. The struct_ops is sharing the tracing-trampoline's enter/exit function which tracks prog->active to avoid recursion. It is needed for tracing prog. However, it turns out the struct_ops bpf prog will hit this prog->active and unnecessarily skipped running the struct_ops prog. eg. The '.ssthresh' may run in_task() and then interrupted by softirq that runs the same '.ssthresh'. Skip running the '.ssthresh' will end up returning random value to the caller. The patch adds __bpf_prog_{enter,exit}_struct_ops for the struct_ops trampoline. They do not track the prog->active to detect recursion. One exception is when the tcp_congestion's '.init' ops is doing bpf_setsockopt(TCP_CONGESTION) and then recurs to the same '.init' ops. This will be addressed in the following patches. Fixes: ca06f55b9002 ("bpf: Add per-program recursion prevention mechanism") Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org> Link: https://lore.kernel.org/r/20220929070407.965581-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov <ast@kernel.org>
-rw-r--r--arch/x86/net/bpf_jit_comp.c3
-rw-r--r--include/linux/bpf.h4
-rw-r--r--kernel/bpf/trampoline.c23
3 files changed, 30 insertions, 0 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 35796db58116..5b6230779cf3 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1836,6 +1836,9 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
if (p->aux->sleepable) {
enter = __bpf_prog_enter_sleepable;
exit = __bpf_prog_exit_sleepable;
+ } else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ enter = __bpf_prog_enter_struct_ops;
+ exit = __bpf_prog_exit_struct_ops;
} else if (p->expected_attach_type == BPF_LSM_CGROUP) {
enter = __bpf_prog_enter_lsm_cgroup;
exit = __bpf_prog_exit_lsm_cgroup;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0f3eaf3ed98c..9e7d46d16032 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -864,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog,
struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
struct bpf_tramp_run_ctx *run_ctx);
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+ struct bpf_tramp_run_ctx *run_ctx);
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx);
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 6f7b939321d6..bf0906e1e2b9 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -964,6 +964,29 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
rcu_read_unlock_trace();
}
+u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog,
+ struct bpf_tramp_run_ctx *run_ctx)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ migrate_disable();
+
+ run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+
+ return bpf_prog_start_time();
+}
+
+void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start,
+ struct bpf_tramp_run_ctx *run_ctx)
+ __releases(RCU)
+{
+ bpf_reset_run_ctx(run_ctx->saved_run_ctx);
+
+ update_prog_stats(prog, start);
+ migrate_enable();
+ rcu_read_unlock();
+}
+
void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
{
percpu_ref_get(&tr->pcref);