summaryrefslogtreecommitdiffstats
path: root/kernel/bpf
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2024-11-22 13:27:01 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2024-11-22 13:27:01 -0800
commit 06afb0f36106ecb839c5e2509905e68c1e2677de (patch)
tree e54b4528c648fc59ab89726b3786bf6626600c06 /kernel/bpf
parent 4b01712311c6e209137c4fa3e7d7920ec509456a (diff)
parent 45af52e7d3b8560f21d139b3759735eead8b1653 (diff)
download linux-06afb0f36106ecb839c5e2509905e68c1e2677de.tar.gz
linux-06afb0f36106ecb839c5e2509905e68c1e2677de.tar.bz2
linux-06afb0f36106ecb839c5e2509905e68c1e2677de.zip
Merge tag 'trace-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace
Pull tracing updates from Steven Rostedt:

 - Addition of faultable tracepoints

   There's a tracepoint attached to both a system call entry and exit.
   This location is known to allow page faults. The tracepoints are
   called under an rcu_read_lock() which does not allow faults that can
   sleep. This limits the ability of tracepoint handlers to page fault
   in user space system call parameters. Now these tracepoints have been
   made "faultable", allowing the callbacks to fault in user space
   parameters and record them.

   Note, only the infrastructure has been implemented. The consumers
   (perf, ftrace, BPF) now need to have their code modified to allow
   faults.

 - Fix up of BPF code for the tracepoint faultable logic

 - Update tracepoints to use the new static branch API

 - Remove trace_*_rcuidle() variants and the SRCU protection they used

 - Remove unused TRACE_EVENT_FL_FILTERED logic

 - Replace strncpy() with strscpy() and memcpy()

 - Replace per_cpu_ptr(smp_processor_id()) with this_cpu_ptr()

 - Fix perf events to not duplicate samples when tracing is enabled

 - Replace atomic64_add_return(1, counter) with
   atomic64_inc_return(counter)

 - Make stack trace buffer 4K instead of PAGE_SIZE

 - Remove TRACE_FLAG_IRQS_NOSUPPORT flag as it was never used

 - Get the true return address for function tracer when function graph
   tracer is also running.

   When function_graph trace is running along with function tracer, the
   parent function of the function tracer sometimes is
   "return_to_handler", which is the function graph trampoline to record
   the exit of the function. Use existing logic that calls into the
   fgraph infrastructure to find the real return address.

 - Remove (un)regfunc pointers out of tracepoint structure

 - Added last minute bug fix for setting pending modules in stack
   function filter.

     echo "write*:mod:ext3" > /sys/kernel/tracing/stack_trace_filter

   Would cause a kernel NULL dereference.

 - Minor clean ups

* tag 'trace-v6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace: (31 commits)
  ftrace: Fix regression with module command in stack_trace_filter
  tracing: Fix function name for trampoline
  ftrace: Get the true parent ip for function tracer
  tracing: Remove redundant check on field->field in histograms
  bpf: ensure RCU Tasks Trace GP for sleepable raw tracepoint BPF links
  bpf: decouple BPF link/attach hook and BPF program sleepable semantics
  bpf: put bpf_link's program when link is safe to be deallocated
  tracing: Replace strncpy() with strscpy() when copying comm
  tracing: Add might_fault() check in __DECLARE_TRACE_SYSCALL
  tracing: Fix syscall tracepoint use-after-free
  tracing: Introduce tracepoint_is_faultable()
  tracing: Introduce tracepoint extended structure
  tracing: Remove TRACE_FLAG_IRQS_NOSUPPORT
  tracing: Replace multiple deprecated strncpy with memcpy
  tracing: Make percpu stack trace buffer invariant to PAGE_SIZE
  tracing: Use atomic64_inc_return() in trace_clock_counter()
  trace/trace_event_perf: remove duplicate samples on the first tracepoint event
  tracing/bpf: Add might_fault check to syscall probes
  tracing/perf: Add might_fault check to syscall probes
  tracing/ftrace: Add might_fault check to syscall probes
  ...
Diffstat (limited to 'kernel/bpf')
-rw-r--r-- kernel/bpf/syscall.c 67
1 files changed, 49 insertions, 18 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 58190ca724a2..5684e8ce132d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,6 +35,7 @@
#include <linux/rcupdate_trace.h>
#include <linux/memcontrol.h>
#include <linux/trace_events.h>
+#include <linux/tracepoint.h>
#include <net/netfilter/nf_bpf_link.h>
#include <net/netkit.h>
@@ -3033,17 +3034,33 @@ static int bpf_obj_get(const union bpf_attr *attr)
attr->file_flags);
}
-void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
- const struct bpf_link_ops *ops, struct bpf_prog *prog)
+/* bpf_link_init_sleepable() allows specifying whether BPF link itself has
+ * "sleepable" semantics, which normally would mean that BPF link's attach
+ * hook can dereference link or link's underlying program for some time after
+ * detachment due to RCU Tasks Trace-based lifetime protection scheme.
+ * BPF program itself can be non-sleepable, yet, because it's transitively
+ * reachable through BPF link, its freeing has to be delayed until after RCU
+ * Tasks Trace GP.
+ */
+void bpf_link_init_sleepable(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog,
+ bool sleepable)
{
WARN_ON(ops->dealloc && ops->dealloc_deferred);
atomic64_set(&link->refcnt, 1);
link->type = type;
+ link->sleepable = sleepable;
link->id = 0;
link->ops = ops;
link->prog = prog;
}
+void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
+ const struct bpf_link_ops *ops, struct bpf_prog *prog)
+{
+ bpf_link_init_sleepable(link, type, ops, prog, false);
+}
+
static void bpf_link_free_id(int id)
{
if (!id)
@@ -3076,12 +3093,24 @@ void bpf_link_inc(struct bpf_link *link)
atomic64_inc(&link->refcnt);
}
+static void bpf_link_dealloc(struct bpf_link *link)
+{
+ /* now that we know that bpf_link itself can't be reached, put underlying BPF program */
+ if (link->prog)
+ bpf_prog_put(link->prog);
+
+ /* free bpf_link and its containing memory */
+ if (link->ops->dealloc_deferred)
+ link->ops->dealloc_deferred(link);
+ else
+ link->ops->dealloc(link);
+}
+
static void bpf_link_defer_dealloc_rcu_gp(struct rcu_head *rcu)
{
struct bpf_link *link = container_of(rcu, struct bpf_link, rcu);
- /* free bpf_link and its containing memory */
- link->ops->dealloc_deferred(link);
+ bpf_link_dealloc(link);
}
static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
@@ -3096,26 +3125,27 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu)
static void bpf_link_free(struct bpf_link *link)
{
const struct bpf_link_ops *ops = link->ops;
- bool sleepable = false;
bpf_link_free_id(link->id);
- if (link->prog) {
- sleepable = link->prog->sleepable;
- /* detach BPF program, clean up used resources */
+ /* detach BPF program, clean up used resources */
+ if (link->prog)
ops->release(link);
- bpf_prog_put(link->prog);
- }
if (ops->dealloc_deferred) {
- /* schedule BPF link deallocation; if underlying BPF program
- * is sleepable, we need to first wait for RCU tasks trace
- * sync, then go through "classic" RCU grace period
+ /* Schedule BPF link deallocation, which will only then
+ * trigger putting BPF program refcount.
+ * If underlying BPF program is sleepable or BPF link's target
+ * attach hookpoint is sleepable or otherwise requires RCU GPs
+ * to ensure link and its underlying BPF program are not
+ * reachable anymore, we need to first wait for RCU tasks
+ * trace sync, and then go through "classic" RCU grace period
*/
- if (sleepable)
+ if (link->sleepable || (link->prog && link->prog->sleepable))
call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp);
else
call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp);
- } else if (ops->dealloc)
- ops->dealloc(link);
+ } else if (ops->dealloc) {
+ bpf_link_dealloc(link);
+ }
}
static void bpf_link_put_deferred(struct work_struct *work)
@@ -3936,8 +3966,9 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
err = -ENOMEM;
goto out_put_btp;
}
- bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
- &bpf_raw_tp_link_lops, prog);
+ bpf_link_init_sleepable(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
+ &bpf_raw_tp_link_lops, prog,
+ tracepoint_is_faultable(btp->tp));
link->btp = btp;
link->cookie = cookie;