diff options
author | Yonghong Song <yhs@fb.com> | 2017-10-23 23:53:08 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-10-25 10:47:47 +0900 |
commit | e87c6bc3852b981e71c757be20771546ce9f76f3 (patch) | |
tree | bad3be630137d8e873f4ad5a1ea77b4aa1853184 /include | |
parent | 0b4c6841fee03e096b735074a0c4aab3a8e92986 (diff) | |
download | linux-e87c6bc3852b981e71c757be20771546ce9f76f3.tar.gz linux-e87c6bc3852b981e71c757be20771546ce9f76f3.tar.bz2 linux-e87c6bc3852b981e71c757be20771546ce9f76f3.zip |
bpf: permit multiple bpf attachments for a single perf event
This patch enables multiple bpf attachments for a
kprobe/uprobe/tracepoint single trace event.
Each trace_event keeps a list of attached perf events.
When an event happens, all attached bpf programs will
be executed based on the order of attachment.
A global bpf_event_mutex lock is introduced to protect
prog_array attaching and detaching. An alternative will
be introduce a mutex lock in every trace_event_call
structure, but it takes a lot of extra memory.
So a global bpf_event_mutex lock is a good compromise.
The bpf prog detachment involves allocation of memory.
If the allocation fails, a dummy do-nothing program
will replace to-be-detached program in-place.
Signed-off-by: Yonghong Song <yhs@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/bpf.h | 30 | ||||
-rw-r--r-- | include/linux/trace_events.h | 43 | ||||
-rw-r--r-- | include/trace/perf.h | 6 |
3 files changed, 67 insertions, 12 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e334b248ff6..172be7faf7ba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt); -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog; \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ u32 _ret = 1; \ rcu_read_lock(); \ - _prog = rcu_dereference(array)->progs; \ - for (; *_prog; _prog++) \ - _ret &= func(*_prog, ctx); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ rcu_read_unlock(); \ _ret; \ }) +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..fc6aeca945db 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -271,14 +271,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ diff --git a/include/trace/perf.h b/include/trace/perf.h index 04fe68bbe767..14f127b6acf5 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -34,7 +34,6 @@ perf_trace_##call(void *__data, proto) \ struct trace_event_call *event_call = __data; \ struct trace_event_data_offsets_##call __maybe_unused __data_offsets;\ struct trace_event_raw_##call *entry; \ - struct bpf_prog *prog = event_call->prog; \ struct pt_regs *__regs; \ u64 __count = 1; \ struct task_struct *__task = NULL; \ @@ -46,8 +45,9 @@ perf_trace_##call(void *__data, proto) \ __data_size = trace_event_get_offsets_##call(&__data_offsets, args); \ \ head = this_cpu_ptr(event_call->perf_events); \ - if (!prog && __builtin_constant_p(!__task) && !__task && \ - hlist_empty(head)) \ + if (!bpf_prog_array_valid(event_call) && \ + __builtin_constant_p(!__task) && !__task && \ + hlist_empty(head)) \ return; \ \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ |