diff options
author | Alexei Starovoitov <ast@kernel.org> | 2019-02-25 14:28:39 -0800 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-02-27 17:22:50 +0100 |
commit | 492ecee892c2a4ba6a14903d5d586ff750b7e805 (patch) | |
tree | 6161a74e75b41fdb94944cd5451e8589da3cab7c /kernel/sysctl.c | |
parent | 143bdc2e27b44d2559596424bfb017d578be33eb (diff) | |
download | linux-492ecee892c2a4ba6a14903d5d586ff750b7e805.tar.gz linux-492ecee892c2a4ba6a14903d5d586ff750b7e805.tar.bz2 linux-492ecee892c2a4ba6a14903d5d586ff750b7e805.zip |
bpf: enable program stats
JITed BPF programs are indistinguishable from kernel functions, but unlike
kernel code BPF code can be changed often.
Typical approach of "perf record" + "perf report" profiling and tuning of
kernel code works just as well for BPF programs, but kernel code doesn't
need to be monitored whereas BPF programs do.
Users load and run large amount of BPF programs.
These BPF stats allow tools monitor the usage of BPF on the server.
The monitoring tools will turn sysctl kernel.bpf_stats_enabled
on and off for few seconds to sample average cost of the programs.
Aggregated data over hours and days will provide an insight into cost of BPF
and alarms can trigger in case given program suddenly gets more expensive.
The cost of two sched_clock() per program invocation adds ~20 nsec.
Fast BPF progs (like selftests/bpf/progs/test_pkt_access.c) will slow down
from ~10 nsec to ~30 nsec.
static_key minimizes the cost of the stats collection.
There is no measurable difference before/after this patch
with kernel.bpf_stats_enabled=0
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'kernel/sysctl.c')
-rw-r--r-- | kernel/sysctl.c | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ba4d9e85feb8..86e0771352f2 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -224,6 +224,9 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #endif static int proc_dopipe_max_size(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); #ifdef CONFIG_MAGIC_SYSRQ /* Note: sysrq code uses its own private copy */ @@ -1230,6 +1233,15 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, #endif + { + .procname = "bpf_stats_enabled", + .data = &sysctl_bpf_stats_enabled, + .maxlen = sizeof(sysctl_bpf_stats_enabled), + .mode = 0644, + .proc_handler = proc_dointvec_minmax_bpf_stats, + .extra1 = &zero, + .extra2 = &one, + }, #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) { .procname = "panic_on_rcu_stall", @@ -3260,6 +3272,28 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, #endif /* CONFIG_PROC_SYSCTL */ +static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int ret, bpf_stats = *(int *)table->data; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &bpf_stats; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + *(int *)table->data = bpf_stats; + if (bpf_stats) + static_branch_enable(&bpf_stats_enabled_key); + else + static_branch_disable(&bpf_stats_enabled_key); + } + return ret; +} + /* * No sense putting this after each symbol definition, twice, * exception granted :-) |