author    Florent Revest <revest@chromium.org>    2021-04-27 19:43:13 +0200
committer Alexei Starovoitov <ast@kernel.org>     2021-04-27 15:56:31 -0700
commit    48cac3f4a96ddf08df8e53809ed066de0dc93915 (patch)
tree      2a9455b9328ad460e138774aaf5af191091a7605 /kernel/trace
parent    76d6a13383b8e3ff20a9cf52aa9c3de39e485632 (diff)
bpf: Implement formatted output helpers with bstr_printf
BPF has three formatted output helpers: bpf_trace_printk, bpf_seq_printf and bpf_snprintf. Their signatures specify that all arguments are provided from the BPF world as u64s (in an array or as registers). All of these helpers are currently implemented by calling functions such as snprintf(), which take a variable number of arguments that the compiler then places in a va_list to call vsnprintf().

"d9c9e4db bpf: Factorize bpf_trace_printk and bpf_seq_printf" introduced a bpf_printf_prepare function that fills an array of sanitized u64 arguments along with an array of "modifiers" which indicate what the "real" size of each argument should be (as given by the format specifier). The BPF_CAST_FMT_ARG macro consumes these arrays and casts each argument to its real size. However, the C promotion rules implicitly cast them all back to u64s. Therefore, the arguments given to snprintf are u64s and the va_list constructed by the compiler will use 64 bits for each argument. On 64 bit machines, this happens to work well because 32 bit arguments in va_lists need to occupy 64 bits anyway, but on 32 bit architectures this breaks the layout of the va_list expected by the called function and mangles values.

In "88a5c690b6 bpf: fix bpf_trace_printk on 32 bit archs", this problem had been solved for bpf_trace_printk only, with a "horrid workaround" that emitted multiple calls to trace_printk where each call had different argument types and generated different va_list layouts. One of the calls would be dynamically chosen at runtime. This was ok with the 3 arguments that bpf_trace_printk takes, but bpf_seq_printf and bpf_snprintf accept up to 12 arguments. Because this approach scales code exponentially, it is not a viable option anymore.

Because the promotion rules are part of the language and because the construction of a va_list is an arch-specific ABI, it's best to just avoid variadic arguments and va_lists altogether. Thankfully the kernel's snprintf() has an alternative in the form of bstr_printf() that accepts arguments in a "binary buffer representation". These binary buffers are currently created by vbin_printf and used in the tracing subsystem to split the cost of printing into two parts: a fast one that only dereferences and remembers values, and a slower one, called later, that does the pretty-printing.

This patch refactors bpf_printf_prepare to construct binary buffers of arguments consumable by bstr_printf() instead of arrays of arguments and modifiers. This gets rid of BPF_CAST_FMT_ARG and greatly simplifies the bpf_printf_prepare usage, but there are a few gotchas that change how bpf_printf_prepare needs to do things.

Currently, bpf_printf_prepare uses a per-cpu temporary buffer as generic storage for strings and IP addresses. With this refactoring, the temporary buffer now holds all the arguments in a structured binary format.

To comply with the format expected by bstr_printf, certain format specifiers also need to be pre-formatted: %pB and %pi6/%pi4/%pI4/%pI6. Because the vsnprintf subroutines for these specifiers are hard to expose, we pre-format these arguments with calls to snprintf().

Reported-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Florent Revest <revest@chromium.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210427174313.860948-3-revest@chromium.org
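[Editor's note, not part of the commit: a minimal userspace sketch of the promotion problem described above. CAST_ARG is a hypothetical stand-in for BPF_CAST_FMT_ARG; the point it illustrates is that the conditional operator's usual arithmetic conversions give the whole expression the 64-bit type, so every argument handed to a variadic function occupies a 64-bit va_list slot regardless of which cast was "selected".]

#include <stdint.h>
#include <stdio.h>

/* Simplified, hypothetical stand-in for BPF_CAST_FMT_ARG: pick a cast
 * at runtime based on the modifier. */
#define CAST_ARG(is_long_long, val) \
	((is_long_long) ? (uint64_t)(val) : (uint32_t)(val))

int main(void)
{
	uint64_t arg = 42;

	/*
	 * Even when the uint32_t branch is taken, the conditional
	 * expression as a whole has type uint64_t, so sizeof() is 8.
	 * On a 64-bit ABI the callee's va_arg(ap, int) still works
	 * because every slot is 64 bits anyway; on a 32-bit ABI the
	 * extra 32 bits shift all following arguments and mangle them.
	 */
	printf("sizeof(CAST_ARG(0, arg)) = %zu\n",
	       sizeof(CAST_ARG(0, arg)));	/* prints 8, not 4 */
	return 0;
}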
Diffstat (limited to 'kernel/trace')
-rw-r--r--  kernel/trace/bpf_trace.c  |  34  ++++++++++------------------------
1 file changed, 10 insertions(+), 24 deletions(-)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0e67d12a8f40..d2d7cf6cfe83 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -381,27 +381,23 @@ BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
u64, arg2, u64, arg3)
{
u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
- enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
+ u32 *bin_args;
static char buf[BPF_TRACE_PRINTK_SIZE];
unsigned long flags;
int ret;
- ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
- MAX_TRACE_PRINTK_VARARGS);
+ ret = bpf_bprintf_prepare(fmt, fmt_size, args, &bin_args,
+ MAX_TRACE_PRINTK_VARARGS);
if (ret < 0)
return ret;
raw_spin_lock_irqsave(&trace_printk_lock, flags);
- ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
- BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
- /* snprintf() will not append null for zero-length strings */
- if (ret == 0)
- buf[0] = '\0';
+ ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
trace_bpf_trace_printk(buf);
raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
- bpf_printf_cleanup();
+ bpf_bprintf_cleanup();
return ret;
}
@@ -435,31 +431,21 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
const void *, data, u32, data_len)
{
- enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
- u64 args[MAX_SEQ_PRINTF_VARARGS];
int err, num_args;
+ u32 *bin_args;
if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
(data_len && !data))
return -EINVAL;
num_args = data_len / 8;
- err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+ err = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
if (err < 0)
return err;
- /* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
- * all of them to seq_printf().
- */
- seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
- BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
- BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
- BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
- BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
- BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
- BPF_CAST_FMT_ARG(11, args, mod));
-
- bpf_printf_cleanup();
+ seq_bprintf(m, fmt, bin_args);
+
+ bpf_bprintf_cleanup();
return seq_has_overflowed(m) ? -EOVERFLOW : 0;
}
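[Editor's note, not part of the commit: a minimal kernel-style sketch, assuming CONFIG_BINARY_PRINTF, of the two-phase binary printf API from lib/vsprintf.c that the helpers now build on. The function name, buffer sizes and format string are made up for illustration; bprintf() and bstr_printf() are the real entry points.]

#include <linux/kernel.h>
#include <linux/string.h>

static void binary_printf_example(int pid, const char *comm)
{
	u32 bin_buf[64];
	char out[128];

	/*
	 * Fast phase: walk the format string once, dereference the
	 * arguments (strings are copied by value) and pack them into
	 * the binary buffer. Note the size is given in 32-bit words.
	 */
	bprintf(bin_buf, ARRAY_SIZE(bin_buf), "pid=%d comm=%s", pid, comm);

	/*
	 * Slow phase, possibly much later: pretty-print from the
	 * binary buffer; the original arguments are no longer needed.
	 */
	bstr_printf(out, sizeof(out), "pid=%d comm=%s", bin_buf);
}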