Diffstat (limited to 'arch/x86/net/bpf_jit_comp.c')
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 237
1 files changed, 109 insertions, 128 deletions
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..8796559f62a4 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -15,7 +15,6 @@
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
-#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -225,6 +224,14 @@ static void jit_fill_hole(void *area, unsigned int size)
 
 struct jit_context {
 	int cleanup_addr; /* Epilogue code offset */
+
+	/*
+	 * Program specific offsets of labels in the code; these rely on the
+	 * JIT doing at least 2 passes, recording the position on the first
+	 * pass, only to generate the correct offset on the second pass.
+	 */
+	int tail_call_direct_label;
+	int tail_call_indirect_label;
 };
 
 /* Maximum number of bytes emitted while JITing one eBPF insn */
@@ -380,20 +387,23 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
 	return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
 }
 
-static int get_pop_bytes(bool *callee_regs_used)
+#define EMIT_LFENCE()	EMIT3(0x0F, 0xAE, 0xE8)
+
+static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
 {
-	int bytes = 0;
+	u8 *prog = *pprog;
 
-	if (callee_regs_used[3])
-		bytes += 2;
-	if (callee_regs_used[2])
-		bytes += 2;
-	if (callee_regs_used[1])
-		bytes += 2;
-	if (callee_regs_used[0])
-		bytes += 1;
+#ifdef CONFIG_RETPOLINE
	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_AMD)) {
+		EMIT_LFENCE();
+		EMIT2(0xFF, 0xE0 + reg);
+	} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
+		emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+	} else
+#endif
+	EMIT2(0xFF, 0xE0 + reg);
 
-	return bytes;
+	*pprog = prog;
 }
 
 /*
@@ -411,29 +421,12 @@ static int get_pop_bytes(bool *callee_regs_used)
  * out:
  */
 static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
-					u32 stack_depth)
+					u32 stack_depth, u8 *ip,
+					struct jit_context *ctx)
 {
 	int tcc_off = -4 - round_up(stack_depth, 8);
-	u8 *prog = *pprog;
-	int pop_bytes = 0;
-	int off1 = 42;
-	int off2 = 31;
-	int off3 = 9;
-
-	/* count the additional bytes used for popping callee regs from stack
-	 * that need to be taken into account for each of the offsets that
-	 * are used for bailing out of the tail call
-	 */
-	pop_bytes = get_pop_bytes(callee_regs_used);
-	off1 += pop_bytes;
-	off2 += pop_bytes;
-	off3 += pop_bytes;
-
-	if (stack_depth) {
-		off1 += 7;
-		off2 += 7;
-		off3 += 7;
-	}
+	u8 *prog = *pprog, *start = *pprog;
+	int offset;
 
 	/*
 	 * rdi - pointer to ctx
@@ -448,8 +441,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	EMIT2(0x89, 0xD2);                        /* mov edx, edx */
 	EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
 	      offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (off1 + RETPOLINE_RCX_BPF_JIT_SIZE) /* Number of bytes to jump */
-	EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
+
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JBE, offset);                   /* jbe out */
 
 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -457,8 +451,9 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	 */
 	EMIT2_off32(0x8B, 0x85, tcc_off);         /* mov eax, dword ptr [rbp - tcc_off] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (off2 + RETPOLINE_RCX_BPF_JIT_SIZE)
-	EMIT2(X86_JA, OFFSET2);                   /* ja out */
+
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JA, offset);                    /* ja out */
 	EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, tcc_off);         /* mov dword ptr [rbp - tcc_off], eax */
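Editor's note: the jit_context hunks above replace the old hand-counted OFFSET1/2/3 constants with labels recorded across JIT passes: pass 1 emits with a stale (or zero) label and records where "out:" lands, pass 2 re-emits so every jump offset comes out right. A standalone sketch of that convergence in plain C (the one-byte opcodes and tiny buffer are illustrative, not the kernel's emitter):

#include <stdio.h>

struct ctx { int out_label; };	/* like jit_context's *_label fields */

static int emit(struct ctx *ctx, unsigned char *buf)
{
	int len = 0;

	buf[len++] = 0x76;	/* jbe rel8: "jbe out" */
	/* label minus end-of-instruction, cf. label - (prog + 2 - start) */
	buf[len] = (unsigned char)(ctx->out_label - (len + 1));
	len++;
	buf[len++] = 0x90;	/* function body: nop */
	ctx->out_label = len;	/* record where "out:" landed this pass */
	buf[len++] = 0xc3;	/* out: ret */
	return len;
}

int main(void)
{
	unsigned char buf[8];
	struct ctx ctx = { 0 };

	emit(&ctx, buf);	/* pass 1: offset is garbage, label recorded */
	emit(&ctx, buf);	/* pass 2: offset now correct */
	printf("jbe rel8 = %d\n", (signed char)buf[1]);	/* prints 1 */
	return 0;
}

The rel8 computation is the same shape as the diff's ctx->tail_call_indirect_label - (prog + 2 - start): target label minus end-of-instruction offset.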
@@ -471,12 +466,11 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	 * goto out;
 	 */
 	EMIT3(0x48, 0x85, 0xC9);                  /* test rcx,rcx */
-#define OFFSET3 (off3 + RETPOLINE_RCX_BPF_JIT_SIZE)
-	EMIT2(X86_JE, OFFSET3);                   /* je out */
 
-	*pprog = prog;
-	pop_callee_regs(pprog, callee_regs_used);
-	prog = *pprog;
+	offset = ctx->tail_call_indirect_label - (prog + 2 - start);
+	EMIT2(X86_JE, offset);                    /* je out */
+
+	pop_callee_regs(&prog, callee_regs_used);
 
 	EMIT1(0x58);                              /* pop rax */
 	if (stack_depth)
@@ -493,41 +487,21 @@ static void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used,
 	 * rdi == ctx (1st arg)
 	 * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
 	 */
-	RETPOLINE_RCX_BPF_JIT();
+	emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
 
 	/* out: */
+	ctx->tail_call_indirect_label = prog - start;
 	*pprog = prog;
 }
 
 static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
-				      u8 **pprog, int addr, u8 *image,
-				      bool *callee_regs_used, u32 stack_depth)
+				      u8 **pprog, u8 *ip,
+				      bool *callee_regs_used, u32 stack_depth,
+				      struct jit_context *ctx)
 {
 	int tcc_off = -4 - round_up(stack_depth, 8);
-	u8 *prog = *pprog;
-	int pop_bytes = 0;
-	int off1 = 20;
-	int poke_off;
-
-	/* count the additional bytes used for popping callee regs to stack
-	 * that need to be taken into account for jump offset that is used for
-	 * bailing out from of the tail call when limit is reached
-	 */
-	pop_bytes = get_pop_bytes(callee_regs_used);
-	off1 += pop_bytes;
-
-	/*
-	 * total bytes for:
-	 * - nop5/ jmpq $off
-	 * - pop callee regs
-	 * - sub rsp, $val if depth > 0
-	 * - pop rax
-	 */
-	poke_off = X86_PATCH_SIZE + pop_bytes + 1;
-	if (stack_depth) {
-		poke_off += 7;
-		off1 += 7;
-	}
+	u8 *prog = *pprog, *start = *pprog;
+	int offset;
 
 	/*
 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
@@ -535,28 +509,30 @@ static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
 	 */
 	EMIT2_off32(0x8B, 0x85, tcc_off);             /* mov eax, dword ptr [rbp - tcc_off] */
 	EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);         /* cmp eax, MAX_TAIL_CALL_CNT */
-	EMIT2(X86_JA, off1);                          /* ja out */
+
+	offset = ctx->tail_call_direct_label - (prog + 2 - start);
+	EMIT2(X86_JA, offset);                        /* ja out */
 	EMIT3(0x83, 0xC0, 0x01);                      /* add eax, 1 */
 	EMIT2_off32(0x89, 0x85, tcc_off);             /* mov dword ptr [rbp - tcc_off], eax */
 
-	poke->tailcall_bypass = image + (addr - poke_off - X86_PATCH_SIZE);
+	poke->tailcall_bypass = ip + (prog - start);
 	poke->adj_off = X86_TAIL_CALL_OFFSET;
-	poke->tailcall_target = image + (addr - X86_PATCH_SIZE);
+	poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE;
 	poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE;
 
 	emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE,
 		  poke->tailcall_bypass);
 
-	*pprog = prog;
-	pop_callee_regs(pprog, callee_regs_used);
-	prog = *pprog;
+	pop_callee_regs(&prog, callee_regs_used);
 	EMIT1(0x58);                                  /* pop rax */
 	if (stack_depth)
 		EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8));
 
 	memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
 	prog += X86_PATCH_SIZE;
+	/* out: */
+	ctx->tail_call_direct_label = prog - start;
 
 	*pprog = prog;
 }
@@ -827,9 +803,7 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
 	return 0;
 }
 
-static bool ex_handler_bpf(const struct exception_table_entry *x,
-			   struct pt_regs *regs, int trapnr,
-			   unsigned long error_code, unsigned long fault_addr)
+bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 {
 	u32 reg = x->fixup >> 8;
 
@@ -1222,8 +1196,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			/* speculation barrier */
 		case BPF_ST | BPF_NOSPEC:
 			if (boot_cpu_has(X86_FEATURE_XMM2))
-				/* Emit 'lfence' */
-				EMIT3(0x0F, 0xAE, 0xE8);
+				EMIT_LFENCE();
 			break;
 
 			/* ST: *(u8*)(dst_reg + off) = imm */
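Editor's note: emit_indirect_jump() (added in the first hunks and called above with reg 1, rcx) ends up with three encodings: a jump to the retpoline thunk, lfence; jmp *reg on AMD, or a bare jmp *reg. The raw bytes are easy to verify in user space; a sketch of just the byte-emission side (illustrative helper names; the thunk branch is omitted since it needs a relocation to __x86_indirect_thunk_array, and only the low eight GPRs are handled — r8+ would need a REX prefix):

#include <stdio.h>

enum { RAX = 0, RCX = 1, RDX = 2 };	/* ModRM register numbers */

static int emit_indirect_jmp(unsigned char *buf, int reg, int lfence_first)
{
	int len = 0;

	if (lfence_first) {		/* lfence = 0F AE E8 (EMIT_LFENCE) */
		buf[len++] = 0x0f;
		buf[len++] = 0xae;
		buf[len++] = 0xe8;
	}
	buf[len++] = 0xff;		/* jmp r/m64 (opcode extension /4) */
	buf[len++] = 0xe0 + reg;	/* ModRM 11.100.reg = 0xE0 + reg */
	return len;
}

int main(void)
{
	unsigned char buf[8];
	int i, n = emit_indirect_jmp(buf, RCX, 1);

	for (i = 0; i < n; i++)		/* 0f ae e8 ff e1 = lfence; jmp *%rcx */
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}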
@@ -1313,12 +1286,7 @@ st:			if (is_imm8(insn->off))
 			}
 			ex->insn = delta;
 
-			delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
-			if (!is_simm32(delta)) {
-				pr_err("extable->handler doesn't fit into 32-bit\n");
-				return -EFAULT;
-			}
-			ex->handler = delta;
+			ex->type = EX_TYPE_BPF;
 
 			if (dst_reg > BPF_REG_9) {
 				pr_err("verifier error\n");
@@ -1341,9 +1309,10 @@ st:			if (is_imm8(insn->off))
 			if (insn->imm == (BPF_AND | BPF_FETCH) ||
 			    insn->imm == (BPF_OR | BPF_FETCH) ||
 			    insn->imm == (BPF_XOR | BPF_FETCH)) {
-				u8 *branch_target;
 				bool is64 = BPF_SIZE(insn->code) == BPF_DW;
 				u32 real_src_reg = src_reg;
+				u32 real_dst_reg = dst_reg;
+				u8 *branch_target;
 
 				/*
 				 * Can't be implemented with a single x86 insn.
@@ -1354,11 +1323,13 @@ st:			if (is_imm8(insn->off))
 				emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
 				if (src_reg == BPF_REG_0)
 					real_src_reg = BPF_REG_AX;
+				if (dst_reg == BPF_REG_0)
+					real_dst_reg = BPF_REG_AX;
 
 				branch_target = prog;
 				/* Load old value */
 				emit_ldx(&prog, BPF_SIZE(insn->code),
-					 BPF_REG_0, dst_reg, insn->off);
+					 BPF_REG_0, real_dst_reg, insn->off);
 				/*
 				 * Perform the (commutative) operation locally,
 				 * put the result in the AUX_REG.
@@ -1369,7 +1340,8 @@ st:			if (is_imm8(insn->off))
 					  add_2reg(0xC0, AUX_REG, real_src_reg));
 				/* Attempt to swap in new value */
 				err = emit_atomic(&prog, BPF_CMPXCHG,
-						  dst_reg, AUX_REG, insn->off,
+						  real_dst_reg, AUX_REG,
+						  insn->off,
 						  BPF_SIZE(insn->code));
 				if (WARN_ON(err))
 					return err;
@@ -1383,11 +1355,10 @@ st:			if (is_imm8(insn->off))
 				/* Restore R0 after clobbering RAX */
 				emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
 				break;
-
 			}
 
 			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-						  insn->off, BPF_SIZE(insn->code));
+					  insn->off, BPF_SIZE(insn->code));
 			if (err)
 				return err;
 			break;
@@ -1409,13 +1380,16 @@ st:			if (is_imm8(insn->off))
 		case BPF_JMP | BPF_TAIL_CALL:
 			if (imm32)
 				emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
-							  &prog, addrs[i], image,
+							  &prog, image + addrs[i - 1],
 							  callee_regs_used,
-							  bpf_prog->aux->stack_depth);
+							  bpf_prog->aux->stack_depth,
+							  ctx);
 			else
 				emit_bpf_tail_call_indirect(&prog,
 							    callee_regs_used,
-							    bpf_prog->aux->stack_depth);
+							    bpf_prog->aux->stack_depth,
+							    image + addrs[i - 1],
+							    ctx);
 			break;
 
 			/* cond jump */
@@ -1744,7 +1718,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-			   struct bpf_prog *p, int stack_size, bool mod_ret)
+			   struct bpf_prog *p, int stack_size, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
@@ -1777,11 +1751,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (emit_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
-	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+	/*
+	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
 	 * of the previous call which is then passed on the stack to
 	 * the next BPF program.
+	 *
+	 * BPF_TRAMP_FENTRY trampoline may need to return the return
+	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
 	 */
-	if (mod_ret)
+	if (save_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
 	/* replace 2 nops with JE insn, since jmp target is known */
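Editor's note: the real_dst_reg change in these hunks fixes atomic BPF_{AND,OR,XOR} | BPF_FETCH when the destination register is R0: the emitted compare-exchange loop uses RAX (R0) internally, so the destination has to be rerouted through BPF_REG_AX. The loop the JIT builds has roughly this C shape (a sketch with C11 atomics; fetch_and is an illustrative name, not a kernel helper):

#include <stdatomic.h>
#include <stdio.h>

static unsigned long fetch_and(atomic_ulong *dst, unsigned long src)
{
	unsigned long old = atomic_load(dst);

	/* load old; compute old & src; cmpxchg it in; retry if dst moved */
	while (!atomic_compare_exchange_weak(dst, &old, old & src))
		;
	return old;	/* BPF_FETCH: the old value ends up in src_reg */
}

int main(void)
{
	atomic_ulong v = 0xf0f0;
	unsigned long old = fetch_and(&v, 0xff00);

	printf("old=%#lx new=%#lx\n", old, atomic_load(&v));
	return 0;
}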
@@ -1828,13 +1806,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-		      struct bpf_tramp_progs *tp, int stack_size)
+		      struct bpf_tramp_progs *tp, int stack_size,
+		      bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tp->nr_progs; i++) {
-		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+				    save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1877,6 +1857,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return false;
+
+	/*
+	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+	 * and it must be used alone.
+	 */
+	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+		return false;
+
+	return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1946,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
 	u8 **branches = NULL;
 	u8 *prog;
+	bool save_ret;
 
 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
 	if (nr_args > 6)
 		return -ENOTSUPP;
 
-	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
-	if (flags & BPF_TRAMP_F_CALL_ORIG)
-		stack_size += 8; /* room for return value of orig_call */
+	/* room for return value of orig_call or fentry prog */
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret)
+		stack_size += 8;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2004,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size))
+		if (invoke_bpf(m, &prog, fentry, stack_size,
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
 	if (fmod_ret->nr_progs) {
@@ -2052,7 +2052,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
+		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2072,9 +2072,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-		/* restore original return value back into RAX */
-		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	}
 
+	/* restore return value of orig_call or fentry prog back into RAX */
+	if (save_ret)
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	EMIT1(0x5B); /* pop rbx */
 	EMIT1(0xC9); /* leave */
@@ -2094,24 +2095,6 @@ cleanup:
 	return ret;
 }
 
-static int emit_fallback_jump(u8 **pprog)
-{
-	u8 *prog = *pprog;
-	int err = 0;
-
-#ifdef CONFIG_RETPOLINE
-	/* Note that this assumes the the compiler uses external
-	 * thunks for indirect calls. Both clang and GCC use the same
-	 * naming convention for external thunks.
-	 */
-	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
-#else
-	EMIT2(0xFF, 0xE2);	/* jmp rdx */
-#endif
-	*pprog = prog;
-	return err;
-}
-
 static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 {
 	u8 *jg_reloc, *prog = *pprog;
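Editor's note: is_valid_bpf_tramp_flags() centralizes the trampoline-flag constraints, including the "must be used alone" rule for BPF_TRAMP_F_RET_FENTRY_RET, which is the usual (flags & F) && (flags & ~F) bit test. A standalone sketch with made-up flag values (the real BPF_TRAMP_F_* constants live in include/linux/bpf.h):

#include <stdbool.h>
#include <stdio.h>

#define F_RESTORE_REGS   (1U << 0)	/* illustrative values only */
#define F_CALL_ORIG      (1U << 1)
#define F_SKIP_FRAME     (1U << 2)
#define F_IP_ARG         (1U << 3)
#define F_RET_FENTRY_RET (1U << 4)

static bool tramp_flags_valid(unsigned int flags)
{
	/* can't skip the frame and still restore regs for orig_call */
	if ((flags & F_RESTORE_REGS) && (flags & F_SKIP_FRAME))
		return false;
	/* F_RET_FENTRY_RET (bpf_struct_ops) must be used alone */
	if ((flags & F_RET_FENTRY_RET) && (flags & ~F_RET_FENTRY_RET))
		return false;
	return true;
}

int main(void)
{
	printf("%d %d %d\n",	/* prints: 1 0 0 */
	       tramp_flags_valid(F_RET_FENTRY_RET),
	       tramp_flags_valid(F_RET_FENTRY_RET | F_IP_ARG),
	       tramp_flags_valid(F_RESTORE_REGS | F_SKIP_FRAME));
	return 0;
}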
@@ -2133,9 +2116,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
 		if (err)
 			return err;
 
-		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
-		if (err)
-			return err;
+		emit_indirect_jump(&prog, 2 /* rdx */, prog);
 
 		*pprog = prog;
 		return 0;
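Editor's note: for context on this last hunk, emit_bpf_dispatcher() generates a binary search over the sorted addresses of the registered programs, taking a direct jump on a match and, after this patch, an emit_indirect_jump()-emitted jmp *%rdx when none matches. The generated control flow corresponds roughly to this C (a sketch; the kernel emits straight-line x86 with jg/je rather than recursion, and the pointer/integer casts assume a flat address space, which holds on x86-64):

#include <stdint.h>
#include <stdio.h>

typedef void (*bpf_func_t)(void);

static void prog_a(void) { puts("a"); }
static void prog_b(void) { puts("b"); }

static void dispatch(const uintptr_t *progs, int a, int b, bpf_func_t f)
{
	if (a == b) {
		if ((uintptr_t)f == progs[a])	/* je: known target */
			((bpf_func_t)progs[a])();
		else
			f();			/* fallback: jmp *%rdx */
		return;
	}

	int m = (a + b) / 2;			/* jg: search upper half */
	if ((uintptr_t)f > progs[m])
		dispatch(progs, m + 1, b, f);
	else
		dispatch(progs, a, m, f);
}

int main(void)
{
	uintptr_t progs[2] = { (uintptr_t)prog_a, (uintptr_t)prog_b };

	if (progs[0] > progs[1]) {		/* dispatch table is sorted */
		uintptr_t t = progs[0];
		progs[0] = progs[1];
		progs[1] = t;
	}
	dispatch(progs, 0, 1, prog_b);		/* prints "b" */
	return 0;
}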