Diffstat (limited to 'arch/arm64/net/bpf_jit_comp.c')
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 715
1 file changed, 698 insertions(+), 17 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index f08a4447d363..7ca8779ae34f 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/memory.h> #include <linux/printk.h> #include <linux/slab.h> @@ -18,6 +19,7 @@ #include <asm/cacheflush.h> #include <asm/debug-monitors.h> #include <asm/insn.h> +#include <asm/patching.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -78,6 +80,15 @@ struct jit_ctx { int fpb_offset; }; +struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { if (ctx->image != NULL) @@ -140,6 +151,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } } +static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address @@ -159,6 +176,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } } +static inline void emit_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp = bpf2a64[TMP_REG_1]; + + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { @@ -235,13 +260,30 @@ static bool is_lsi_offset(int offset, int scale) return true; } +/* generated prologue: + * bti c // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp x19, x20, [sp, #-16]! + * stp x21, x22, [sp, #-16]! + * stp x25, x26, [sp, #-16]! + * stp x27, x28, [sp, #-16]! + * mov x25, sp + * mov tcc, #0 + * // PROLOGUE_OFFSET + */ + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) +#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 
1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) + /* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || \ - IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) -#define PROLOGUE_OFFSET 9 -#else -#define PROLOGUE_OFFSET 8 -#endif +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8) static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -280,12 +322,14 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + emit_bti(A64_BTI_C, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx); + /* Sign lr */ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL)) emit(A64_PACIASP, ctx); - /* BTI landing pad */ - else if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_C, ctx); /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); @@ -312,8 +356,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) } /* BTI landing pad for the tail call, done with a BR */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_J, ctx); + emit_bti(A64_BTI_J, ctx); } emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); @@ -557,6 +600,53 @@ static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx) return 0; } +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .global dummy_tramp\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, x30\n" +" mov x30, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. 
+ */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + static void build_epilogue(struct jit_ctx *ctx) { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -991,8 +1081,7 @@ emit_cond_jmp: &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -1336,6 +1425,13 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1; if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1; @@ -1356,7 +1452,7 @@ struct arm64_jit_data { struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { - int image_size, prog_size, extable_size; + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -1426,13 +1522,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + build_plt(&ctx); + extable_align = __alignof__(struct exception_table_entry); extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry); /* Now we know the actual image size. */ prog_size = sizeof(u32) * ctx.idx; - image_size = prog_size + extable_size; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1444,7 +1544,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.image = (__le32 *)image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; @@ -1458,9 +1558,10 @@ skip_init_ctx: } build_epilogue(&ctx); + build_plt(&ctx); /* 3. Extra pass to validate JITed code. 
*/ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; @@ -1537,3 +1638,583 @@ bool bpf_jit_supports_subprog_tailcalls(void) { return true; } + +static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, + int args_off, int retval_off, int run_ctx_off, + bool save_ret) +{ + u32 *branch; + u64 enter_prog; + u64 exit_prog; + struct bpf_prog *p = l->link.prog; + int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); + + if (p->aux->sleepable) { + enter_prog = (u64)__bpf_prog_enter_sleepable; + exit_prog = (u64)__bpf_prog_exit_sleepable; + } else { + enter_prog = (u64)__bpf_prog_enter; + exit_prog = (u64)__bpf_prog_exit; + } + + if (l->cookie == 0) { + /* if cookie is zero, one instruction is enough to store it */ + emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); + } else { + emit_a64_mov_i64(A64_R(10), l->cookie, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), + ctx); + } + + /* save p to callee saved register x19 to avoid loading p with mov_i64 + * each time. + */ + emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: &run_ctx */ + emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); + + emit_call(enter_prog, ctx); + + /* if (__bpf_prog_enter(prog) == 0) + * goto skip_exec_of_prog; + */ + branch = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + + /* save return value to callee saved register x20 */ + emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); + + emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + + if (ctx->image) { + int offset = &ctx->image[ctx->idx] - branch; + *branch = A64_CBZ(1, A64_R(0), offset); + } + + /* arg1: prog */ + emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); + /* arg3: &run_ctx */ + emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); + + emit_call(exit_prog, ctx); +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, + int args_off, int retval_off, int run_ctx_off, + u32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tl->nr_links; i++) { + invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, + run_ctx_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. + */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_STR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_LDR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). 
+ * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_links *tlinks, void *orig_call, + int nargs, u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int args_off; + int nargs_off; + int ip_off; + int run_ctx_off; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + u32 **branches = NULL; + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * [ x20 ] callee saved reg x20 + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * + * [ argN ] + * [ ... ] + * SP + args_off [ arg1 ] + * + * SP + nargs_off [ args count ] + * + * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag + * + * SP + run_ctx_off [ bpf_tramp_run_ctx ] + */ + + stack_size = 0; + run_ctx_off = stack_size; + /* room for bpf_tramp_run_ctx */ + stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); + + ip_off = stack_size; + /* room for IP address argument */ + if (flags & BPF_TRAMP_F_IP_ARG) + stack_size += 8; + + nargs_off = stack_size; + /* room for args count */ + stack_size += 8; + + args_off = stack_size; + /* room for args */ + stack_size += nargs * 8; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently x19 and x20 are used */ + regs_off = stack_size; + stack_size += 16; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. 
+ */ + emit_bti(A64_BTI_JC, ctx); + + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* frame for patched function */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + if (flags & BPF_TRAMP_F_IP_ARG) { + /* save ip address of the traced function */ + emit_addr_mov_i64(A64_R(10), (const u64)orig_call, ctx); + emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); + } + + /* save args count*/ + emit(A64_MOVZ(1, A64_R(10), nargs, 0), ctx); + emit(A64_STR64I(A64_R(10), A64_SP, nargs_off), ctx); + + /* save args */ + save_args(ctx, args_off, nargs); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_links; i++) + invoke_bpf_prog(ctx, fentry->links[i], args_off, + retval_off, run_ctx_off, + flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, + run_ctx_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, nargs); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_BLR(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = A64_CBNZ(1, A64_R(10), offset); + } + + for (i = 0; i < fexit->nr_links; i++) + invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, + run_ctx_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, nargs); + + /* restore callee saved register x19 and x20 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + + if (ctx->image) + bpf_flush_icache(ctx->image, ctx->image + ctx->idx); + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_links *tlinks, + void *orig_call) +{ + int ret; + int nargs = 
m->nr_args; + int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + + /* the first 8 arguments are passed by registers */ + if (nargs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + if (ret < 0) + return ret; + + if (ret > max_insns) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(image_end - image)); + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) + ret = -EINVAL; + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + return ret; +} + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. 
Since kernel function + * entry is set up by ftrace, we reply on ftrace to poke kernel + * functions. + */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. + */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +} |