 arch/Kconfig                                                  |   8
 arch/x86/Kconfig                                              |   1
 arch/x86/include/asm/unwind.h                                 |  23
 arch/x86/kernel/Makefile                                      |   1
 arch/x86/kernel/kprobes/common.h                              |   1
 arch/x86/kernel/kprobes/core.c                                | 107
 arch/x86/kernel/kprobes/opt.c                                 |  25
 arch/x86/kernel/rethook.c                                     | 127
 arch/x86/kernel/unwind_orc.c                                  |  10
 drivers/net/ethernet/intel/ice/ice.h                          |   2
 drivers/net/ethernet/intel/ice/ice_xsk.c                      |   5
 include/linux/kprobes.h                                       |  51
 kernel/Makefile                                               |   1
 kernel/bpf/btf.c                                              |   2
 kernel/kprobes.c                                              | 124
 kernel/trace/fprobe.c                                         |   8
 kernel/trace/trace_kprobe.c                                   |   4
 net/xdp/xsk_buff_pool.c                                       |   8
 tools/bpf/bpftool/feature.c                                   |   5
 tools/bpf/bpftool/gen.c                                       |   2
 tools/include/uapi/linux/bpf.h                                |   8
 tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c     |   3
 tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c  |  12
 tools/testing/selftests/bpf/test_lpm_map.c                    |   3
 24 files changed, 354 insertions(+), 187 deletions(-)
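
A rough sketch of the rethook consumer pattern that the kernel/kprobes.c and kernel/trace/fprobe.c hunks below follow: allocate a rethook with a private data pointer and an exit handler, pre-populate it with embedded nodes, then pull a node with rethook_try_get() from the function-entry hook and arm it with rethook_hook(), or hand it back with rethook_recycle(). Only the rethook_* calls, the rethook_node embedding, and the three-argument handler signature are taken from the patch; everything prefixed "my_" is illustrative and not part of it.

#include <linux/kernel.h>
#include <linux/ptrace.h>
#include <linux/rethook.h>
#include <linux/slab.h>

struct my_ret_node {
	struct rethook_node	node;		/* embedded; rethook only sees this */
	unsigned long		entry_ip;	/* consumer-private per-hit data */
};

/* Exit handler: called via arch_rethook_trampoline() when the hooked function returns. */
static void my_exit_handler(struct rethook_node *rhn, void *data, struct pt_regs *regs)
{
	struct my_ret_node *n = container_of(rhn, struct my_ret_node, node);

	/* 'data' is the pointer passed to rethook_alloc(); ret_addr is the real return address. */
	pr_debug("entry %lx returned to %lx\n", n->entry_ip, rhn->ret_addr);
}

static struct rethook *my_rethook_create(void *priv, int nr_nodes)
{
	struct rethook *rh;
	int i;

	rh = rethook_alloc(priv, my_exit_handler);
	if (!rh)
		return NULL;

	for (i = 0; i < nr_nodes; i++) {
		struct my_ret_node *n = kzalloc(sizeof(*n), GFP_KERNEL);

		if (!n) {
			rethook_free(rh);	/* also frees nodes already added */
			return NULL;
		}
		rethook_add_node(rh, &n->node);
	}
	return rh;
}

/*
 * Called from the consumer's function-entry hook (a kprobe pre-handler or an
 * ftrace callback).  'mcount' tells the arch code whether regs come from an
 * fentry/mcount context, as kprobe_ftrace(p) does in pre_handler_kretprobe().
 */
static bool my_entry_hook(struct rethook *rh, struct pt_regs *regs, bool mcount)
{
	struct rethook_node *rhn = rethook_try_get(rh);

	if (!rhn)
		return false;	/* all nodes in flight: count a miss */

	container_of(rhn, struct my_ret_node, node)->entry_ip = instruction_pointer(regs);
	rethook_hook(rhn, regs, mcount);	/* or rethook_recycle(rhn) to bail out */
	return true;
}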
diff --git a/arch/Kconfig b/arch/Kconfig index 84bc1de02720..33e06966f248 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -164,7 +164,13 @@ config ARCH_USE_BUILTIN_BSWAP config KRETPROBES def_bool y - depends on KPROBES && HAVE_KRETPROBES + depends on KPROBES && (HAVE_KRETPROBES || HAVE_RETHOOK) + +config KRETPROBE_ON_RETHOOK + def_bool y + depends on HAVE_RETHOOK + depends on KRETPROBES + select RETHOOK config USER_RETURN_NOTIFIER bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 7340d9f01b62..ff45a27fc29c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -224,6 +224,7 @@ config X86 select HAVE_KPROBES_ON_FTRACE select HAVE_FUNCTION_ERROR_INJECTION select HAVE_KRETPROBES + select HAVE_RETHOOK select HAVE_KVM select HAVE_LIVEPATCH if X86_64 select HAVE_MIXED_BREAKPOINTS_REGS diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 2a1f8734416d..7cede4dc21f0 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,7 +4,7 @@ #include <linux/sched.h> #include <linux/ftrace.h> -#include <linux/kprobes.h> +#include <linux/rethook.h> #include <asm/ptrace.h> #include <asm/stacktrace.h> @@ -16,7 +16,7 @@ struct unwind_state { unsigned long stack_mask; struct task_struct *task; int graph_idx; -#ifdef CONFIG_KRETPROBES +#if defined(CONFIG_RETHOOK) struct llist_node *kr_cur; #endif bool error; @@ -104,19 +104,18 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, #endif static inline -unsigned long unwind_recover_kretprobe(struct unwind_state *state, - unsigned long addr, unsigned long *addr_p) +unsigned long unwind_recover_rethook(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) { -#ifdef CONFIG_KRETPROBES - return is_kretprobe_trampoline(addr) ? - kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) : - addr; -#else - return addr; +#ifdef CONFIG_RETHOOK + if (is_rethook_trampoline(addr)) + return rethook_find_ret_addr(state->task, (unsigned long)addr_p, + &state->kr_cur); #endif + return addr; } -/* Recover the return address modified by kretprobe and ftrace_graph. */ +/* Recover the return address modified by rethook and ftrace_graph. 
*/ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long addr, unsigned long *addr_p) @@ -125,7 +124,7 @@ unsigned long unwind_recover_ret_addr(struct unwind_state *state, ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, addr, addr_p); - return unwind_recover_kretprobe(state, ret, addr_p); + return unwind_recover_rethook(state, ret, addr_p); } /* diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6462e3dd98f4..c41ef42adbe8 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -103,6 +103,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o obj-$(CONFIG_X86_TSC) += trace_clock.o obj-$(CONFIG_TRACING) += trace.o +obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_CRASH_CORE) += crash_core_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += machine_kexec_$(BITS).o obj-$(CONFIG_KEXEC_CORE) += relocate_kernel_$(BITS).o crash.o diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index 7d3a2e2daf01..c993521d4933 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -6,6 +6,7 @@ #include <asm/asm.h> #include <asm/frame.h> +#include <asm/insn.h> #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 8ef933c03afa..7c4ab8870da4 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -811,18 +811,6 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs, = (regs->flags & X86_EFLAGS_IF); } -void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) -{ - unsigned long *sara = stack_addr(regs); - - ri->ret_addr = (kprobe_opcode_t *) *sara; - ri->fp = sara; - - /* Replace the return addr with trampoline addr */ - *sara = (unsigned long) &__kretprobe_trampoline; -} -NOKPROBE_SYMBOL(arch_prepare_kretprobe); - static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { @@ -1023,101 +1011,6 @@ int kprobe_int3_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(kprobe_int3_handler); -/* - * When a retprobed function returns, this code saves registers and - * calls trampoline_handler() runs, which calls the kretprobe's handler. - */ -asm( - ".text\n" - ".global __kretprobe_trampoline\n" - ".type __kretprobe_trampoline, @function\n" - "__kretprobe_trampoline:\n" -#ifdef CONFIG_X86_64 - ANNOTATE_NOENDBR - /* Push a fake return address to tell the unwinder it's a kretprobe. */ - " pushq $__kretprobe_trampoline\n" - UNWIND_HINT_FUNC - /* Save the 'sp - 8', this will be fixed later. */ - " pushq %rsp\n" - " pushfq\n" - SAVE_REGS_STRING - " movq %rsp, %rdi\n" - " call trampoline_handler\n" - RESTORE_REGS_STRING - /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ - " addq $8, %rsp\n" - " popfq\n" -#else - /* Push a fake return address to tell the unwinder it's a kretprobe. */ - " pushl $__kretprobe_trampoline\n" - UNWIND_HINT_FUNC - /* Save the 'sp - 4', this will be fixed later. */ - " pushl %esp\n" - " pushfl\n" - SAVE_REGS_STRING - " movl %esp, %eax\n" - " call trampoline_handler\n" - RESTORE_REGS_STRING - /* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */ - " addl $4, %esp\n" - " popfl\n" -#endif - ASM_RET - ".size __kretprobe_trampoline, .-__kretprobe_trampoline\n" -); -NOKPROBE_SYMBOL(__kretprobe_trampoline); -/* - * __kretprobe_trampoline() skips updating frame pointer. 
The frame pointer - * saved in trampoline_handler() points to the real caller function's - * frame pointer. Thus the __kretprobe_trampoline() doesn't have a - * standard stack frame with CONFIG_FRAME_POINTER=y. - * Let's mark it non-standard function. Anyway, FP unwinder can correctly - * unwind without the hint. - */ -STACK_FRAME_NON_STANDARD_FP(__kretprobe_trampoline); - -/* This is called from kretprobe_trampoline_handler(). */ -void arch_kretprobe_fixup_return(struct pt_regs *regs, - kprobe_opcode_t *correct_ret_addr) -{ - unsigned long *frame_pointer = ®s->sp + 1; - - /* Replace fake return address with real one. */ - *frame_pointer = (unsigned long)correct_ret_addr; -} - -/* - * Called from __kretprobe_trampoline - */ -__used __visible void trampoline_handler(struct pt_regs *regs) -{ - unsigned long *frame_pointer; - - /* fixup registers */ - regs->cs = __KERNEL_CS; -#ifdef CONFIG_X86_32 - regs->gs = 0; -#endif - regs->ip = (unsigned long)&__kretprobe_trampoline; - regs->orig_ax = ~0UL; - regs->sp += sizeof(long); - frame_pointer = ®s->sp + 1; - - /* - * The return address at 'frame_pointer' is recovered by the - * arch_kretprobe_fixup_return() which called from the - * kretprobe_trampoline_handler(). - */ - kretprobe_trampoline_handler(regs, frame_pointer); - - /* - * Copy FLAGS to 'pt_regs::sp' so that __kretprobe_trapmoline() - * can do RET right after POPF. - */ - regs->sp = regs->flags; -} -NOKPROBE_SYMBOL(trampoline_handler); - int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index b4a54a52aa59..e6b8c5362b94 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -106,7 +106,8 @@ asm ( ".global optprobe_template_entry\n" "optprobe_template_entry:\n" #ifdef CONFIG_X86_64 - /* We don't bother saving the ss register */ + " pushq $" __stringify(__KERNEL_DS) "\n" + /* Save the 'sp - 8', this will be fixed later. */ " pushq %rsp\n" " pushfq\n" ".global optprobe_template_clac\n" @@ -121,14 +122,17 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 - /* Move flags to rsp */ + /* Copy 'regs->flags' into 'regs->ss'. */ " movq 18*8(%rsp), %rdx\n" - " movq %rdx, 19*8(%rsp)\n" + " movq %rdx, 20*8(%rsp)\n" RESTORE_REGS_STRING - /* Skip flags entry */ - " addq $8, %rsp\n" + /* Skip 'regs->flags' and 'regs->sp'. */ + " addq $16, %rsp\n" + /* And pop flags register from 'regs->ss'. */ " popfq\n" #else /* CONFIG_X86_32 */ + " pushl %ss\n" + /* Save the 'sp - 4', this will be fixed later. */ " pushl %esp\n" " pushfl\n" ".global optprobe_template_clac\n" @@ -142,12 +146,13 @@ asm ( ".global optprobe_template_call\n" "optprobe_template_call:\n" ASM_NOP5 - /* Move flags into esp */ + /* Copy 'regs->flags' into 'regs->ss'. */ " movl 14*4(%esp), %edx\n" - " movl %edx, 15*4(%esp)\n" + " movl %edx, 16*4(%esp)\n" RESTORE_REGS_STRING - /* Skip flags entry */ - " addl $4, %esp\n" + /* Skip 'regs->flags' and 'regs->sp'. */ + " addl $8, %esp\n" + /* And pop flags register from 'regs->ss'. 
*/ " popfl\n" #endif ".global optprobe_template_end\n" @@ -179,6 +184,8 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) kprobes_inc_nmissed_count(&op->kp); } else { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + /* Adjust stack pointer */ + regs->sp += sizeof(long); /* Save skipped registers */ regs->cs = __KERNEL_CS; #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/rethook.c b/arch/x86/kernel/rethook.c new file mode 100644 index 000000000000..8a1c0111ae79 --- /dev/null +++ b/arch/x86/kernel/rethook.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * x86 implementation of rethook. Mostly copied from arch/x86/kernel/kprobes/core.c. + */ +#include <linux/bug.h> +#include <linux/rethook.h> +#include <linux/kprobes.h> +#include <linux/objtool.h> + +#include "kprobes/common.h" + +__visible void arch_rethook_trampoline_callback(struct pt_regs *regs); + +#ifndef ANNOTATE_NOENDBR +#define ANNOTATE_NOENDBR +#endif + +/* + * When a target function returns, this code saves registers and calls + * arch_rethook_trampoline_callback(), which calls the rethook handler. + */ +asm( + ".text\n" + ".global arch_rethook_trampoline\n" + ".type arch_rethook_trampoline, @function\n" + "arch_rethook_trampoline:\n" +#ifdef CONFIG_X86_64 + ANNOTATE_NOENDBR /* This is only jumped from ret instruction */ + /* Push a fake return address to tell the unwinder it's a rethook. */ + " pushq $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + " pushq $" __stringify(__KERNEL_DS) "\n" + /* Save the 'sp - 16', this will be fixed later. */ + " pushq %rsp\n" + " pushfq\n" + SAVE_REGS_STRING + " movq %rsp, %rdi\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->ss'. */ + " addq $16, %rsp\n" + " popfq\n" +#else + /* Push a fake return address to tell the unwinder it's a rethook. */ + " pushl $arch_rethook_trampoline\n" + UNWIND_HINT_FUNC + " pushl %ss\n" + /* Save the 'sp - 8', this will be fixed later. */ + " pushl %esp\n" + " pushfl\n" + SAVE_REGS_STRING + " movl %esp, %eax\n" + " call arch_rethook_trampoline_callback\n" + RESTORE_REGS_STRING + /* In the callback function, 'regs->flags' is copied to 'regs->ss'. */ + " addl $8, %esp\n" + " popfl\n" +#endif + ASM_RET + ".size arch_rethook_trampoline, .-arch_rethook_trampoline\n" +); +NOKPROBE_SYMBOL(arch_rethook_trampoline); + +/* + * Called from arch_rethook_trampoline + */ +__used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + unsigned long *frame_pointer; + + /* fixup registers */ + regs->cs = __KERNEL_CS; +#ifdef CONFIG_X86_32 + regs->gs = 0; +#endif + regs->ip = (unsigned long)&arch_rethook_trampoline; + regs->orig_ax = ~0UL; + regs->sp += 2*sizeof(long); + frame_pointer = (long *)(regs + 1); + + /* + * The return address at 'frame_pointer' is recovered by the + * arch_rethook_fixup_return() which called from this + * rethook_trampoline_handler(). + */ + rethook_trampoline_handler(regs, (unsigned long)frame_pointer); + + /* + * Copy FLAGS to 'pt_regs::ss' so that arch_rethook_trapmoline() + * can do RET right after POPF. + */ + *(unsigned long *)®s->ss = regs->flags; +} +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +/* + * arch_rethook_trampoline() skips updating frame pointer. The frame pointer + * saved in arch_rethook_trampoline_callback() points to the real caller + * function's frame pointer. Thus the arch_rethook_trampoline() doesn't have + * a standard stack frame with CONFIG_FRAME_POINTER=y. 
+ * Let's mark it non-standard function. Anyway, FP unwinder can correctly + * unwind without the hint. + */ +STACK_FRAME_NON_STANDARD_FP(arch_rethook_trampoline); + +/* This is called from rethook_trampoline_handler(). */ +void arch_rethook_fixup_return(struct pt_regs *regs, + unsigned long correct_ret_addr) +{ + unsigned long *frame_pointer = (void *)(regs + 1); + + /* Replace fake return address with real one. */ + *frame_pointer = correct_ret_addr; +} +NOKPROBE_SYMBOL(arch_rethook_fixup_return); + +void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount) +{ + unsigned long *stack = (unsigned long *)regs->sp; + + rh->ret_addr = stack[0]; + rh->frame = regs->sp; + + /* Replace the return addr with trampoline addr */ + stack[0] = (unsigned long) arch_rethook_trampoline; +} +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 2de3c8c5eba9..794fdef2501a 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -550,15 +550,15 @@ bool unwind_next_frame(struct unwind_state *state) } /* * There is a small chance to interrupt at the entry of - * __kretprobe_trampoline() where the ORC info doesn't exist. - * That point is right after the RET to __kretprobe_trampoline() + * arch_rethook_trampoline() where the ORC info doesn't exist. + * That point is right after the RET to arch_rethook_trampoline() * which was modified return address. - * At that point, the @addr_p of the unwind_recover_kretprobe() + * At that point, the @addr_p of the unwind_recover_rethook() * (this has to point the address of the stack entry storing * the modified return address) must be "SP - (a stack entry)" * because SP is incremented by the RET. */ - state->ip = unwind_recover_kretprobe(state, state->ip, + state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; @@ -573,7 +573,7 @@ bool unwind_next_frame(struct unwind_state *state) goto err; } /* See UNWIND_HINT_TYPE_REGS case comment. 
*/ - state->ip = unwind_recover_kretprobe(state, state->ip, + state->ip = unwind_recover_rethook(state, state->ip, (unsigned long *)(state->sp - sizeof(long))); if (state->full_regs) diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index b0b27bfcd7a2..d4f1874df7d0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -710,7 +710,7 @@ static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) struct ice_vsi *vsi = ring->vsi; u16 qid; - qid = ring->q_index - vsi->num_xdp_txq; + qid = ring->q_index - vsi->alloc_txq; if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) return NULL; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 88853a6ed931..dfbcaf08520e 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -608,6 +608,9 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) */ dma_rmb(); + if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use)) + break; + xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.pkt_len) & @@ -754,7 +757,7 @@ skip: next_dd = next_dd + tx_thresh; if (next_dd >= desc_cnt) next_dd = tx_thresh - 1; - } while (budget--); + } while (--budget); xdp_ring->next_dd = next_dd; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 312ff997c743..157168769fc2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -28,6 +28,7 @@ #include <linux/ftrace.h> #include <linux/refcount.h> #include <linux/freelist.h> +#include <linux/rethook.h> #include <asm/kprobes.h> #ifdef CONFIG_KPROBES @@ -149,13 +150,20 @@ struct kretprobe { int maxactive; int nmissed; size_t data_size; +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + struct rethook *rh; +#else struct freelist_head freelist; struct kretprobe_holder *rph; +#endif }; #define KRETPROBE_MAX_DATA_SIZE 4096 struct kretprobe_instance { +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + struct rethook_node node; +#else union { struct freelist_node freelist; struct rcu_head rcu; @@ -164,6 +172,7 @@ struct kretprobe_instance { struct kretprobe_holder *rph; kprobe_opcode_t *ret_addr; void *fp; +#endif char data[]; }; @@ -186,10 +195,24 @@ extern void kprobe_busy_begin(void); extern void kprobe_busy_end(void); #ifdef CONFIG_KRETPROBES -extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs); +/* Check whether @p is used for implementing a trampoline. 
*/ extern int arch_trampoline_kprobe(struct kprobe *p); +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri) +{ + RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(), + "Kretprobe is accessed from instance under preemptive context"); + + return (struct kretprobe *)READ_ONCE(ri->node.rethook->data); +} +static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) +{ + return ri->node.ret_addr; +} +#else +extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs); void arch_kretprobe_fixup_return(struct pt_regs *regs, kprobe_opcode_t *correct_ret_addr); @@ -232,6 +255,12 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance return READ_ONCE(ri->rph->rp); } +static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri) +{ + return (unsigned long)ri->ret_addr; +} +#endif /* CONFIG_KRETPROBE_ON_RETHOOK */ + #else /* !CONFIG_KRETPROBES */ static inline void arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) @@ -395,7 +424,11 @@ void unregister_kretprobe(struct kretprobe *rp); int register_kretprobes(struct kretprobe **rps, int num); void unregister_kretprobes(struct kretprobe **rps, int num); +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +#define kprobe_flush_task(tk) do {} while (0) +#else void kprobe_flush_task(struct task_struct *tk); +#endif void kprobe_free_init_mem(void); @@ -509,6 +542,19 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) #endif /* !CONFIG_OPTPROBES */ #ifdef CONFIG_KRETPROBES +#ifdef CONFIG_KRETPROBE_ON_RETHOOK +static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) +{ + return is_rethook_trampoline(addr); +} + +static nokprobe_inline +unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, + struct llist_node **cur) +{ + return rethook_find_ret_addr(tsk, (unsigned long)fp, cur); +} +#else static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) { return (void *)addr == kretprobe_trampoline_addr(); @@ -516,6 +562,7 @@ static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp, struct llist_node **cur); +#endif #else static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr) { diff --git a/kernel/Makefile b/kernel/Makefile index 56f4ee97f328..471d71935e90 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -108,6 +108,7 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_TRACE_CLOCK) += trace/ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_TRACEPOINTS) += trace/ +obj-$(CONFIG_RETHOOK) += trace/ obj-$(CONFIG_IRQ_WORK) += irq_work.o obj-$(CONFIG_CPU_PM) += cpu_pm.o obj-$(CONFIG_BPF) += bpf/ diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 24788ce564a0..0918a39279f6 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5507,7 +5507,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log, } args = (const struct btf_param *)(func + 1); nargs = btf_type_vlen(func); - if (nargs >= MAX_BPF_FUNC_ARGS) { + if (nargs > MAX_BPF_FUNC_ARGS) { bpf_log(log, "The function %s has %d arguments. 
Too many.\n", tname, nargs); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 185badc780b7..dbe57df2e199 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1237,6 +1237,27 @@ void kprobes_inc_nmissed_count(struct kprobe *p) } NOKPROBE_SYMBOL(kprobes_inc_nmissed_count); +static struct kprobe kprobe_busy = { + .addr = (void *) get_kprobe, +}; + +void kprobe_busy_begin(void) +{ + struct kprobe_ctlblk *kcb; + + preempt_disable(); + __this_cpu_write(current_kprobe, &kprobe_busy); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; +} + +void kprobe_busy_end(void) +{ + __this_cpu_write(current_kprobe, NULL); + preempt_enable(); +} + +#if !defined(CONFIG_KRETPROBE_ON_RETHOOK) static void free_rp_inst_rcu(struct rcu_head *head) { struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu); @@ -1258,26 +1279,6 @@ static void recycle_rp_inst(struct kretprobe_instance *ri) } NOKPROBE_SYMBOL(recycle_rp_inst); -static struct kprobe kprobe_busy = { - .addr = (void *) get_kprobe, -}; - -void kprobe_busy_begin(void) -{ - struct kprobe_ctlblk *kcb; - - preempt_disable(); - __this_cpu_write(current_kprobe, &kprobe_busy); - kcb = get_kprobe_ctlblk(); - kcb->kprobe_status = KPROBE_HIT_ACTIVE; -} - -void kprobe_busy_end(void) -{ - __this_cpu_write(current_kprobe, NULL); - preempt_enable(); -} - /* * This function is called from delayed_put_task_struct() when a task is * dead and cleaned up to recycle any kretprobe instances associated with @@ -1327,6 +1328,7 @@ static inline void free_rp_inst(struct kretprobe *rp) rp->rph = NULL; } } +#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */ /* Add the new probe to 'ap->list'. */ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) @@ -1925,6 +1927,7 @@ static struct notifier_block kprobe_exceptions_nb = { #ifdef CONFIG_KRETPROBES +#if !defined(CONFIG_KRETPROBE_ON_RETHOOK) /* This assumes the 'tsk' is the current task or the is not running. */ static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk, struct llist_node **cur) @@ -2087,6 +2090,57 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) return 0; } NOKPROBE_SYMBOL(pre_handler_kretprobe); +#else /* CONFIG_KRETPROBE_ON_RETHOOK */ +/* + * This kprobe pre_handler is registered with every kretprobe. When probe + * hits it will set up the return probe. + */ +static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) +{ + struct kretprobe *rp = container_of(p, struct kretprobe, kp); + struct kretprobe_instance *ri; + struct rethook_node *rhn; + + rhn = rethook_try_get(rp->rh); + if (!rhn) { + rp->nmissed++; + return 0; + } + + ri = container_of(rhn, struct kretprobe_instance, node); + + if (rp->entry_handler && rp->entry_handler(ri, regs)) + rethook_recycle(rhn); + else + rethook_hook(rhn, regs, kprobe_ftrace(p)); + + return 0; +} +NOKPROBE_SYMBOL(pre_handler_kretprobe); + +static void kretprobe_rethook_handler(struct rethook_node *rh, void *data, + struct pt_regs *regs) +{ + struct kretprobe *rp = (struct kretprobe *)data; + struct kretprobe_instance *ri; + struct kprobe_ctlblk *kcb; + + /* The data must NOT be null. This means rethook data structure is broken. 
*/ + if (WARN_ON_ONCE(!data)) + return; + + __this_cpu_write(current_kprobe, &rp->kp); + kcb = get_kprobe_ctlblk(); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + ri = container_of(rh, struct kretprobe_instance, node); + rp->handler(ri, regs); + + __this_cpu_write(current_kprobe, NULL); +} +NOKPROBE_SYMBOL(kretprobe_rethook_handler); + +#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */ /** * kprobe_on_func_entry() -- check whether given address is function entry @@ -2155,6 +2209,29 @@ int register_kretprobe(struct kretprobe *rp) rp->maxactive = num_possible_cpus(); #endif } +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler); + if (!rp->rh) + return -ENOMEM; + + for (i = 0; i < rp->maxactive; i++) { + inst = kzalloc(sizeof(struct kretprobe_instance) + + rp->data_size, GFP_KERNEL); + if (inst == NULL) { + rethook_free(rp->rh); + rp->rh = NULL; + return -ENOMEM; + } + rethook_add_node(rp->rh, &inst->node); + } + rp->nmissed = 0; + /* Establish function entry probe point */ + ret = register_kprobe(&rp->kp); + if (ret != 0) { + rethook_free(rp->rh); + rp->rh = NULL; + } +#else /* !CONFIG_KRETPROBE_ON_RETHOOK */ rp->freelist.head = NULL; rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL); if (!rp->rph) @@ -2179,6 +2256,7 @@ int register_kretprobe(struct kretprobe *rp) ret = register_kprobe(&rp->kp); if (ret != 0) free_rp_inst(rp); +#endif return ret; } EXPORT_SYMBOL_GPL(register_kretprobe); @@ -2217,7 +2295,11 @@ void unregister_kretprobes(struct kretprobe **rps, int num) for (i = 0; i < num; i++) { if (__unregister_kprobe_top(&rps[i]->kp) < 0) rps[i]->kp.addr = NULL; +#ifdef CONFIG_KRETPROBE_ON_RETHOOK + rethook_free(rps[i]->rh); +#else rps[i]->rph->rp = NULL; +#endif } mutex_unlock(&kprobe_mutex); @@ -2225,7 +2307,9 @@ void unregister_kretprobes(struct kretprobe **rps, int num) for (i = 0; i < num; i++) { if (rps[i]->kp.addr) { __unregister_kprobe_bottom(&rps[i]->kp); +#ifndef CONFIG_KRETPROBE_ON_RETHOOK free_rp_inst(rps[i]); +#endif } } } diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c index 8b2dd5b9dcd1..89d9f994ebb0 100644 --- a/kernel/trace/fprobe.c +++ b/kernel/trace/fprobe.c @@ -150,15 +150,15 @@ static int fprobe_init_rethook(struct fprobe *fp, int num) fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler); for (i = 0; i < size; i++) { - struct rethook_node *node; + struct fprobe_rethook_node *node; - node = kzalloc(sizeof(struct fprobe_rethook_node), GFP_KERNEL); + node = kzalloc(sizeof(*node), GFP_KERNEL); if (!node) { rethook_free(fp->rethook); fp->rethook = NULL; return -ENOMEM; } - rethook_add_node(fp->rethook, node); + rethook_add_node(fp->rethook, &node->node); } return 0; } @@ -215,7 +215,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter * correctly calculate the total number of filtered symbols * from both filter and notfilter. 
*/ - hash = fp->ops.local_hash.filter_hash; + hash = rcu_access_pointer(fp->ops.local_hash.filter_hash); if (WARN_ON_ONCE(!hash)) goto out; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b62fd785b599..47cebef78532 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1433,7 +1433,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, fbuffer.regs = regs; entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event); entry->func = (unsigned long)tk->rp.kp.addr; - entry->ret_ip = (unsigned long)ri->ret_addr; + entry->ret_ip = get_kretprobe_retaddr(ri); store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); trace_event_buffer_commit(&fbuffer); @@ -1628,7 +1628,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri, return; entry->func = (unsigned long)tk->rp.kp.addr; - entry->ret_ip = (unsigned long)ri->ret_addr; + entry->ret_ip = get_kretprobe_retaddr(ri); store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index b34fca6ada86..af040ffa14ff 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -591,9 +591,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max) u32 nb_entries1 = 0, nb_entries2; if (unlikely(pool->dma_need_sync)) { + struct xdp_buff *buff; + /* Slow path */ - *xdp = xp_alloc(pool); - return !!*xdp; + buff = xp_alloc(pool); + if (buff) + *xdp = buff; + return !!buff; } if (unlikely(pool->free_list_cnt)) { diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index c2f43a5d38e0..290998c82de1 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -207,7 +207,10 @@ static void probe_unprivileged_disabled(void) printf("bpf() syscall for unprivileged users is enabled\n"); break; case 1: - printf("bpf() syscall restricted to privileged users\n"); + printf("bpf() syscall restricted to privileged users (without recovery)\n"); + break; + case 2: + printf("bpf() syscall restricted to privileged users (admin can change)\n"); break; case -1: printf("Unable to retrieve required privileges for bpf() syscall\n"); diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 7ba7ff55d2ea..91af2850b505 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -477,7 +477,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name) codegen("\ \n\ __attribute__((unused)) static void \n\ - %1$s__assert(struct %1$s *s) \n\ + %1$s__assert(struct %1$s *s __attribute__((unused))) \n\ { \n\ #ifdef __cplusplus \n\ #define _Static_assert static_assert \n\ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7604e7d5438f..d14b10b85e51 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3009,8 +3009,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack=<new value> * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. 
* * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description @@ -4316,8 +4316,8 @@ union bpf_attr { * * # sysctl kernel.perf_event_max_stack=<new value> * Return - * A non-negative value equal to or less than *size* on success, - * or a negative error in case of failure. + * The non-negative copied *buf* length equal to or less than + * *size* on success, or a negative error in case of failure. * * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags) * Description diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c index e834a01de16a..16048978a1ef 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c +++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c @@ -29,11 +29,8 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size) */ struct get_stack_trace_t e; int i, num_stack; - static __u64 cnt; struct ksym *ks; - cnt++; - memset(&e, 0, sizeof(e)); memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e)); diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c index 36a707e7c7a7..6c62bfb8bb6f 100644 --- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c @@ -39,16 +39,8 @@ struct { __type(value, stack_trace_t); } stack_amap SEC(".maps"); -/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */ -struct random_urandom_args { - unsigned long long pad; - int got_bits; - int pool_left; - int input_left; -}; - -SEC("tracepoint/random/urandom_read") -int oncpu(struct random_urandom_args *args) +SEC("kprobe/urandom_read") +int oncpu(struct pt_regs *args) { __u32 max_len = sizeof(struct bpf_stack_build_id) * PERF_MAX_STACK_DEPTH; diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c index baa3e3ecae82..aa294612e0a7 100644 --- a/tools/testing/selftests/bpf/test_lpm_map.c +++ b/tools/testing/selftests/bpf/test_lpm_map.c @@ -209,7 +209,8 @@ static void test_lpm_order(void) static void test_lpm_map(int keysize) { LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC); - size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups; + volatile size_t n_matches, n_matches_after_delete; + size_t i, j, n_nodes, n_lookups; struct tlpm_node *t, *list = NULL; struct bpf_lpm_trie_key *key; uint8_t *data, *value; |