summaryrefslogtreecommitdiffstats
path: root/kernel/kprobes.c
diff options
context:
space:
mode:
authorwuqiang.matt <wuqiang.matt@bytedance.com>2023-10-17 21:56:52 +0800
committerMasami Hiramatsu (Google) <mhiramat@kernel.org>2023-10-18 23:59:54 +0900
commit4bbd9345565933823f38a419df65661f12adbe5e (patch)
treedebaf6ea237ce00ce92be26ce3ef8d6999cbd24e /kernel/kprobes.c
parent92f90d3b0d5e384f218c8068138ed1b3afa025af (diff)
downloadlinux-stable-4bbd9345565933823f38a419df65661f12adbe5e.tar.gz
linux-stable-4bbd9345565933823f38a419df65661f12adbe5e.tar.bz2
linux-stable-4bbd9345565933823f38a419df65661f12adbe5e.zip
kprobes: kretprobe scalability improvement
kretprobe is using freelist to manage return-instances, but freelist, as LIFO queue based on singly linked list, scales badly and reduces the overall throughput of kretprobed routines, especially for high contention scenarios. Here's a typical throughput test of sys_prctl (counts in 10 seconds, measured with perf stat -a -I 10000 -e syscalls:sys_enter_prctl): OS: Debian 10 X86_64, Linux 6.5rc7 with freelist HW: XEON 8336C x 2, 64 cores/128 threads, DDR4 3200MT/s 1T 2T 4T 8T 16T 24T 24150045 29317964 15446741 12494489 18287272 17708768 32T 48T 64T 72T 96T 128T 16200682 13737658 11645677 11269858 10470118 9931051 This patch introduces objpool to replace freelist. objpool is a high performance queue, which can bring near-linear scalability to kretprobed routines. Tests of kretprobe throughput show the biggest ratio as 159x of original freelist. Here's the result: 1T 2T 4T 8T 16T native: 41186213 82336866 164250978 328662645 658810299 freelist: 24150045 29317964 15446741 12494489 18287272 objpool: 23926730 48010314 96125218 191782984 385091769 32T 48T 64T 96T 128T native: 1330338351 1969957941 2512291791 2615754135 2671040914 freelist: 16200682 13737658 11645677 10470118 9931051 objpool: 764481096 1147149781 1456220214 1502109662 1579015050 Testings on 96-core ARM64 output similarly, but with the biggest ratio up to 448x: OS: Debian 10 AARCH64, Linux 6.5rc7 HW: Kunpeng-920 96 cores/2 sockets/4 NUMA nodes, DDR4 2933 MT/s 1T 2T 4T 8T 16T native: . 30066096 63569843 126194076 257447289 505800181 freelist: 16152090 11064397 11124068 7215768 5663013 objpool: 13997541 28032100 55726624 110099926 221498787 24T 32T 48T 64T 96T native: 763305277 1015925192 1521075123 2033009392 3021013752 freelist: 5015810 4602893 3766792 3382478 2945292 objpool: 328192025 439439564 668534502 887401381 1319972072 Link: https://lore.kernel.org/all/20231017135654.82270-4-wuqiang.matt@bytedance.com/ Signed-off-by: wuqiang.matt <wuqiang.matt@bytedance.com> Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Diffstat (limited to 'kernel/kprobes.c')
-rw-r--r--kernel/kprobes.c91
1 files changed, 39 insertions, 52 deletions
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 0c6185aefaef..075a632e6c7c 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1877,13 +1877,27 @@ static struct notifier_block kprobe_exceptions_nb = {
#ifdef CONFIG_KRETPROBES
#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
+
+/* callbacks for objpool of kretprobe instances */
+static int kretprobe_init_inst(void *nod, void *context)
+{
+ struct kretprobe_instance *ri = nod;
+
+ ri->rph = context;
+ return 0;
+}
+static int kretprobe_fini_pool(struct objpool_head *head, void *context)
+{
+ kfree(context);
+ return 0;
+}
+
static void free_rp_inst_rcu(struct rcu_head *head)
{
struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
+ struct kretprobe_holder *rph = ri->rph;
- if (refcount_dec_and_test(&ri->rph->ref))
- kfree(ri->rph);
- kfree(ri);
+ objpool_drop(ri, &rph->pool);
}
NOKPROBE_SYMBOL(free_rp_inst_rcu);
@@ -1892,7 +1906,7 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
struct kretprobe *rp = get_kretprobe(ri);
if (likely(rp))
- freelist_add(&ri->freelist, &rp->freelist);
+ objpool_push(ri, &rp->rph->pool);
else
call_rcu(&ri->rcu, free_rp_inst_rcu);
}
@@ -1929,23 +1943,12 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
static inline void free_rp_inst(struct kretprobe *rp)
{
- struct kretprobe_instance *ri;
- struct freelist_node *node;
- int count = 0;
-
- node = rp->freelist.head;
- while (node) {
- ri = container_of(node, struct kretprobe_instance, freelist);
- node = node->next;
-
- kfree(ri);
- count++;
- }
+ struct kretprobe_holder *rph = rp->rph;
- if (refcount_sub_and_test(count, &rp->rph->ref)) {
- kfree(rp->rph);
- rp->rph = NULL;
- }
+ if (!rph)
+ return;
+ rp->rph = NULL;
+ objpool_fini(&rph->pool);
}
/* This assumes the 'tsk' is the current task or the is not running. */
@@ -2087,19 +2090,17 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
{
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
+ struct kretprobe_holder *rph = rp->rph;
struct kretprobe_instance *ri;
- struct freelist_node *fn;
- fn = freelist_try_get(&rp->freelist);
- if (!fn) {
+ ri = objpool_pop(&rph->pool);
+ if (!ri) {
rp->nmissed++;
return 0;
}
- ri = container_of(fn, struct kretprobe_instance, freelist);
-
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
- freelist_add(&ri->freelist, &rp->freelist);
+ objpool_push(ri, &rph->pool);
return 0;
}
@@ -2193,7 +2194,6 @@ int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long o
int register_kretprobe(struct kretprobe *rp)
{
int ret;
- struct kretprobe_instance *inst;
int i;
void *addr;
@@ -2227,19 +2227,12 @@ int register_kretprobe(struct kretprobe *rp)
rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
- rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler);
- if (!rp->rh)
- return -ENOMEM;
+ rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
+ sizeof(struct kretprobe_instance) +
+ rp->data_size, rp->maxactive);
+ if (IS_ERR(rp->rh))
+ return PTR_ERR(rp->rh);
- for (i = 0; i < rp->maxactive; i++) {
- inst = kzalloc(struct_size(inst, data, rp->data_size), GFP_KERNEL);
- if (inst == NULL) {
- rethook_free(rp->rh);
- rp->rh = NULL;
- return -ENOMEM;
- }
- rethook_add_node(rp->rh, &inst->node);
- }
rp->nmissed = 0;
/* Establish function entry probe point */
ret = register_kprobe(&rp->kp);
@@ -2248,24 +2241,18 @@ int register_kretprobe(struct kretprobe *rp)
rp->rh = NULL;
}
#else /* !CONFIG_KRETPROBE_ON_RETHOOK */
- rp->freelist.head = NULL;
rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
if (!rp->rph)
return -ENOMEM;
- rp->rph->rp = rp;
- for (i = 0; i < rp->maxactive; i++) {
- inst = kzalloc(struct_size(inst, data, rp->data_size), GFP_KERNEL);
- if (inst == NULL) {
- refcount_set(&rp->rph->ref, i);
- free_rp_inst(rp);
- return -ENOMEM;
- }
- inst->rph = rp->rph;
- freelist_add(&inst->freelist, &rp->freelist);
+ if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
+ sizeof(struct kretprobe_instance), GFP_KERNEL,
+ rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
+ kfree(rp->rph);
+ rp->rph = NULL;
+ return -ENOMEM;
}
- refcount_set(&rp->rph->ref, i);
-
+ rp->rph->rp = rp;
rp->nmissed = 0;
/* Establish function entry probe point */
ret = register_kprobe(&rp->kp);