From 42c269c88dc146982a54a8267f71abc99f12852a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 16:15:39 -0500 Subject: ftrace: Allow for function tracing to record init functions on boot up Adding a hook into free_reserved_area() that informs ftrace that boot-up init text is being freed lets ftrace safely remove those init functions from its records, which keeps ftrace from trying to modify text that no longer exists. Note, this still does not allow for tracing .init text of modules, as modules require different work for freeing their init code. Link: http://lkml.kernel.org/r/1488502497.7212.24.camel@linux.intel.com Cc: linux-mm@kvack.org Cc: Vlastimil Babka Cc: Mel Gorman Cc: Peter Zijlstra Requested-by: Todd Brandt Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b9691ee8f6c1..0556a202c055 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5262,6 +5262,50 @@ void ftrace_module_init(struct module *mod) } #endif /* CONFIG_MODULES */ +void ftrace_free_mem(void *start_ptr, void *end_ptr) +{ + unsigned long start = (unsigned long)start_ptr; + unsigned long end = (unsigned long)end_ptr; + struct ftrace_page **last_pg = &ftrace_pages_start; + struct ftrace_page *pg; + struct dyn_ftrace *rec; + struct dyn_ftrace key; + int order; + + key.ip = start; + key.flags = end; /* overload flags, as it is unsigned long */ + + mutex_lock(&ftrace_lock); + + for (pg = ftrace_pages_start; pg; last_pg = &pg->next, pg = *last_pg) { + if (end < pg->records[0].ip || + start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE)) + continue; + again: + rec = bsearch(&key, pg->records, pg->index, + sizeof(struct dyn_ftrace), + ftrace_cmp_recs); + if (!rec) + continue; + pg->index--; + if (!pg->index) { + *last_pg = pg->next; + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); + kfree(pg); + pg = container_of(last_pg, struct ftrace_page, next); + if (!(*last_pg)) + ftrace_pages = pg; + continue; + } + memmove(rec, rec + 1, + (pg->index - (rec - pg->records)) * sizeof(*rec)); + /* More than one function may be in this block */ + goto again; + } + mutex_unlock(&ftrace_lock); +} + void __init ftrace_init(void) { extern unsigned long __start_mcount_loc[]; -- cgit v1.2.3 From c1bc5919f6741cc4b0c83e3058b3d65d76c943e3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Mar 2017 11:38:13 -0400 Subject: ftrace: Clean up __seq_open_private() return check The return status check of __seq_open_private() is rather strange: iter = __seq_open_private(); if (iter) { /* do stuff */ } return iter ? 0 : -ENOMEM; It makes much more sense to do the return of failure right away: iter = __seq_open_private(); if (!iter) return -ENOMEM; /* do stuff */ return 0; This cleanup will make updates to this code a bit nicer.
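As a rough, self-contained user-space sketch of the same guard-clause shape (these names are stand-ins, not the kernel API):

	#include <errno.h>
	#include <stdlib.h>

	struct iter { int pos; };

	static int example_open(struct iter **out)
	{
		struct iter *iter = calloc(1, sizeof(*iter));

		/* Fail fast so the success path stays unindented. */
		if (!iter)
			return -ENOMEM;

		iter->pos = 0;
		*out = iter;
		return 0;
	}
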
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0556a202c055..527c4d3e8d7f 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3355,12 +3355,13 @@ ftrace_avail_open(struct inode *inode, struct file *file) return -ENODEV; iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); - if (iter) { - iter->pg = ftrace_pages_start; - iter->ops = &global_ops; - } + if (!iter) + return -ENOMEM; - return iter ? 0 : -ENOMEM; + iter->pg = ftrace_pages_start; + iter->ops = &global_ops; + + return 0; } static int @@ -3369,13 +3370,14 @@ ftrace_enabled_open(struct inode *inode, struct file *file) struct ftrace_iterator *iter; iter = __seq_open_private(file, &show_ftrace_seq_ops, sizeof(*iter)); - if (iter) { - iter->pg = ftrace_pages_start; - iter->flags = FTRACE_ITER_ENABLED; - iter->ops = &global_ops; - } + if (!iter) + return -ENOMEM; - return iter ? 0 : -ENOMEM; + iter->pg = ftrace_pages_start; + iter->flags = FTRACE_ITER_ENABLED; + iter->ops = &global_ops; + + return 0; } /** -- cgit v1.2.3 From c20489dad156dd9919ebd854bbace46dbd2576a3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Mar 2017 14:55:49 -0400 Subject: ftrace: Assign iter->hash to filter or notrace hashes on seq read Instead of testing if the hash to use is the filter_hash or the notrace_hash at each iteration, do the test at open, and set the iter->hash to point to the corresponding filter or notrace hash. Then use that directly instead of testing which hash needs to be used each iteration. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 527c4d3e8d7f..3165b7f840e6 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3157,7 +3157,6 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = iter->ops; struct dyn_ftrace *rec = NULL; if (unlikely(ftrace_disabled)) @@ -3181,11 +3180,8 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } } else { rec = &iter->pg->records[iter->idx++]; - if (((iter->flags & FTRACE_ITER_FILTER) && - !(ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip))) || - - ((iter->flags & FTRACE_ITER_NOTRACE) && - !ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) || + if (((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && + !ftrace_lookup_ip(iter->hash, rec->ip)) || ((iter->flags & FTRACE_ITER_ENABLED) && !(rec->flags & FTRACE_FL_ENABLED))) { @@ -3213,7 +3209,6 @@ static void reset_iter_read(struct ftrace_iterator *iter) static void *t_start(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; - struct ftrace_ops *ops = iter->ops; void *p = NULL; loff_t l; @@ -3233,10 +3228,8 @@ static void *t_start(struct seq_file *m, loff_t *pos) * off, we can short cut and just print out that all * functions are enabled. 
*/ - if ((iter->flags & FTRACE_ITER_FILTER && - ftrace_hash_empty(ops->func_hash->filter_hash)) || - (iter->flags & FTRACE_ITER_NOTRACE && - ftrace_hash_empty(ops->func_hash->notrace_hash))) { + if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && + ftrace_hash_empty(iter->hash)) { if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; @@ -3442,7 +3435,8 @@ ftrace_regex_open(struct ftrace_ops *ops, int flag, ret = -ENOMEM; goto out_unlock; } - } + } else + iter->hash = hash; if (file->f_mode & FMODE_READ) { iter->pg = ftrace_pages_start; @@ -4526,6 +4520,9 @@ int ftrace_regex_release(struct inode *inode, struct file *file) free_ftrace_hash_rcu(old_hash); } mutex_unlock(&ftrace_lock); + } else { + /* For read only, the hash is the ops hash */ + iter->hash = NULL; } mutex_unlock(&iter->ops->func_hash->regex_lock); -- cgit v1.2.3 From 2d71d98900b8a4bd58c3ca92e404d5e3701de874 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Mar 2017 21:40:49 -0400 Subject: ftrace: Return NULL at end of t_start() instead of calling t_hash_start() The loop in t_start() that calls t_next() will call t_hash_start() if the pos is beyond the functions and enters the hash items. There's no reason to check if p is NULL and call t_hash_start(), as that would be redundant. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3165b7f840e6..421530831ddd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3255,7 +3255,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) } if (!p) - return t_hash_start(m, pos); + return NULL; return iter; } -- cgit v1.2.3 From 43ff926a0c3a0cfd6aa313c3232420f009ab43e8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 30 Mar 2017 16:51:43 -0400 Subject: ftrace: Update func_pos in t_start() when all functions are enabled If all functions are enabled, there's a comment displayed in the file to denote that: # cd /sys/kernel/debug/tracing # cat set_ftrace_filter #### all functions enabled #### If a function trigger is set, those are displayed as well: # echo schedule:traceoff >> /debug/tracing/set_ftrace_filter # cat set_ftrace_filter #### all functions enabled #### schedule:traceoff:unlimited But if you read that file with dd, the output can change: # dd if=/debug/tracing/set_ftrace_filter bs=1 #### all functions enabled #### 32+0 records in 32+0 records out 32 bytes copied, 7.0237e-05 s, 456 kB/s This is because the "pos" variable is updated for the comment, but func_pos is not. "func_pos" is used by the triggers (or hashes) to know how many functions were printed, and it bases its index on pos - func_pos. func_pos should be 1 to account for the comment printed. But since it is not, t_hash_start() thinks that one trigger was already printed. The cat gets to t_hash_start() via t_next(), not t_start(), which updates both pos and func_pos.
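A toy user-space model of the indexing (hypothetical code, not from the kernel) shows why the uncounted comment line loses the first trigger:

	#include <stdio.h>

	static const char *hash_items[] = {
		"schedule:traceoff:unlimited",
		"do_IRQ:traceoff:unlimited",
	};

	/* Mimics t_hash_start(): triggers index from pos - func_pos. */
	static const char *hash_at(long pos, long func_pos)
	{
		long idx = pos - func_pos;

		return (idx >= 0 && idx < 2) ? hash_items[idx] : NULL;
	}

	int main(void)
	{
		/* The comment line was printed, so the reader resumes at pos 1. */
		printf("func_pos=0: %s\n", hash_at(1, 0)); /* do_IRQ..., schedule lost */
		printf("func_pos=1: %s\n", hash_at(1, 1)); /* schedule..., correct */
		return 0;
	}
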
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 421530831ddd..d4b18ce9ba88 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3230,6 +3230,7 @@ static void *t_start(struct seq_file *m, loff_t *pos) */ if ((iter->flags & (FTRACE_ITER_FILTER | FTRACE_ITER_NOTRACE)) && ftrace_hash_empty(iter->hash)) { + iter->func_pos = 1; /* Account for the message */ if (*pos > 0) return t_hash_start(m, pos); iter->flags |= FTRACE_ITER_PRINTALL; -- cgit v1.2.3 From 5bd84629a7a0e2462c28ca52e213ebe27fadfee8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 29 Mar 2017 22:45:18 -0400 Subject: ftrace: Create separate t_func_next() to simplify the function / hash logic I noticed that if I use dd to read the set_ftrace_filter file, the first hash command is repeated. # cd /sys/kernel/debug/tracing # echo schedule > set_ftrace_filter # echo do_IRQ >> set_ftrace_filter # echo schedule:traceoff >> set_ftrace_filter # echo do_IRQ:traceoff >> set_ftrace_filter # cat set_ftrace_filter schedule do_IRQ schedule:traceoff:unlimited do_IRQ:traceoff:unlimited # dd if=set_ftrace_filter bs=1 schedule do_IRQ schedule:traceoff:unlimited schedule:traceoff:unlimited do_IRQ:traceoff:unlimited 98+0 records in 98+0 records out 98 bytes copied, 0.00265011 s, 37.0 kB/s This is due to the way t_start() calls t_next(), as well as the way the seq_file code calls t_next(), with slightly different state between the two. Namely, t_start() will call t_next() with a local "pos" variable. By separating out the function listing from t_next() into its own function, we can have better control of outputting the functions and the hash of triggers. This simplifies the code.
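For context, seq_file's start() must seek to *pos, and t_start() does that by replaying the next() step with a local position; that replay is where the duplicated state comes from. A toy user-space model of it (illustrative only, not the kernel code):

	#include <stdio.h>

	static int items[] = { 10, 20, 30 };

	/* Toy "next": advance *pos and return the item there, or NULL. */
	static int *toy_next(int *cur, long *pos)
	{
		(*pos)++;
		return (*pos < 3) ? &items[*pos] : NULL;
	}

	/* Toy "start": seek to *ppos by replaying next() with a local pos. */
	static int *toy_start(long *ppos)
	{
		long l = -1;
		int *p = toy_next(NULL, &l);

		while (p && l < *ppos)
			p = toy_next(p, &l);
		return p;
	}

	int main(void)
	{
		long pos = 1;

		printf("start(1) -> %d\n", *toy_start(&pos)); /* prints 20 */
		return 0;
	}
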
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 14 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d4b18ce9ba88..aff7a2c08387 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3154,22 +3154,12 @@ t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) } static void * -t_next(struct seq_file *m, void *v, loff_t *pos) +t_func_next(struct seq_file *m, loff_t *pos) { struct ftrace_iterator *iter = m->private; struct dyn_ftrace *rec = NULL; - if (unlikely(ftrace_disabled)) - return NULL; - - if (iter->flags & FTRACE_ITER_HASH) - return t_hash_next(m, pos); - (*pos)++; - iter->pos = iter->func_pos = *pos; - - if (iter->flags & FTRACE_ITER_PRINTALL) - return t_hash_start(m, pos); retry: if (iter->idx >= iter->pg->index) { @@ -3192,13 +3182,40 @@ t_next(struct seq_file *m, void *v, loff_t *pos) } if (!rec) - return t_hash_start(m, pos); + return NULL; + iter->pos = iter->func_pos = *pos; iter->func = rec; return iter; } +static void * +t_next(struct seq_file *m, void *v, loff_t *pos) +{ + struct ftrace_iterator *iter = m->private; + void *ret; + + if (unlikely(ftrace_disabled)) + return NULL; + + if (iter->flags & FTRACE_ITER_HASH) + return t_hash_next(m, pos); + + if (iter->flags & FTRACE_ITER_PRINTALL) { + /* next must increment pos, and t_hash_start does not */ + (*pos)++; + return t_hash_start(m, pos); + } + + ret = t_func_next(m, pos); + + if (!ret) + return t_hash_start(m, pos); + + return ret; +} + static void reset_iter_read(struct ftrace_iterator *iter) { iter->pos = 0; @@ -3250,13 +3267,13 @@ static void *t_start(struct seq_file *m, loff_t *pos) iter->pg = ftrace_pages_start; iter->idx = 0; for (l = 0; l <= *pos; ) { - p = t_next(m, p, &l); + p = t_func_next(m, &l); if (!p) break; } if (!p) - return NULL; + return t_hash_start(m, pos); return iter; } -- cgit v1.2.3 From b80f0f6c9ed3958ff4002b6135f43a1ef312a610 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Apr 2017 12:57:35 -0400 Subject: ftrace: Have init/main.c call ftrace directly to free init memory Relying on free_reserved_area() to call ftrace to free init memory proved not to be sufficient. The issue is that on x86, when debug_pagealloc is enabled, the init memory is not freed, but simply set as not present. Since ftrace was uninformed of this, starting function tracing still tries to update pages that are not present according to the page tables, causing ftrace to trigger an ftrace_bug() as well as killing the kernel itself. Instead of relying on free_reserved_area(), have init/main.c call ftrace directly just before it frees the init memory. Ftrace then uses __init_begin and __init_end to know where the init memory is located. Looking at all archs (and testing what I can), it appears that this should work for each of them.
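The matching init/main.c hunk is outside this listing (which is limited to kernel/trace/ftrace.c); the resulting call sequence in kernel_init() is roughly the following hedged reconstruction:

	/* init/main.c, kernel_init(): drop ftrace records for init text
	 * just before the init memory itself is released. */
	ftrace_free_init_mem();
	free_initmem();
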
Reported-by: kernel test robot Reported-by: Fengguang Wu Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index aff7a2c08387..8efd9fe7aec0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -36,6 +36,7 @@ #include +#include #include #include "trace_output.h" @@ -5279,10 +5280,10 @@ void ftrace_module_init(struct module *mod) } #endif /* CONFIG_MODULES */ -void ftrace_free_mem(void *start_ptr, void *end_ptr) +void __init ftrace_free_init_mem(void) { - unsigned long start = (unsigned long)start_ptr; - unsigned long end = (unsigned long)end_ptr; + unsigned long start = (unsigned long)(&__init_begin); + unsigned long end = (unsigned long)(&__init_end); struct ftrace_page **last_pg = &ftrace_pages_start; struct ftrace_page *pg; struct dyn_ftrace *rec; -- cgit v1.2.3 From 0598e4f08e3da1fea2ee3b4765a44798147a8c62 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 10:28:12 -0400 Subject: ftrace: Add use of synchronize_rcu_tasks() with dynamic trampolines The function tracer needs to be more careful than other subsystems when it comes to freeing data. Especially if that data is actually executable code. When a single function is traced, a trampoline can be dynamically allocated which is called to jump to the function trace callback. When the callback is no longer needed, the dynamically allocated trampoline needs to be freed. This is where the issues arise. The dynamically allocated trampoline must not be used again. As function tracing can trace all subsystems, including subsystems that are used to serialize aspects of freeing (namely RCU), it must take extra care when doing the freeing. Before synchronize_rcu_tasks() was around, there was no way for the function tracer to know that nothing was using the dynamically allocated trampoline when CONFIG_PREEMPT was enabled. That's because a task could be indefinitely preempted while sitting on the trampoline. Now with synchronize_rcu_tasks(), it will wait until all tasks have either voluntarily scheduled (not on the trampoline) or gone into userspace (not on the trampoline). Then it is safe to free the trampoline even with CONFIG_PREEMPT set. Acked-by: "Paul E. McKenney" Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8efd9fe7aec0..34f63e78d661 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2808,18 +2808,28 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * callers are done before leaving this function. * The same goes for freeing the per_cpu data of the per_cpu * ops. - * - * Again, normal synchronize_sched() is not good enough. - * We need to do a hard force of sched synchronization. - * This is because we use preempt_disable() to do RCU, but - * the function tracers can be called where RCU is not watching - * (like before user_exit()). We can not rely on the RCU - * infrastructure to do the synchronization, thus we must do it - * ourselves. */ if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_PER_CPU)) { + /* + * We need to do a hard force of sched synchronization.
+ * This is because we use preempt_disable() to do RCU, but + * the function tracers can be called where RCU is not watching + * (like before user_exit()). We can not rely on the RCU + * infrastructure to do the synchronization, thus we must do it + * ourselves. + */ schedule_on_each_cpu(ftrace_sync); + /* + * When the kernel is preemptive, tasks can be preempted + * while on an ftrace trampoline. Just scheduling a task on + * a CPU is not good enough to flush them. Calling + * synchronize_rcu_tasks() will wait for those tasks to + * execute and either schedule voluntarily or enter user space. + */ + if (IS_ENABLED(CONFIG_PREEMPT)) + synchronize_rcu_tasks(); + arch_ftrace_trampoline_free(ops); if (ops->flags & FTRACE_OPS_FL_PER_CPU) @@ -5366,22 +5376,6 @@ void __weak arch_ftrace_update_trampoline(struct ftrace_ops *ops) static void ftrace_update_trampoline(struct ftrace_ops *ops) { - -/* - * Currently there's no safe way to free a trampoline when the kernel - * is configured with PREEMPT. That is because a task could be preempted - * when it jumped to the trampoline, it may be preempted for a long time - * depending on the system load, and currently there's no way to know - * when it will be off the trampoline. If the trampoline is freed - * too early, when the task runs again, it will be executing on freed - * memory and crash. - */ -#ifdef CONFIG_PREEMPT - /* Currently, only non dynamic ops can have a trampoline */ - if (ops->flags & FTRACE_OPS_FL_DYNAMIC) - return; -#endif - arch_ftrace_update_trampoline(ops); } -- cgit v1.2.3 From acceb72e90624ec6c3f2fc62e72dab892cd075da Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 14 Apr 2017 17:45:45 -0400 Subject: ftrace: Fix removing of second function probe When two function probes are added to set_ftrace_filter, and then one of them is removed, the update to the function locations is not performed, and the record keeping of the function states is corrupted, causing an ftrace_bug() to occur. This is easily reproducible by adding two probes, removing one, and then adding it back again. # cd /sys/kernel/debug/tracing # echo schedule:traceoff > set_ftrace_filter # echo do_IRQ:traceoff > set_ftrace_filter # echo \!do_IRQ:traceoff > /debug/tracing/set_ftrace_filter # echo do_IRQ:traceoff > set_ftrace_filter Causes: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 1098 at kernel/trace/ftrace.c:2369 ftrace_get_addr_curr+0x143/0x220 Modules linked in: [...] CPU: 2 PID: 1098 Comm: bash Not tainted 4.10.0-test+ #405 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v02.05 05/07/2012 Call Trace: dump_stack+0x68/0x9f __warn+0x111/0x130 ? trace_irq_work_interrupt+0xa0/0xa0 warn_slowpath_null+0x1d/0x20 ftrace_get_addr_curr+0x143/0x220 ? __fentry__+0x10/0x10 ftrace_replace_code+0xe3/0x4f0 ? ftrace_int3_handler+0x90/0x90 ? printk+0x99/0xb5 ? 0xffffffff81000000 ftrace_modify_all_code+0x97/0x110 arch_ftrace_update_code+0x10/0x20 ftrace_run_update_code+0x1c/0x60 ftrace_run_modify_code.isra.48.constprop.62+0x8e/0xd0 register_ftrace_function_probe+0x4b6/0x590 ? ftrace_startup+0x310/0x310 ? debug_lockdep_rcu_enabled.part.4+0x1a/0x30 ? update_stack_state+0x88/0x110 ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320 ? preempt_count_sub+0x18/0xd0 ? mutex_lock_nested+0x104/0x800 ? ftrace_regex_write.isra.43.part.44+0x1d3/0x320 ? __unwind_start+0x1c0/0x1c0 ? _mutex_lock_nest_lock+0x800/0x800 ftrace_trace_probe_callback.isra.3+0xc0/0x130 ? func_set_flag+0xe0/0xe0 ? __lock_acquire+0x642/0x1790 ? __might_fault+0x1e/0x20 ?
trace_get_user+0x398/0x470 ? strcmp+0x35/0x60 ftrace_trace_onoff_callback+0x48/0x70 ftrace_regex_write.isra.43.part.44+0x251/0x320 ? match_records+0x420/0x420 ftrace_filter_write+0x2b/0x30 __vfs_write+0xd7/0x330 ? do_loop_readv_writev+0x120/0x120 ? locks_remove_posix+0x90/0x2f0 ? do_lock_file_wait+0x160/0x160 ? __lock_is_held+0x93/0x100 ? rcu_read_lock_sched_held+0x5c/0xb0 ? preempt_count_sub+0x18/0xd0 ? __sb_start_write+0x10a/0x230 ? vfs_write+0x222/0x240 vfs_write+0xef/0x240 SyS_write+0xab/0x130 ? SyS_read+0x130/0x130 ? trace_hardirqs_on_caller+0x182/0x280 ? trace_hardirqs_on_thunk+0x1a/0x1c entry_SYSCALL_64_fastpath+0x18/0xad RIP: 0033:0x7fe61c157c30 RSP: 002b:00007ffe87890258 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: ffffffff8114a410 RCX: 00007fe61c157c30 RDX: 0000000000000010 RSI: 000055814798f5e0 RDI: 0000000000000001 RBP: ffff8800c9027f98 R08: 00007fe61c422740 R09: 00007fe61ca53700 R10: 0000000000000073 R11: 0000000000000246 R12: 0000558147a36400 R13: 00007ffe8788f160 R14: 0000000000000024 R15: 00007ffe8788f15c ? trace_hardirqs_off_caller+0xc0/0x110 ---[ end trace 99fa09b3d9869c2c ]--- Bad trampoline accounting at: ffffffff81cc3b00 (do_IRQ+0x0/0x150) Cc: stable@vger.kernel.org Fixes: 59df055f1991 ("ftrace: trace different functions with a different tracer") Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 34f63e78d661..4b6459a57fbc 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3780,23 +3780,24 @@ static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash) ftrace_probe_registered = 1; } -static void __disable_ftrace_function_probe(void) +static bool __disable_ftrace_function_probe(void) { int i; if (!ftrace_probe_registered) - return; + return false; for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { struct hlist_head *hhd = &ftrace_func_hash[i]; if (hhd->first) - return; + return false; } /* no more funcs left */ ftrace_shutdown(&trace_probe_ops, 0); ftrace_probe_registered = 0; + return true; } @@ -3926,6 +3927,7 @@ static void __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data, int flags) { + struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *rec_entry; struct ftrace_func_probe *entry; struct ftrace_func_probe *p; @@ -3937,6 +3939,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, struct hlist_node *tmp; char str[KSYM_SYMBOL_LEN]; int i, ret; + bool disabled; if (glob && (strcmp(glob, "*") == 0 || !strlen(glob))) func_g.search = NULL; @@ -3955,6 +3958,10 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, mutex_lock(&trace_probe_ops.func_hash->regex_lock); + old_hash_ops.filter_hash = old_hash; + /* Probes only have filters */ + old_hash_ops.notrace_hash = NULL; + hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) /* Hmm, should report this somehow */ @@ -3992,12 +3999,17 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, } } mutex_lock(&ftrace_lock); - __disable_ftrace_function_probe(); + disabled = __disable_ftrace_function_probe(); /* * Remove after the disable is called. Otherwise, if the last * probe is removed, a null hash means *all enabled*. 
*/ ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); + + /* still need to update the function call sites */ + if (ftrace_enabled && !disabled) + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, + &old_hash_ops); synchronize_sched(); if (!ret) free_ftrace_hash_rcu(old_hash); -- cgit v1.2.3 From fcdc71257923263d042236eaf62bae5e033757b5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 17 Apr 2017 10:22:29 -0400 Subject: ftrace: Fix indexing of t_hash_start() from t_next() t_hash_start() does not increment *pos, whereas t_next() must. But when t_next() does increment *pos, it must still pass in the original *pos to t_hash_start(), otherwise it will skip the first instance: # cd /sys/kernel/debug/tracing # echo schedule:traceoff > set_ftrace_filter # echo do_IRQ:traceoff > set_ftrace_filter # echo call_rcu > set_ftrace_filter # cat set_ftrace_filter call_rcu schedule:traceoff:unlimited do_IRQ:traceoff:unlimited The above called t_hash_start() from t_start() as there was only one function (call_rcu), but if we add another function: # echo xfrm_policy_destroy_rcu >> set_ftrace_filter # cat set_ftrace_filter call_rcu xfrm_policy_destroy_rcu do_IRQ:traceoff:unlimited The "schedule:traceoff" disappears. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4b6459a57fbc..b21a3e61ac74 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3205,6 +3205,7 @@ static void * t_next(struct seq_file *m, void *v, loff_t *pos) { struct ftrace_iterator *iter = m->private; + loff_t l = *pos; /* t_hash_start() must use original pos */ void *ret; if (unlikely(ftrace_disabled)) @@ -3216,13 +3217,13 @@ t_next(struct seq_file *m, void *v, loff_t *pos) if (iter->flags & FTRACE_ITER_PRINTALL) { /* next must increment pos, and t_hash_start does not */ (*pos)++; - return t_hash_start(m, pos); + return t_hash_start(m, &l); } ret = t_func_next(m, pos); if (!ret) - return t_hash_start(m, pos); + return t_hash_start(m, &l); return ret; } -- cgit v1.2.3 From 1e10486ffee0a5b060c58b9c8c712422f7b88b3b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 17 Apr 2017 11:44:28 +0900 Subject: ftrace: Add 'function-fork' trace option The function-fork option is the same as event-fork in that it tracks task fork/exit and sets the pid filter properly. This can be useful if the user wants to trace only selected tasks, including their children.
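Expected usage is along these lines (a hedged example following the event-fork precedent; paths may vary by kernel config):

	# cd /sys/kernel/debug/tracing
	# echo $$ > set_ftrace_pid
	# echo 1 > options/function-fork
	# echo function > current_tracer

Children forked by this shell are then added to the pid filter automatically, and exiting tasks are removed from it.
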
Link: http://lkml.kernel.org/r/20170417024430.21194-3-namhyung@kernel.org Signed-off-by: Namhyung Kim Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b21a3e61ac74..b5ce7ea67e02 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5600,6 +5600,43 @@ ftrace_filter_pid_sched_switch_probe(void *data, bool preempt, trace_ignore_this_task(pid_list, next)); } +static void +ftrace_pid_follow_sched_process_fork(void *data, + struct task_struct *self, + struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_sched(tr->function_pids); + trace_filter_add_remove_task(pid_list, self, task); +} + +static void +ftrace_pid_follow_sched_process_exit(void *data, struct task_struct *task) +{ + struct trace_pid_list *pid_list; + struct trace_array *tr = data; + + pid_list = rcu_dereference_sched(tr->function_pids); + trace_filter_add_remove_task(pid_list, NULL, task); +} + +void ftrace_pid_follow_fork(struct trace_array *tr, bool enable) +{ + if (enable) { + register_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, + tr); + register_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + tr); + } else { + unregister_trace_sched_process_fork(ftrace_pid_follow_sched_process_fork, + tr); + unregister_trace_sched_process_exit(ftrace_pid_follow_sched_process_exit, + tr); + } +} + static void clear_ftrace_pids(struct trace_array *tr) { struct trace_pid_list *pid_list; -- cgit v1.2.3 From e51a9896794bbb819d89b803e5a8446199034853 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Apr 2017 17:43:49 -0400 Subject: ftrace: Remove unused "flags" field from struct ftrace_func_probe Nothing uses "flags" in the ftrace_func_probe descriptor. Remove it. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b5ce7ea67e02..b6dc29583c86 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1101,7 +1101,6 @@ static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; struct ftrace_func_probe { struct hlist_node node; struct ftrace_probe_ops *ops; - unsigned long flags; unsigned long ip; void *data; struct list_head free_list; }; -- cgit v1.2.3 From bca6c8d0480a8aa5c86f8f416db96c71f6b79e29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Apr 2017 18:18:47 -0400 Subject: ftrace: Pass probe ops to probe function In preparation for cleaning up the probe function registration code, the "data" parameter will eventually be removed from the probe->func() call. Instead, the callback will receive its own "ops" pointer, with which it can set up its own data that it needs to map.
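Sketch of the resulting callback signature change (the authoritative prototype lives in the ftrace headers, which this ftrace.c-only diff does not show):

	/* before */
	void (*func)(unsigned long ip, unsigned long parent_ip, void **data);

	/* after: the probe's own ops is passed through to the callback */
	void (*func)(unsigned long ip, unsigned long parent_ip,
		     struct ftrace_probe_ops *ops, void **data);
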
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b6dc29583c86..d8873079bed4 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3739,7 +3739,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); hlist_for_each_entry_rcu_notrace(entry, hhd, node) { if (entry->ip == ip) - entry->ops->func(ip, parent_ip, &entry->data); + entry->ops->func(ip, parent_ip, entry->ops, &entry->data); } preempt_enable_notrace(); } -- cgit v1.2.3 From 41794f190780c28784fa62b22001691e5876d149 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Apr 2017 20:58:35 -0400 Subject: ftrace: Added ftrace_func_mapper for function probe triggers In order to move the ops to the function probes directly, they need a way to map function ips to their own data without depending on the infrastructure of the function probes, as the data field will be going away. New helper functions are added that are based on the ftrace_hash code. ftrace_func_mapper functions are there to let the probes map ips to their data. These can be allocated by the probe ops, and referenced in the function callbacks. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d8873079bed4..ac47d1845fdb 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3808,6 +3808,147 @@ static void ftrace_free_entry(struct ftrace_func_probe *entry) kfree(entry); } +struct ftrace_func_map { + struct ftrace_func_entry entry; + void *data; +}; + +struct ftrace_func_mapper { + struct ftrace_hash hash; +}; + +/** + * allocate_ftrace_func_mapper - allocate a new ftrace_func_mapper + * + * Returns a ftrace_func_mapper descriptor that can be used to map ips to data. + */ +struct ftrace_func_mapper *allocate_ftrace_func_mapper(void) +{ + struct ftrace_hash *hash; + + /* + * The mapper is simply a ftrace_hash, but since the entries + * in the hash are not ftrace_func_entry type, we define it + * as a separate structure. + */ + hash = alloc_ftrace_hash(FTRACE_HASH_DEFAULT_BITS); + return (struct ftrace_func_mapper *)hash; +} + +/** + * ftrace_func_mapper_find_ip - Find some data mapped to an ip + * @mapper: The mapper that has the ip maps + * @ip: the instruction pointer to find the data for + * + * Returns the data mapped to @ip if found, otherwise NULL. The return + * is actually the address of the mapper data pointer. The address is + * returned for use cases where the data is no bigger than a long, and + * the user can use the data pointer as its data instead of having to + * allocate more memory for the reference. + */ +void **ftrace_func_mapper_find_ip(struct ftrace_func_mapper *mapper, + unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (!entry) + return NULL; + + map = (struct ftrace_func_map *)entry; + return &map->data; +} + +/** + * ftrace_func_mapper_add_ip - Map some data to an ip + * @mapper: The mapper that has the ip maps + * @ip: The instruction pointer address to map @data to + * @data: The data to map to @ip + * + * Returns 0 on success, otherwise an error.
+ */ +int ftrace_func_mapper_add_ip(struct ftrace_func_mapper *mapper, + unsigned long ip, void *data) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (entry) + return -EBUSY; + + map = kmalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + + map->entry.ip = ip; + map->data = data; + + __add_hash_entry(&mapper->hash, &map->entry); + + return 0; +} + +/** + * ftrace_func_mapper_remove_ip - Remove an ip from the mapping + * @mapper: The mapper that has the ip maps + * @ip: The instruction pointer address to remove the data from + * + * Returns the data if it is found, otherwise NULL. + * Note, if the data pointer is used as the data itself (see + * ftrace_func_mapper_find_ip()), then the return value may be meaningless, + * if the data pointer was set to zero. + */ +void *ftrace_func_mapper_remove_ip(struct ftrace_func_mapper *mapper, + unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + void *data; + + entry = ftrace_lookup_ip(&mapper->hash, ip); + if (!entry) + return NULL; + + map = (struct ftrace_func_map *)entry; + data = map->data; + + remove_hash_entry(&mapper->hash, entry); + kfree(entry); + + return data; +} + +/** + * free_ftrace_func_mapper - free a mapping of ips and data + * @mapper: The mapper that has the ip maps + * @free_func: A function to be called on each data item. + * + * This is used to free the function mapper. The @free_func is optional + * and can be used if the data needs to be freed as well. + */ +void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, + ftrace_mapper_func free_func) +{ + struct ftrace_func_entry *entry; + struct ftrace_func_map *map; + struct hlist_head *hhd; + int size = 1 << mapper->hash.size_bits; + int i; + + if (free_func && mapper->hash.count) { + for (i = 0; i < size; i++) { + hhd = &mapper->hash.buckets[i]; + hlist_for_each_entry(entry, hhd, hlist) { + map = (struct ftrace_func_map *)entry; + free_func(map); + } + } + } + free_ftrace_hash(&mapper->hash); +} + int register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, void *data) -- cgit v1.2.3 From 0fe7e7e3f8391d4c9260b41cdb15c7917cb2e5b3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 09:56:26 -0400 Subject: ftrace: Remove unused unregister_ftrace_function_probe() function Nothing calls unregister_ftrace_function_probe(). Remove it as well as the flag PROBE_TEST_DATA, as this function was the only one to set it.
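For reference, a hedged usage sketch of the ftrace_func_mapper API introduced above (error handling trimmed; my_data, my_use() and my_free() are hypothetical):

	struct ftrace_func_mapper *mapper;
	void **val;

	mapper = allocate_ftrace_func_mapper();
	if (!mapper)
		return -ENOMEM;

	/* map a callsite ip to its private data */
	if (ftrace_func_mapper_add_ip(mapper, ip, my_data) < 0)
		goto fail;

	/* later, e.g. in the probe callback: look the data back up */
	val = ftrace_func_mapper_find_ip(mapper, ip);
	if (val)
		my_use(*val);

	/* teardown: my_free() is invoked on each remaining entry */
	free_ftrace_func_mapper(mapper, my_free);
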
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ac47d1845fdb..5448089e6028 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4061,12 +4061,11 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, enum { PROBE_TEST_FUNC = 1, - PROBE_TEST_DATA = 2 }; static void __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data, int flags) + int flags) { struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *rec_entry; @@ -4119,9 +4118,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) continue; - if ((flags & PROBE_TEST_DATA) && entry->data != data) - continue; - /* do this last, since it is the most expensive */ if (func_g.search) { kallsyms_lookup(entry->ip, NULL, NULL, @@ -4166,23 +4162,15 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, free_ftrace_hash(hash); } -void -unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data) -{ - __unregister_ftrace_function_probe(glob, ops, data, - PROBE_TEST_FUNC | PROBE_TEST_DATA); -} - void unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) { - __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC); + __unregister_ftrace_function_probe(glob, ops, PROBE_TEST_FUNC); } void unregister_ftrace_function_probe_all(char *glob) { - __unregister_ftrace_function_probe(glob, NULL, NULL, 0); + __unregister_ftrace_function_probe(glob, NULL, 0); } static LIST_HEAD(ftrace_commands); -- cgit v1.2.3 From 78f78e07d51e440d01e6b1aef172883821193771 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 13:39:59 -0400 Subject: ftrace: Remove unused unregister_ftrace_function_probe_all() function There are no users of unregister_ftrace_function_probe_all(). The only probe function that is used is unregister_ftrace_function_probe_func(). Rename the internal static function __unregister_ftrace_function_probe() to unregister_ftrace_function_probe_func() and make it global. Also remove the PROBE_TEST_FUNC flag, as it would always be set.
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5448089e6028..1c31c74d0819 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4059,13 +4059,8 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return count; } -enum { - PROBE_TEST_FUNC = 1, -}; - -static void -__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - int flags) +void +unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) { struct ftrace_ops_hash old_hash_ops; struct ftrace_func_entry *rec_entry; @@ -4115,7 +4110,7 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, hlist_for_each_entry_safe(entry, tmp, hhd, node) { /* break up if statements for readability */ - if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) + if (entry->ops != ops) continue; /* do this last, since it is the most expensive */ @@ -4162,17 +4157,6 @@ __unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, free_ftrace_hash(hash); } -void -unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) -{ - __unregister_ftrace_function_probe(glob, ops, PROBE_TEST_FUNC); -} - -void unregister_ftrace_function_probe_all(char *glob) -{ - __unregister_ftrace_function_probe(glob, NULL, 0); -} - static LIST_HEAD(ftrace_commands); static DEFINE_MUTEX(ftrace_cmd_mutex); -- cgit v1.2.3 From 02b77e2afb492420cabb117f3bbd7452d4b4ed06 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 10:04:26 -0400 Subject: ftrace: Remove printing of data in showing of a function probe None of the probe users uses the entry's data field anymore. They all have their own print() function. Remove showing the data field in the generic function, as the data field will be going away. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 1c31c74d0819..15f910a03822 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3154,11 +3154,7 @@ t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) if (rec->ops->print) return rec->ops->print(m, rec->ip, rec->ops, rec->data); - seq_printf(m, "%ps:%ps", (void *)rec->ip, (void *)rec->ops->func); - - if (rec->data) - seq_printf(m, ":%p", rec->data); - seq_putc(m, '\n'); + seq_printf(m, "%ps:%ps\n", (void *)rec->ip, (void *)rec->ops->func); return 0; } -- cgit v1.2.3 From 1a48df0041c2756194e700affb0e2ff084092e28 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 10:27:51 -0400 Subject: ftrace: Remove data field from ftrace_func_probe structure No users of the function probes use the data field anymore. Remove it, and change the init function to take a void *data parameter instead of a void **data, because init will just get the data that the registering function received, and there's no state after it is called. The other functions for ftrace_probe_ops still take the data parameter, but it will currently only be passed NULL. It will stay as a parameter for future data to be passed to these functions.
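Sketch of the effect on the init callback (prototypes hedged from the diff; the exact declarations live in the header, not in this listing):

	/* before: init could stash per-registration state through **data */
	int (*init)(struct ftrace_probe_ops *ops, unsigned long ip, void **data);

	/* after: init just receives the registration data by value */
	int (*init)(struct ftrace_probe_ops *ops, unsigned long ip, void *data);
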
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 15f910a03822..f7fcab8f3aa1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1102,7 +1102,6 @@ struct ftrace_func_probe { struct hlist_node node; struct ftrace_probe_ops *ops; unsigned long ip; - void *data; struct list_head free_list; }; @@ -3152,7 +3151,7 @@ t_hash_show(struct seq_file *m, struct ftrace_iterator *iter) return -EIO; if (rec->ops->print) - return rec->ops->print(m, rec->ip, rec->ops, rec->data); + return rec->ops->print(m, rec->ip, rec->ops, NULL); seq_printf(m, "%ps:%ps\n", (void *)rec->ip, (void *)rec->ops->func); @@ -3735,7 +3734,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, preempt_disable_notrace(); hlist_for_each_entry_rcu_notrace(entry, hhd, node) { if (entry->ip == ip) - entry->ops->func(ip, parent_ip, entry->ops, &entry->data); + entry->ops->func(ip, parent_ip, entry->ops, NULL); } preempt_enable_notrace(); } @@ -3800,7 +3799,7 @@ static bool __disable_ftrace_function_probe(void) static void ftrace_free_entry(struct ftrace_func_probe *entry) { if (entry->ops->free) - entry->ops->free(entry->ops, entry->ip, &entry->data); + entry->ops->free(entry->ops, entry->ip, NULL); kfree(entry); } @@ -4007,15 +4006,13 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, count++; - entry->data = data; - /* * The caller might want to do something special * for each function we find. We call the callback * to give the caller an opportunity to do so. */ if (ops->init) { - if (ops->init(ops, rec->ip, &entry->data) < 0) { + if (ops->init(ops, rec->ip, data) < 0) { /* caller does not like this func */ kfree(entry); continue; -- cgit v1.2.3 From e16b35ddb840788e023fac2482b61c0b6bf98057 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 14:46:56 -0400 Subject: ftrace: Add helper function ftrace_hash_move_and_update_ops() The process of updating an ops filter_hash is a bit complex and requires setting up an old hash to perform the update. This is done exactly the same in two locations for the same reasons. Create a helper function that does it in one place. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 105 +++++++++++++++++++++++++------------------------- 1 file changed, 53 insertions(+), 52 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f7fcab8f3aa1..5c8d8eea9e7c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3674,6 +3674,56 @@ ftrace_match_records(struct ftrace_hash *hash, char *buff, int len) return match_records(hash, buff, len, NULL); } +static void ftrace_ops_update_code(struct ftrace_ops *ops, + struct ftrace_ops_hash *old_hash) +{ + struct ftrace_ops *op; + + if (!ftrace_enabled) + return; + + if (ops->flags & FTRACE_OPS_FL_ENABLED) { + ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); + return; + } + + /* + * If this is the shared global_ops filter, then we need to + * check if there is another ops that shares it and is enabled. + * If so, we still need to run the modify code.
+ */ + if (ops->func_hash != &global_ops.local_hash) + return; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->func_hash == &global_ops.local_hash && + op->flags & FTRACE_OPS_FL_ENABLED) { + ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); + /* Only need to do this once */ + return; + } + } while_for_each_ftrace_op(op); +} + +static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops, + struct ftrace_hash **orig_hash, + struct ftrace_hash *hash, + int enable) +{ + struct ftrace_ops_hash old_hash_ops; + struct ftrace_hash *old_hash; + int ret; + + old_hash = *orig_hash; + old_hash_ops.filter_hash = ops->func_hash->filter_hash; + old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; + ret = ftrace_hash_move(ops, enable, orig_hash, hash); + if (!ret) { + ftrace_ops_update_code(ops, &old_hash_ops); + free_ftrace_hash_rcu(old_hash); + } + return ret; +} /* * We register the module command as a template to show others how @@ -4306,44 +4356,11 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) return add_hash_entry(hash, ip); } -static void ftrace_ops_update_code(struct ftrace_ops *ops, - struct ftrace_ops_hash *old_hash) -{ - struct ftrace_ops *op; - - if (!ftrace_enabled) - return; - - if (ops->flags & FTRACE_OPS_FL_ENABLED) { - ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS, old_hash); - return; - } - - /* - * If this is the shared global_ops filter, then we need to - * check if there is another ops that shares it, is enabled. - * If so, we still need to run the modify code. - */ - if (ops->func_hash != &global_ops.local_hash) - return; - - do_for_each_ftrace_op(op, ftrace_ops_list) { - if (op->func_hash == &global_ops.local_hash && - op->flags & FTRACE_OPS_FL_ENABLED) { - ftrace_run_modify_code(op, FTRACE_UPDATE_CALLS, old_hash); - /* Only need to do this once */ - return; - } - } while_for_each_ftrace_op(op); -} - static int ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, unsigned long ip, int remove, int reset, int enable) { struct ftrace_hash **orig_hash; - struct ftrace_ops_hash old_hash_ops; - struct ftrace_hash *old_hash; struct ftrace_hash *hash; int ret; @@ -4378,14 +4395,7 @@ ftrace_set_hash(struct ftrace_ops *ops, unsigned char *buf, int len, } mutex_lock(&ftrace_lock); - old_hash = *orig_hash; - old_hash_ops.filter_hash = ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = ops->func_hash->notrace_hash; - ret = ftrace_hash_move(ops, enable, orig_hash, hash); - if (!ret) { - ftrace_ops_update_code(ops, &old_hash_ops); - free_ftrace_hash_rcu(old_hash); - } + ret = ftrace_hash_move_and_update_ops(ops, orig_hash, hash, enable); mutex_unlock(&ftrace_lock); out_regex_unlock: @@ -4624,10 +4634,8 @@ static void __init set_ftrace_early_filters(void) int ftrace_regex_release(struct inode *inode, struct file *file) { struct seq_file *m = (struct seq_file *)file->private_data; - struct ftrace_ops_hash old_hash_ops; struct ftrace_iterator *iter; struct ftrace_hash **orig_hash; - struct ftrace_hash *old_hash; struct trace_parser *parser; int filter_hash; int ret; @@ -4657,15 +4665,8 @@ int ftrace_regex_release(struct inode *inode, struct file *file) orig_hash = &iter->ops->func_hash->notrace_hash; mutex_lock(&ftrace_lock); - old_hash = *orig_hash; - old_hash_ops.filter_hash = iter->ops->func_hash->filter_hash; - old_hash_ops.notrace_hash = iter->ops->func_hash->notrace_hash; - ret = ftrace_hash_move(iter->ops, filter_hash, - orig_hash, iter->hash); - if (!ret) { - ftrace_ops_update_code(iter->ops, 
&old_hash_ops); - free_ftrace_hash_rcu(old_hash); - } + ret = ftrace_hash_move_and_update_ops(iter->ops, orig_hash, + iter->hash, filter_hash); mutex_unlock(&ftrace_lock); } else { /* For read only, the hash is the ops hash */ -- cgit v1.2.3 From d3d532d798c5720055ab02a10bf7829a33c3645a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 16:44:43 -0400 Subject: ftrace: Have unregister_ftrace_function_probe_func() return a value Currently unregister_ftrace_function_probe_func() is a void function. It does not give any feedback if an error occurred or no item was found to remove and nothing was done. Change it to return a status, with success only if it removed something. Also update the callers to return that feedback to the user. Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5c8d8eea9e7c..cbae7fb1be15 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4102,7 +4102,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, return count; } -void +int unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) { struct ftrace_ops_hash old_hash_ops; @@ -4131,7 +4131,7 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) /* we do not support '!' for function probes */ if (WARN_ON(not)) - return; + return -EINVAL; } mutex_lock(&trace_probe_ops.func_hash->regex_lock); @@ -4140,9 +4140,9 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) /* Probes only have filters */ old_hash_ops.notrace_hash = NULL; + ret = -ENOMEM; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash); if (!hash) - /* Hmm, should report this somehow */ goto out_unlock; INIT_LIST_HEAD(&free_list); @@ -4173,6 +4173,13 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) list_add(&entry->free_list, &free_list); } } + + /* Nothing found? */ + if (list_empty(&free_list)) { + ret = -EINVAL; + goto out_unlock; + } + mutex_lock(&ftrace_lock); disabled = __disable_ftrace_function_probe(); /* * Remove after the disable is called. Otherwise, if the last * probe is removed, a null hash means *all enabled*. */ ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash); + + /* still need to update the function call sites */ + if (ftrace_enabled && !disabled) + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, + &old_hash_ops); synchronize_sched(); if (!ret) free_ftrace_hash_rcu(old_hash); -- wait -- @@ -4198,6 +4205,7 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) out_unlock: mutex_unlock(&trace_probe_ops.func_hash->regex_lock); free_ftrace_hash(hash); + return ret; } static LIST_HEAD(ftrace_commands); -- cgit v1.2.3 From 1ec3a81a0cf4236b644282794932c4eda9c1714a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 18:16:29 -0400 Subject: ftrace: Have each function probe use its own ftrace_ops Have the function probes have their own ftrace_ops, and remove the trace_probe_ops. This simplifies some of the ftrace infrastructure code. Individual entries for each function are still allocated for the use of the output for set_ftrace_filter, but they will be removed soon too.
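The repeated ops->ops dereferences in the diff below imply a layout along these lines (a hedged sketch; the struct itself is defined in trace.h, outside this listing):

	struct ftrace_probe_ops {
		struct ftrace_ops	ops;	/* each probe now carries its own ops */
		void			(*func)(unsigned long ip,
						unsigned long parent_ip,
						struct ftrace_probe_ops *ops,
						void **data);
		/* init/free/print callbacks as before */
	};

Registration and shutdown then operate on &probe_ops->ops directly (see ftrace_startup(&ops->ops, 0) in the diff).
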
Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 246 ++++++++++++++++++++------------------------------ 1 file changed, 98 insertions(+), 148 deletions(-) (limited to 'kernel/trace/ftrace.c') diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cbae7fb1be15..cf6b7263199a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3789,63 +3789,6 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip, preempt_enable_notrace(); } -static struct ftrace_ops trace_probe_ops __read_mostly = -{ - .func = function_trace_probe_call, - .flags = FTRACE_OPS_FL_INITIALIZED, - INIT_OPS_HASH(trace_probe_ops) -}; - -static int ftrace_probe_registered; - -static void __enable_ftrace_function_probe(struct ftrace_ops_hash *old_hash) -{ - int ret; - int i; - - if (ftrace_probe_registered) { - /* still need to update the function call sites */ - if (ftrace_enabled) - ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS, - old_hash); - return; - } - - for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { - struct hlist_head *hhd = &ftrace_func_hash[i]; - if (hhd->first) - break; - } - /* Nothing registered? */ - if (i == FTRACE_FUNC_HASHSIZE) - return; - - ret = ftrace_startup(&trace_probe_ops, 0); - - ftrace_probe_registered = 1; -} - -static bool __disable_ftrace_function_probe(void) -{ - int i; - - if (!ftrace_probe_registered) - return false; - - for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { - struct hlist_head *hhd = &ftrace_func_hash[i]; - if (hhd->first) - return false; - } - - /* no more funcs left */ - ftrace_shutdown(&trace_probe_ops, 0); - - ftrace_probe_registered = 0; - return true; -} - - static void ftrace_free_entry(struct ftrace_func_probe *entry) { if (entry->ops->free) @@ -3996,110 +3939,110 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper, int register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data) + void *data) { - struct ftrace_ops_hash old_hash_ops; - struct ftrace_func_probe *entry; - struct ftrace_glob func_g; - struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash; - struct ftrace_hash *old_hash = *orig_hash; + struct ftrace_func_entry *entry; + struct ftrace_func_probe *probe; + struct ftrace_hash **orig_hash; + struct ftrace_hash *old_hash; struct ftrace_hash *hash; - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int not; + struct hlist_head hl; + struct hlist_node *n; unsigned long key; int count = 0; + int size; int ret; + int i; - func_g.type = filter_parse_regex(glob, strlen(glob), - &func_g.search, ¬); - func_g.len = strlen(func_g.search); - - /* we do not support '!' for function probes */ - if (WARN_ON(not)) + /* We do not support '!' 
for function probes */
+	if (WARN_ON(glob[0] == '!'))
 		return -EINVAL;
 
-	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
-
-	old_hash_ops.filter_hash = old_hash;
-	/* Probes only have filters */
-	old_hash_ops.notrace_hash = NULL;
-
-	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
-	if (!hash) {
-		count = -ENOMEM;
-		goto out;
+	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED)) {
+		ops->ops.func = function_trace_probe_call;
+		ftrace_ops_init(&ops->ops);
 	}
 
-	if (unlikely(ftrace_disabled)) {
-		count = -ENODEV;
-		goto out;
-	}
-
-	mutex_lock(&ftrace_lock);
+	mutex_lock(&ops->ops.func_hash->regex_lock);
 
-	do_for_each_ftrace_rec(pg, rec) {
+	orig_hash = &ops->ops.func_hash->filter_hash;
+	old_hash = *orig_hash;
+	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
 
-		if (rec->flags & FTRACE_FL_DISABLED)
-			continue;
+	ret = ftrace_match_records(hash, glob, strlen(glob));
 
-		if (!ftrace_match_record(rec, &func_g, NULL, 0))
-			continue;
+	/* Nothing found? */
+	if (!ret)
+		ret = -EINVAL;
 
-		entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-		if (!entry) {
-			/* If we did not process any, then return error */
-			if (!count)
-				count = -ENOMEM;
-			goto out_unlock;
-		}
+	if (ret < 0)
+		goto out;
 
-		count++;
+	INIT_HLIST_HEAD(&hl);
 
-		/*
-		 * The caller might want to do something special
-		 * for each function we find. We call the callback
-		 * to give the caller an opportunity to do so.
-		 */
-		if (ops->init) {
-			if (ops->init(ops, rec->ip, data) < 0) {
-				/* caller does not like this func */
-				kfree(entry);
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			if (ftrace_lookup_ip(old_hash, entry->ip))
 				continue;
+			probe = kmalloc(sizeof(*probe), GFP_KERNEL);
+			if (!probe) {
+				count = -ENOMEM;
+				goto err_free;
 			}
-		}
+			probe->ops = ops;
+			probe->ip = entry->ip;
+			/*
+			 * The caller might want to do something special
+			 * for each function we find. We call the callback
+			 * to give the caller an opportunity to do so.
+			 */
+			if (ops->init && ops->init(ops, entry->ip, data) < 0) {
+				kfree(probe);
+				goto err_free;
+			}
+			hlist_add_head(&probe->node, &hl);
 
-		ret = enter_record(hash, rec, 0);
-		if (ret < 0) {
-			kfree(entry);
-			count = ret;
-			goto out_unlock;
+			count++;
 		}
+	}
 
-		entry->ops = ops;
-		entry->ip = rec->ip;
+	mutex_lock(&ftrace_lock);
 
-		key = hash_long(entry->ip, FTRACE_HASH_BITS);
-		hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]);
+	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
+					      hash, 1);
+	if (ret < 0)
+		goto err_free_unlock;
 
-	} while_for_each_ftrace_rec();
+	hlist_for_each_entry_safe(probe, n, &hl, node) {
+		hlist_del(&probe->node);
+		key = hash_long(probe->ip, FTRACE_HASH_BITS);
+		hlist_add_head_rcu(&probe->node, &ftrace_func_hash[key]);
+	}
 
-	ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+	if (!(ops->ops.flags & FTRACE_OPS_FL_ENABLED))
+		ret = ftrace_startup(&ops->ops, 0);
 
-	__enable_ftrace_function_probe(&old_hash_ops);
+	mutex_unlock(&ftrace_lock);
 
 	if (!ret)
-		free_ftrace_hash_rcu(old_hash);
-	else
-		count = ret;
-
- out_unlock:
-	mutex_unlock(&ftrace_lock);
+		ret = count;
  out:
-	mutex_unlock(&trace_probe_ops.func_hash->regex_lock);
+	mutex_unlock(&ops->ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
 
-	return count;
+	return ret;
+
+ err_free_unlock:
+	mutex_unlock(&ftrace_lock);
+ err_free:
+	hlist_for_each_entry_safe(probe, n, &hl, node) {
+		hlist_del(&probe->node);
+		if (ops->free)
+			ops->free(ops, probe->ip, NULL);
+		kfree(probe);
+	}
+	goto out;
 }
 
 int
@@ -4110,14 +4053,16 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	struct ftrace_func_probe *entry;
 	struct ftrace_func_probe *p;
 	struct ftrace_glob func_g;
-	struct ftrace_hash **orig_hash = &trace_probe_ops.func_hash->filter_hash;
-	struct ftrace_hash *old_hash = *orig_hash;
+	struct ftrace_hash **orig_hash;
+	struct ftrace_hash *old_hash;
 	struct list_head free_list;
-	struct ftrace_hash *hash;
+	struct ftrace_hash *hash = NULL;
 	struct hlist_node *tmp;
 	char str[KSYM_SYMBOL_LEN];
 	int i, ret;
-	bool disabled;
+
+	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED))
+		return -EINVAL;
 
 	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
 		func_g.search = NULL;
@@ -4134,14 +4079,21 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 		return -EINVAL;
 	}
 
-	mutex_lock(&trace_probe_ops.func_hash->regex_lock);
+	mutex_lock(&ops->ops.func_hash->regex_lock);
+
+	orig_hash = &ops->ops.func_hash->filter_hash;
+	old_hash = *orig_hash;
+
+	ret = -EINVAL;
+	if (ftrace_hash_empty(old_hash))
+		goto out_unlock;
 
 	old_hash_ops.filter_hash = old_hash;
 	/* Probes only have filters */
 	old_hash_ops.notrace_hash = NULL;
 
 	ret = -ENOMEM;
-	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, *orig_hash);
+	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
 	if (!hash)
 		goto out_unlock;
 
@@ -4181,20 +4133,18 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	}
 
 	mutex_lock(&ftrace_lock);
-	disabled = __disable_ftrace_function_probe();
-	/*
-	 * Remove after the disable is called. Otherwise, if the last
-	 * probe is removed, a null hash means *all enabled*.
-	 */
-	ret = ftrace_hash_move(&trace_probe_ops, 1, orig_hash, hash);
+
+	if (ftrace_hash_empty(hash))
+		ftrace_shutdown(&ops->ops, 0);
+
+	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
+					      hash, 1);
 
 	/* still need to update the function call sites */
-	if (ftrace_enabled && !disabled)
-		ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS,
+	if (ftrace_enabled && !ftrace_hash_empty(hash))
+		ftrace_run_modify_code(&ops->ops, FTRACE_UPDATE_CALLS,
 				       &old_hash_ops);
 	synchronize_sched();
-	if (!ret)
-		free_ftrace_hash_rcu(old_hash);
 
 	list_for_each_entry_safe(entry, p, &free_list, free_list) {
 		list_del(&entry->free_list);
@@ -4203,7 +4153,7 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	}
 	mutex_unlock(&ftrace_lock);
 
 out_unlock:
-	mutex_unlock(&trace_probe_ops.func_hash->regex_lock);
+	mutex_unlock(&ops->ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
 	return ret;
 }
--
cgit v1.2.3


From eee8ded131f15e0f5b1897c9c4a7687fabd28822 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Tue, 4 Apr 2017 21:31:28 -0400
Subject: ftrace: Have the function probes call their own function

Now that the function probes have their own ftrace_ops, there's no reason to
continue using the ftrace_func_hash to find which probe to call in the
function callback. The ops passed in to the function callback is part of the
probe_ops to call.

Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 225 ++++++++++++++++++++++----------------------------
 1 file changed, 98 insertions(+), 127 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index cf6b7263199a..493c7ff7e860 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1096,14 +1096,7 @@ static bool update_all_ops;
 # error Dynamic ftrace depends on MCOUNT_RECORD
 #endif
 
-static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly;
-
-struct ftrace_func_probe {
-	struct hlist_node node;
-	struct ftrace_probe_ops *ops;
-	unsigned long ip;
-	struct list_head free_list;
-};
+static LIST_HEAD(ftrace_func_probes);
 
 struct ftrace_func_entry {
 	struct hlist_node hlist;
@@ -1270,7 +1263,7 @@ static void
 remove_hash_entry(struct ftrace_hash *hash,
 		  struct ftrace_func_entry *entry)
 {
-	hlist_del(&entry->hlist);
+	hlist_del_rcu(&entry->hlist);
 	hash->count--;
 }
 
@@ -3063,35 +3056,58 @@ struct ftrace_iterator {
 	loff_t func_pos;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *func;
-	struct ftrace_func_probe *probe;
+	struct ftrace_probe_ops *probe;
+	struct ftrace_func_entry *probe_entry;
 	struct trace_parser parser;
 	struct ftrace_hash *hash;
 	struct ftrace_ops *ops;
-	int hidx;
+	int pidx;
 	int idx;
 	unsigned flags;
 };
 
 static void *
-t_hash_next(struct seq_file *m, loff_t *pos)
+t_probe_next(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
+	struct ftrace_hash *hash;
+	struct list_head *next;
 	struct hlist_node *hnd = NULL;
 	struct hlist_head *hhd;
+	int size;
 
 	(*pos)++;
 	iter->pos = *pos;
 
-	if (iter->probe)
-		hnd = &iter->probe->node;
- retry:
-	if (iter->hidx >= FTRACE_FUNC_HASHSIZE)
+	if (list_empty(&ftrace_func_probes))
 		return NULL;
 
-	hhd = &ftrace_func_hash[iter->hidx];
+	if (!iter->probe) {
+		next = ftrace_func_probes.next;
+		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
+	}
+
+	if (iter->probe_entry)
+		hnd = &iter->probe_entry->hlist;
+
+	hash = iter->probe->ops.func_hash->filter_hash;
+	size = 1 << hash->size_bits;
+
+ retry:
+	if (iter->pidx >= size) {
+		if (iter->probe->list.next == &ftrace_func_probes)
+			return NULL;
+		next = iter->probe->list.next;
+		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
+		hash = iter->probe->ops.func_hash->filter_hash;
+		size = 1 << hash->size_bits;
+		iter->pidx = 0;
+	}
+
+	hhd = &hash->buckets[iter->pidx];
 
 	if (hlist_empty(hhd)) {
-		iter->hidx++;
+		iter->pidx++;
 		hnd = NULL;
 		goto retry;
 	}
@@ -3101,7 +3117,7 @@ t_hash_next(struct seq_file *m, loff_t *pos)
 	else {
 		hnd = hnd->next;
 		if (!hnd) {
-			iter->hidx++;
+			iter->pidx++;
 			goto retry;
 		}
 	}
@@ -3109,26 +3125,28 @@ t_hash_next(struct seq_file *m, loff_t *pos)
 	if (WARN_ON_ONCE(!hnd))
 		return NULL;
 
-	iter->probe = hlist_entry(hnd, struct ftrace_func_probe, node);
+	iter->probe_entry = hlist_entry(hnd, struct ftrace_func_entry, hlist);
 
 	return iter;
 }
 
-static void *t_hash_start(struct seq_file *m, loff_t *pos)
+static void *t_probe_start(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
 	void *p = NULL;
 	loff_t l;
 
-	if (!(iter->flags & FTRACE_ITER_DO_HASH))
+	if (!(iter->flags & FTRACE_ITER_DO_PROBES))
 		return NULL;
 
 	if (iter->func_pos > *pos)
 		return NULL;
 
-	iter->hidx = 0;
+	iter->probe = NULL;
+	iter->probe_entry = NULL;
+	iter->pidx = 0;
 	for (l = 0; l <= (*pos - iter->func_pos); ) {
-		p = t_hash_next(m, &l);
+		p = t_probe_next(m, &l);
 		if (!p)
 			break;
 	}
@@ -3136,24 +3154,27 @@ static void *t_hash_start(struct seq_file *m, loff_t *pos)
 		return NULL;
 
 	/* Only set this if we have an item */
-	iter->flags |= FTRACE_ITER_HASH;
+	iter->flags |= FTRACE_ITER_PROBE;
 
 	return iter;
 }
 
 static int
-t_hash_show(struct seq_file *m, struct ftrace_iterator *iter)
+t_probe_show(struct seq_file *m, struct ftrace_iterator *iter)
 {
-	struct ftrace_func_probe *rec;
+	struct ftrace_probe_ops *probe;
+	struct ftrace_func_entry *probe_entry;
 
-	rec = iter->probe;
-	if (WARN_ON_ONCE(!rec))
+	probe = iter->probe;
+	probe_entry = iter->probe_entry;
+
+	if (WARN_ON_ONCE(!probe || !probe_entry))
 		return -EIO;
 
-	if (rec->ops->print)
-		return rec->ops->print(m, rec->ip, rec->ops, NULL);
+	if (probe->print)
+		return probe->print(m, probe_entry->ip, probe, NULL);
 
-	seq_printf(m, "%ps:%ps\n", (void *)rec->ip, (void *)rec->ops->func);
+	seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip, (void *)probe->func);
 
 	return 0;
 }
@@ -3205,19 +3226,19 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
 	if (unlikely(ftrace_disabled))
 		return NULL;
 
-	if (iter->flags & FTRACE_ITER_HASH)
-		return t_hash_next(m, pos);
+	if (iter->flags & FTRACE_ITER_PROBE)
+		return t_probe_next(m, pos);
 
 	if (iter->flags & FTRACE_ITER_PRINTALL) {
-		/* next must increment pos, and t_hash_start does not */
+		/* next must increment pos, and t_probe_start does not */
 		(*pos)++;
-		return t_hash_start(m, &l);
+		return t_probe_start(m, &l);
 	}
 
 	ret = t_func_next(m, pos);
 
 	if (!ret)
-		return t_hash_start(m, &l);
+		return t_probe_start(m, &l);
 
 	return ret;
 }
@@ -3226,7 +3247,7 @@ static void reset_iter_read(struct ftrace_iterator *iter)
 {
 	iter->pos = 0;
 	iter->func_pos = 0;
-	iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_HASH);
+	iter->flags &= ~(FTRACE_ITER_PRINTALL | FTRACE_ITER_PROBE);
 }
 
 static void *t_start(struct seq_file *m, loff_t *pos)
@@ -3255,15 +3276,15 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	    ftrace_hash_empty(iter->hash)) {
 		iter->func_pos = 1; /* Account for the message */
 		if (*pos > 0)
-			return t_hash_start(m, pos);
+			return t_probe_start(m, pos);
 		iter->flags |= FTRACE_ITER_PRINTALL;
 		/* reset in case of seek/pread */
-		iter->flags &= ~FTRACE_ITER_HASH;
+		iter->flags &= ~FTRACE_ITER_PROBE;
 		return iter;
 	}
 
-	if (iter->flags & FTRACE_ITER_HASH)
-		return t_hash_start(m, pos);
+	if (iter->flags & FTRACE_ITER_PROBE)
+		return t_probe_start(m, pos);
 
 	/*
 	 * Unfortunately, we need to restart at ftrace_pages_start
@@ -3279,7 +3300,7 @@ static void *t_start(struct seq_file *m, loff_t *pos)
 	}
 
 	if (!p)
-		return t_hash_start(m, pos);
+		return t_probe_start(m, pos);
 
 	return iter;
 }
@@ -3310,8 +3331,8 @@ static int t_show(struct seq_file *m, void *v)
 	struct ftrace_iterator *iter = m->private;
 	struct dyn_ftrace *rec;
 
-	if (iter->flags & FTRACE_ITER_HASH)
-		return t_hash_show(m, iter);
+	if (iter->flags & FTRACE_ITER_PROBE)
+		return t_probe_show(m, iter);
 
 	if (iter->flags & FTRACE_ITER_PRINTALL) {
 		if (iter->flags & FTRACE_ITER_NOTRACE)
@@ -3490,7 +3511,7 @@ ftrace_filter_open(struct inode *inode, struct file *file)
 	struct ftrace_ops *ops = inode->i_private;
 
 	return ftrace_regex_open(ops,
-			FTRACE_ITER_FILTER | FTRACE_ITER_DO_HASH,
+			FTRACE_ITER_FILTER | FTRACE_ITER_DO_PROBES,
 			inode, file);
 }
 
@@ -3765,16 +3786,9 @@ core_initcall(ftrace_mod_cmd_init);
 static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 				      struct ftrace_ops *op, struct pt_regs *pt_regs)
 {
-	struct ftrace_func_probe *entry;
-	struct hlist_head *hhd;
-	unsigned long key;
-
-	key = hash_long(ip, FTRACE_HASH_BITS);
+	struct ftrace_probe_ops *probe_ops;
 
-	hhd = &ftrace_func_hash[key];
-
-	if (hlist_empty(hhd))
-		return;
+	probe_ops = container_of(op, struct ftrace_probe_ops, ops);
 
 	/*
 	 * Disable preemption for these calls to prevent a RCU grace
@@ -3782,20 +3796,10 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	hlist_for_each_entry_rcu_notrace(entry, hhd, node) {
-		if (entry->ip == ip)
-			entry->ops->func(ip, parent_ip, entry->ops, NULL);
-	}
+	probe_ops->func(ip, parent_ip, probe_ops, NULL);
 	preempt_enable_notrace();
 }
 
-static void ftrace_free_entry(struct ftrace_func_probe *entry)
-{
-	if (entry->ops->free)
-		entry->ops->free(entry->ops, entry->ip, NULL);
-	kfree(entry);
-}
-
 struct ftrace_func_map {
 	struct ftrace_func_entry entry;
 	void *data;
@@ -3942,13 +3946,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 			       void *data)
 {
 	struct ftrace_func_entry *entry;
-	struct ftrace_func_probe *probe;
 	struct ftrace_hash **orig_hash;
 	struct ftrace_hash *old_hash;
 	struct ftrace_hash *hash;
-	struct hlist_head hl;
-	struct hlist_node *n;
-	unsigned long key;
 	int count = 0;
 	int size;
 	int ret;
@@ -3961,6 +3961,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED)) {
 		ops->ops.func = function_trace_probe_call;
 		ftrace_ops_init(&ops->ops);
+		INIT_LIST_HEAD(&ops->list);
 	}
 
 	mutex_lock(&ops->ops.func_hash->regex_lock);
@@ -3978,31 +3979,21 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	if (ret < 0)
 		goto out;
 
-	INIT_HLIST_HEAD(&hl);
-
 	size = 1 << hash->size_bits;
 	for (i = 0; i < size; i++) {
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			if (ftrace_lookup_ip(old_hash, entry->ip))
 				continue;
-			probe = kmalloc(sizeof(*probe), GFP_KERNEL);
-			if (!probe) {
-				count = -ENOMEM;
-				goto err_free;
-			}
-			probe->ops = ops;
-			probe->ip = entry->ip;
 			/*
 			 * The caller might want to do something special
 			 * for each function we find. We call the callback
 			 * to give the caller an opportunity to do so.
 			 */
-			if (ops->init && ops->init(ops, entry->ip, data) < 0) {
-				kfree(probe);
-				goto err_free;
+			if (ops->init) {
+				ret = ops->init(ops, entry->ip, data);
+				if (ret < 0)
+					goto out;
 			}
-			hlist_add_head(&probe->node, &hl);
-
 			count++;
 		}
 	}
@@ -4012,17 +4003,15 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
 					      hash, 1);
 	if (ret < 0)
-		goto err_free_unlock;
+		goto out_unlock;
 
-	hlist_for_each_entry_safe(probe, n, &hl, node) {
-		hlist_del(&probe->node);
-		key = hash_long(probe->ip, FTRACE_HASH_BITS);
-		hlist_add_head_rcu(&probe->node, &ftrace_func_hash[key]);
-	}
+	if (list_empty(&ops->list))
+		list_add(&ops->list, &ftrace_func_probes);
 
 	if (!(ops->ops.flags & FTRACE_OPS_FL_ENABLED))
 		ret = ftrace_startup(&ops->ops, 0);
 
+ out_unlock:
 	mutex_unlock(&ftrace_lock);
 
 	if (!ret)
@@ -4032,34 +4021,22 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	free_ftrace_hash(hash);
 
 	return ret;
-
- err_free_unlock:
-	mutex_unlock(&ftrace_lock);
- err_free:
-	hlist_for_each_entry_safe(probe, n, &hl, node) {
-		hlist_del(&probe->node);
-		if (ops->free)
-			ops->free(ops, probe->ip, NULL);
-		kfree(probe);
-	}
-	goto out;
 }
 
 int
 unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 {
 	struct ftrace_ops_hash old_hash_ops;
-	struct ftrace_func_entry *rec_entry;
-	struct ftrace_func_probe *entry;
-	struct ftrace_func_probe *p;
+	struct ftrace_func_entry *entry;
 	struct ftrace_glob func_g;
 	struct ftrace_hash **orig_hash;
 	struct ftrace_hash *old_hash;
-	struct list_head free_list;
 	struct ftrace_hash *hash = NULL;
 	struct hlist_node *tmp;
+	struct hlist_head hhd;
 	char str[KSYM_SYMBOL_LEN];
 	int i, ret;
+	int size;
 
 	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED))
 		return -EINVAL;
@@ -4097,18 +4074,12 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	if (!hash)
 		goto out_unlock;
 
-	INIT_LIST_HEAD(&free_list);
-
-	for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) {
-		struct hlist_head *hhd = &ftrace_func_hash[i];
+	INIT_HLIST_HEAD(&hhd);
 
-		hlist_for_each_entry_safe(entry, tmp, hhd, node) {
-
-			/* break up if statements for readability */
-			if (entry->ops != ops)
-				continue;
+	size = 1 << hash->size_bits;
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry_safe(entry, tmp, &hash->buckets[i], hlist) {
 
-			/* do this last, since it is the most expensive */
 			if (func_g.search) {
 				kallsyms_lookup(entry->ip, NULL, NULL,
 						NULL, str);
@@ -4116,26 +4087,24 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 				if (!ftrace_match(str, &func_g))
 					continue;
 			}
 
-			rec_entry = ftrace_lookup_ip(hash, entry->ip);
-			/* It is possible more than one entry had this ip */
-			if (rec_entry)
-				free_hash_entry(hash, rec_entry);
-
-			hlist_del_rcu(&entry->node);
-			list_add(&entry->free_list, &free_list);
+			remove_hash_entry(hash, entry);
+			hlist_add_head(&entry->hlist, &hhd);
 		}
 	}
 
 	/* Nothing found? */
-	if (list_empty(&free_list)) {
+	if (hlist_empty(&hhd)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
 
 	mutex_lock(&ftrace_lock);
 
-	if (ftrace_hash_empty(hash))
+	if (ftrace_hash_empty(hash)) {
 		ftrace_shutdown(&ops->ops, 0);
+		list_del_init(&ops->list);
+	}
+
 	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
 					      hash, 1);
@@ -4146,9 +4115,11 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 				       &old_hash_ops);
 	synchronize_sched();
 
-	list_for_each_entry_safe(entry, p, &free_list, free_list) {
-		list_del(&entry->free_list);
-		ftrace_free_entry(entry);
+	hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
+		hlist_del(&entry->hlist);
+		if (ops->free)
+			ops->free(ops, entry->ip, NULL);
+		kfree(entry);
 	}
 	mutex_unlock(&ftrace_lock);
 
--
cgit v1.2.3


From 8d70725e452cac9796e9025ccd79c45ffcc4d109 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Wed, 5 Apr 2017 13:36:18 -0400
Subject: ftrace: If the hash for a probe fails to update then free what was initialized

If ftrace_hash_move_and_update_ops() fails, and an ops->free() function
exists, then it needs to be called on all the entries that were added by
this registration.

Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 493c7ff7e860..8394055e6793 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4003,7 +4003,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
 					      hash, 1);
 	if (ret < 0)
-		goto out_unlock;
+		goto err_unlock;
 
 	if (list_empty(&ops->list))
 		list_add(&ops->list, &ftrace_func_probes);
@@ -4021,6 +4021,20 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	free_ftrace_hash(hash);
 
 	return ret;
+
+ err_unlock:
+	if (!ops->free)
+		goto out_unlock;
+
+	/* Failed to do the move, need to call the free functions */
+	for (i = 0; i < size; i++) {
+		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
+			if (ftrace_lookup_ip(old_hash, entry->ip))
+				continue;
+			ops->free(ops, entry->ip, NULL);
+		}
+	}
+	goto out_unlock;
 }
 
 int
--
cgit v1.2.3


From 04ec7bb642b77374b53731b795b5654b5aff1c00 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Wed, 5 Apr 2017 13:12:55 -0400
Subject: tracing: Have the trace_array hold the list of registered func probes

Add a linked list to the trace_array to hold the func probes that are
registered on it. Currently, all function probes are shared as before: only
the top level trace_array holds the function probes. But this lays the
groundwork for attaching function probes to individual instances, and for
having an event trigger affect only the events in the given instance. That
work is still to be done.
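To make the shape of this change concrete, the sketch below is a minimal,
self-contained userspace model of an instance holding a list of registered
probe ops. It is illustrative only: the list helpers are reimplemented here
and the struct members merely mirror the kernel's names; none of this code is
from the patch.

#include <stdio.h>

/* Minimal stand-ins for the kernel's list_head primitives. */
struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }
static int list_empty(const struct list_head *h) { return h->next == h; }
static void list_add(struct list_head *n, struct list_head *h)
{
	n->next = h->next;
	n->prev = h;
	h->next->prev = n;
	h->next = n;
}

/* The trace_array (a tracing instance) now owns a list of probes. */
struct trace_array {
	const char *name;
	struct list_head func_probes;
};

struct probe_ops {
	const char *name;
	struct list_head list;	/* links into trace_array::func_probes */
};

int main(void)
{
	struct trace_array top = { .name = "top" };
	struct probe_ops stacktrace = { .name = "stacktrace" };

	list_init(&top.func_probes);
	list_init(&stacktrace.list);

	/* a self-linked node means "not yet registered"; add it once,
	 * mirroring the patch's list_empty(&ops->list) check */
	if (list_empty(&stacktrace.list))
		list_add(&stacktrace.list, &top.func_probes);

	printf("probe '%s' registered on instance '%s'\n",
	       stacktrace.name, top.name);
	return 0;
}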
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8394055e6793..ea208e93f000 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1096,8 +1096,6 @@ static bool update_all_ops;
 # error Dynamic ftrace depends on MCOUNT_RECORD
 #endif
 
-static LIST_HEAD(ftrace_func_probes);
-
 struct ftrace_func_entry {
 	struct hlist_node hlist;
 	unsigned long ip;
@@ -3070,6 +3068,8 @@ static void *
 t_probe_next(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
+	struct trace_array *tr = global_ops.private;
+	struct list_head *func_probes;
 	struct ftrace_hash *hash;
 	struct list_head *next;
@@ -3079,11 +3079,15 @@ t_probe_next(struct seq_file *m, loff_t *pos)
 	(*pos)++;
 	iter->pos = *pos;
 
-	if (list_empty(&ftrace_func_probes))
+	if (!tr)
+		return NULL;
+
+	func_probes = &tr->func_probes;
+	if (list_empty(func_probes))
 		return NULL;
 
 	if (!iter->probe) {
-		next = ftrace_func_probes.next;
+		next = func_probes->next;
 		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
 	}
 
@@ -3095,7 +3099,7 @@ t_probe_next(struct seq_file *m, loff_t *pos)
 
  retry:
 	if (iter->pidx >= size) {
-		if (iter->probe->list.next == &ftrace_func_probes)
+		if (iter->probe->list.next == func_probes)
 			return NULL;
 		next = iter->probe->list.next;
 		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
@@ -3752,7 +3756,7 @@ static int ftrace_hash_move_and_update_ops(struct ftrace_ops *ops,
  */
 static int
-ftrace_mod_callback(struct ftrace_hash *hash,
+ftrace_mod_callback(struct trace_array *tr, struct ftrace_hash *hash,
 		    char *func, char *cmd, char *module, int enable)
 {
 	int ret;
@@ -3942,8 +3946,8 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
 }
 
 int
-register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
-			       void *data)
+register_ftrace_function_probe(char *glob, struct trace_array *tr,
+			       struct ftrace_probe_ops *ops, void *data)
 {
 	struct ftrace_func_entry *entry;
 	struct ftrace_hash **orig_hash;
@@ -3954,6 +3958,9 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 	int ret;
 	int i;
 
+	if (WARN_ON(!tr))
+		return -EINVAL;
+
 	/* We do not support '!' for function probes */
 	if (WARN_ON(glob[0] == '!'))
 		return -EINVAL;
@@ -4006,7 +4013,7 @@ register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops,
 		goto err_unlock;
 
 	if (list_empty(&ops->list))
-		list_add(&ops->list, &ftrace_func_probes);
+		list_add(&ops->list, &tr->func_probes);
 
 	if (!(ops->ops.flags & FTRACE_OPS_FL_ENABLED))
 		ret = ftrace_startup(&ops->ops, 0);
@@ -4192,9 +4199,11 @@ __init int unregister_ftrace_command(struct ftrace_func_command *cmd)
 	return ret;
 }
 
-static int ftrace_process_regex(struct ftrace_hash *hash,
+static int ftrace_process_regex(struct ftrace_iterator *iter,
 				char *buff, int len, int enable)
 {
+	struct ftrace_hash *hash = iter->hash;
+	struct trace_array *tr = global_ops.private;
 	char *func, *command, *next = buff;
 	struct ftrace_func_command *p;
 	int ret = -EINVAL;
@@ -4214,10 +4223,13 @@ static int ftrace_process_regex(struct ftrace_hash *hash,
 
 	command = strsep(&next, ":");
 
+	if (WARN_ON_ONCE(!tr))
+		return -EINVAL;
+
 	mutex_lock(&ftrace_cmd_mutex);
 	list_for_each_entry(p, &ftrace_commands, list) {
 		if (strcmp(p->name, command) == 0) {
-			ret = p->func(hash, func, command, next, enable);
+			ret = p->func(tr, hash, func, command, next, enable);
 			goto out_unlock;
 		}
 	}
@@ -4254,7 +4266,7 @@ ftrace_regex_write(struct file *file, const char __user *ubuf,
 
 	if (read >= 0 && trace_parser_loaded(parser) &&
 	    !trace_parser_cont(parser)) {
-		ret = ftrace_process_regex(iter->hash, parser->buffer,
+		ret = ftrace_process_regex(iter, parser->buffer,
 					   parser->idx, enable);
 		trace_parser_clear(parser);
 		if (ret < 0)
@@ -5441,6 +5453,10 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops)
 	arch_ftrace_update_trampoline(ops);
 }
 
+void ftrace_init_trace_array(struct trace_array *tr)
+{
+	INIT_LIST_HEAD(&tr->func_probes);
+}
 #else
 
 static struct ftrace_ops global_ops = {
@@ -5495,6 +5511,7 @@ __init void ftrace_init_global_array_ops(struct trace_array *tr)
 {
 	tr->ops = &global_ops;
 	tr->ops->private = tr;
+	ftrace_init_trace_array(tr);
 }
 
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
--
cgit v1.2.3


From b5f081b563a6cdcb85a543df8c851951a8978275 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Mon, 10 Apr 2017 22:30:05 -0400
Subject: tracing: Pass the trace_array into ftrace_probe_ops functions

Pass the trace_array associated with a ftrace_probe_ops into the probe_ops
func(), init() and free() functions. The trace_array is the descriptor that
describes a tracing instance. This will help create the infrastructure that
allows function probes unique to tracing instances.

Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ea208e93f000..e51cd6b51253 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3791,6 +3791,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 				      struct ftrace_ops *op, struct pt_regs *pt_regs)
 {
 	struct ftrace_probe_ops *probe_ops;
+	struct trace_array *tr = op->private;
 
 	probe_ops = container_of(op, struct ftrace_probe_ops, ops);
 
@@ -3800,7 +3801,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	probe_ops->func(ip, parent_ip, probe_ops, NULL);
+	probe_ops->func(ip, parent_ip, tr, probe_ops, NULL);
 	preempt_enable_notrace();
 }
 
@@ -3969,6 +3970,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		ops->ops.func = function_trace_probe_call;
 		ftrace_ops_init(&ops->ops);
 		INIT_LIST_HEAD(&ops->list);
+		ops->ops.private = tr;
 	}
 
 	mutex_lock(&ops->ops.func_hash->regex_lock);
@@ -3997,7 +3999,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 			 * to give the caller an opportunity to do so.
 			 */
 			if (ops->init) {
-				ret = ops->init(ops, entry->ip, data);
+				ret = ops->init(ops, tr, entry->ip, data);
 				if (ret < 0)
 					goto out;
 			}
@@ -4038,7 +4040,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			if (ftrace_lookup_ip(old_hash, entry->ip))
 				continue;
-			ops->free(ops, entry->ip, NULL);
+			ops->free(ops, tr, entry->ip, NULL);
 		}
 	}
 	goto out_unlock;
@@ -4055,6 +4057,7 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	struct ftrace_hash *hash = NULL;
 	struct hlist_node *tmp;
 	struct hlist_head hhd;
+	struct trace_array *tr;
 	char str[KSYM_SYMBOL_LEN];
 	int i, ret;
 	int size;
@@ -4062,6 +4065,8 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED))
 		return -EINVAL;
 
+	tr = ops->ops.private;
+
 	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
 		func_g.search = NULL;
@@ -4139,7 +4144,7 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 	hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
 		hlist_del(&entry->hlist);
 		if (ops->free)
-			ops->free(ops, entry->ip, NULL);
+			ops->free(ops, tr, entry->ip, NULL);
 		kfree(entry);
 	}
 	mutex_unlock(&ftrace_lock);
--
cgit v1.2.3


From 7b60f3d8761561d95d7e962522d6338143fc2329 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Tue, 18 Apr 2017 14:50:39 -0400
Subject: ftrace: Dynamically create the probe ftrace_ops for the trace_array

In order to eventually have each trace_array instance hold its own unique
set of function probes (triggers), the trace array needs to hold the ops and
the filters for the probes. This is the first step toward accomplishing
that.

Instead of having the private data of the probe ops point to the
trace_array, create a separate list that the trace_array holds. There's only
one private_data for a probe, but we need one per trace_array. The probe
ftrace_ops will be dynamically created for each instance, instead of being
static.
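The lookup-or-allocate plus reference-count pattern this patch introduces can
be modeled in a short standalone C program. This is a sketch under stated
assumptions: the singly linked list and every name here are inventions of
the example, not the kernel implementation.

#include <stdio.h>
#include <stdlib.h>

struct probe_ops { const char *name; };

/* each (instance, probe_ops) pair gets its own dynamically
 * allocated probe, protected by a reference count */
struct func_probe {
	const struct probe_ops *probe_ops;
	struct func_probe *next;	/* per-instance list */
	int ref;
};

struct instance {
	const char *name;
	struct func_probe *probes;
};

static struct func_probe *get_probe(struct instance *tr,
				    const struct probe_ops *ops)
{
	struct func_probe *p;

	/* look the probe up on this instance; allocate on first use */
	for (p = tr->probes; p; p = p->next)
		if (p->probe_ops == ops)
			break;
	if (!p) {
		p = calloc(1, sizeof(*p));
		if (!p)
			return NULL;
		p->probe_ops = ops;
		p->next = tr->probes;
		tr->probes = p;
	}
	p->ref++;	/* hold a reference while we operate on it */
	return p;
}

static void put_probe(struct instance *tr, struct func_probe *probe)
{
	struct func_probe **pp;

	if (--probe->ref)
		return;
	/* last reference: unlink from the instance and free */
	for (pp = &tr->probes; *pp; pp = &(*pp)->next)
		if (*pp == probe) {
			*pp = probe->next;
			break;
		}
	free(probe);
}

int main(void)
{
	struct probe_ops traceon = { "traceon" };
	struct instance top = { "top", NULL };
	struct func_probe *p = get_probe(&top, &traceon);

	printf("probe %s on %s, ref=%d\n", p->probe_ops->name, top.name, p->ref);
	put_probe(&top, p);	/* drops the last ref and frees */
	return 0;
}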
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 192 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 141 insertions(+), 51 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index e51cd6b51253..8fdc18500c61 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1101,6 +1101,14 @@ struct ftrace_func_entry {
 	unsigned long ip;
 };
 
+struct ftrace_func_probe {
+	struct ftrace_probe_ops *probe_ops;
+	struct ftrace_ops ops;
+	struct trace_array *tr;
+	struct list_head list;
+	int ref;
+};
+
 /*
  * We make these constant because no one should touch them,
  * but they are used as the default "empty hash", to avoid allocating
@@ -3054,7 +3062,7 @@ struct ftrace_iterator {
 	loff_t func_pos;
 	struct ftrace_page *pg;
 	struct dyn_ftrace *func;
-	struct ftrace_probe_ops *probe;
+	struct ftrace_func_probe *probe;
 	struct ftrace_func_entry *probe_entry;
 	struct trace_parser parser;
 	struct ftrace_hash *hash;
@@ -3088,7 +3096,7 @@ t_probe_next(struct seq_file *m, loff_t *pos)
 
 	if (!iter->probe) {
 		next = func_probes->next;
-		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
+		iter->probe = list_entry(next, struct ftrace_func_probe, list);
 	}
 
 	if (iter->probe_entry)
@@ -3102,7 +3110,7 @@ t_probe_next(struct seq_file *m, loff_t *pos)
 		if (iter->probe->list.next == func_probes)
 			return NULL;
 		next = iter->probe->list.next;
-		iter->probe = list_entry(next, struct ftrace_probe_ops, list);
+		iter->probe = list_entry(next, struct ftrace_func_probe, list);
 		hash = iter->probe->ops.func_hash->filter_hash;
 		size = 1 << hash->size_bits;
 		iter->pidx = 0;
@@ -3166,8 +3174,9 @@ static void *t_probe_start(struct seq_file *m, loff_t *pos)
 static int
 t_probe_show(struct seq_file *m, struct ftrace_iterator *iter)
 {
-	struct ftrace_probe_ops *probe;
 	struct ftrace_func_entry *probe_entry;
+	struct ftrace_probe_ops *probe_ops;
+	struct ftrace_func_probe *probe;
 
 	probe = iter->probe;
 	probe_entry = iter->probe_entry;
@@ -3175,10 +3184,13 @@ t_probe_show(struct seq_file *m, struct ftrace_iterator *iter)
 	if (WARN_ON_ONCE(!probe || !probe_entry))
 		return -EIO;
 
-	if (probe->print)
-		return probe->print(m, probe_entry->ip, probe, NULL);
+	probe_ops = probe->probe_ops;
+
+	if (probe_ops->print)
+		return probe_ops->print(m, probe_entry->ip, probe_ops, NULL);
 
-	seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip, (void *)probe->func);
+	seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip,
+		   (void *)probe_ops->func);
 
 	return 0;
 }
@@ -3791,9 +3803,10 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 				      struct ftrace_ops *op, struct pt_regs *pt_regs)
 {
 	struct ftrace_probe_ops *probe_ops;
-	struct trace_array *tr = op->private;
+	struct ftrace_func_probe *probe;
 
-	probe_ops = container_of(op, struct ftrace_probe_ops, ops);
+	probe = container_of(op, struct ftrace_func_probe, ops);
+	probe_ops = probe->probe_ops;
 
 	/*
 	 * Disable preemption for these calls to prevent a RCU grace
@@ -3801,7 +3814,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	probe_ops->func(ip, parent_ip, tr, probe_ops, NULL);
+	probe_ops->func(ip, parent_ip, probe->tr, probe_ops, NULL);
 	preempt_enable_notrace();
 }
 
@@ -3946,11 +3959,41 @@ void free_ftrace_func_mapper(struct ftrace_func_mapper *mapper,
 	free_ftrace_hash(&mapper->hash);
 }
 
+static void release_probe(struct ftrace_func_probe *probe)
+{
+	struct ftrace_probe_ops *probe_ops;
+
+	mutex_lock(&ftrace_lock);
+
+	WARN_ON(probe->ref <= 0);
+
+	/* Subtract the ref that was used to protect this instance */
+	probe->ref--;
+
+	if (!probe->ref) {
+		probe_ops = probe->probe_ops;
+		list_del(&probe->list);
+		kfree(probe);
+	}
+	mutex_unlock(&ftrace_lock);
+}
+
+static void acquire_probe_locked(struct ftrace_func_probe *probe)
+{
+	/*
+	 * Add one ref to keep it from being freed when releasing the
+	 * ftrace_lock mutex.
+	 */
+	probe->ref++;
+}
+
 int
 register_ftrace_function_probe(char *glob, struct trace_array *tr,
-			       struct ftrace_probe_ops *ops, void *data)
+			       struct ftrace_probe_ops *probe_ops,
+			       void *data)
 {
 	struct ftrace_func_entry *entry;
+	struct ftrace_func_probe *probe;
 	struct ftrace_hash **orig_hash;
 	struct ftrace_hash *old_hash;
 	struct ftrace_hash *hash;
@@ -3966,16 +4009,33 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 	if (WARN_ON(glob[0] == '!'))
 		return -EINVAL;
 
-	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED)) {
-		ops->ops.func = function_trace_probe_call;
-		ftrace_ops_init(&ops->ops);
-		INIT_LIST_HEAD(&ops->list);
-		ops->ops.private = tr;
+
+	mutex_lock(&ftrace_lock);
+	/* Check if the probe_ops is already registered */
+	list_for_each_entry(probe, &tr->func_probes, list) {
+		if (probe->probe_ops == probe_ops)
+			break;
 	}
+	if (&probe->list == &tr->func_probes) {
+		probe = kzalloc(sizeof(*probe), GFP_KERNEL);
+		if (!probe) {
+			mutex_unlock(&ftrace_lock);
+			return -ENOMEM;
+		}
+		probe->probe_ops = probe_ops;
+		probe->ops.func = function_trace_probe_call;
+		probe->tr = tr;
+		ftrace_ops_init(&probe->ops);
+		list_add(&probe->list, &tr->func_probes);
+	}
+
+	acquire_probe_locked(probe);
 
-	mutex_lock(&ops->ops.func_hash->regex_lock);
+	mutex_unlock(&ftrace_lock);
+
+	mutex_lock(&probe->ops.func_hash->regex_lock);
 
-	orig_hash = &ops->ops.func_hash->filter_hash;
+	orig_hash = &probe->ops.func_hash->filter_hash;
 	old_hash = *orig_hash;
 	hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash);
@@ -3998,8 +4058,9 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 			 * for each function we find. We call the callback
 			 * to give the caller an opportunity to do so.
 			 */
-			if (ops->init) {
-				ret = ops->init(ops, tr, entry->ip, data);
+			if (probe_ops->init) {
+				ret = probe_ops->init(probe_ops, tr,
+						      entry->ip, data);
 				if (ret < 0)
 					goto out;
 			}
@@ -4009,16 +4070,22 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 
 	mutex_lock(&ftrace_lock);
 
-	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
-					      hash, 1);
+	if (!count) {
+		/* Nothing was added? */
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash,
+					      hash, 1);
 	if (ret < 0)
 		goto err_unlock;
 
-	if (list_empty(&ops->list))
-		list_add(&ops->list, &tr->func_probes);
+	/* One ref for each new function traced */
+	probe->ref += count;
 
-	if (!(ops->ops.flags & FTRACE_OPS_FL_ENABLED))
-		ret = ftrace_startup(&ops->ops, 0);
+	if (!(probe->ops.flags & FTRACE_OPS_FL_ENABLED))
+		ret = ftrace_startup(&probe->ops, 0);
 
  out_unlock:
 	mutex_unlock(&ftrace_lock);
@@ -4026,13 +4093,15 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 	if (!ret)
 		ret = count;
  out:
-	mutex_unlock(&ops->ops.func_hash->regex_lock);
+	mutex_unlock(&probe->ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
 
+	release_probe(probe);
+
 	return ret;
 
  err_unlock:
-	if (!ops->free)
+	if (!probe_ops->free || !count)
 		goto out_unlock;
 
 	/* Failed to do the move, need to call the free functions */
@@ -4040,33 +4109,30 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			if (ftrace_lookup_ip(old_hash, entry->ip))
 				continue;
-			ops->free(ops, tr, entry->ip, NULL);
+			probe_ops->free(probe_ops, tr, entry->ip, NULL);
 		}
 	}
 	goto out_unlock;
 }
 
 int
-unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
+unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
+				      struct ftrace_probe_ops *probe_ops)
 {
 	struct ftrace_ops_hash old_hash_ops;
 	struct ftrace_func_entry *entry;
+	struct ftrace_func_probe *probe;
 	struct ftrace_glob func_g;
 	struct ftrace_hash **orig_hash;
 	struct ftrace_hash *old_hash;
 	struct ftrace_hash *hash = NULL;
 	struct hlist_node *tmp;
 	struct hlist_head hhd;
-	struct trace_array *tr;
 	char str[KSYM_SYMBOL_LEN];
-	int i, ret;
+	int count = 0;
+	int i, ret = -ENODEV;
 	int size;
 
-	if (!(ops->ops.flags & FTRACE_OPS_FL_INITIALIZED))
-		return -EINVAL;
-
-	tr = ops->ops.private;
-
 	if (glob && (strcmp(glob, "*") == 0 || !strlen(glob)))
 		func_g.search = NULL;
 	else if (glob) {
@@ -4082,12 +4148,28 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 		return -EINVAL;
 	}
 
-	mutex_lock(&ops->ops.func_hash->regex_lock);
+	mutex_lock(&ftrace_lock);
+	/* Check if the probe_ops is already registered */
+	list_for_each_entry(probe, &tr->func_probes, list) {
+		if (probe->probe_ops == probe_ops)
+			break;
+	}
+	if (&probe->list == &tr->func_probes)
+		goto err_unlock_ftrace;
+
+	ret = -EINVAL;
+	if (!(probe->ops.flags & FTRACE_OPS_FL_INITIALIZED))
+		goto err_unlock_ftrace;
+
+	acquire_probe_locked(probe);
 
-	orig_hash = &ops->ops.func_hash->filter_hash;
+	mutex_unlock(&ftrace_lock);
+
+	mutex_lock(&probe->ops.func_hash->regex_lock);
+
+	orig_hash = &probe->ops.func_hash->filter_hash;
 	old_hash = *orig_hash;
 
-	ret = -EINVAL;
 	if (ftrace_hash_empty(old_hash))
 		goto out_unlock;
@@ -4112,46 +4194,54 @@ unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops)
 				if (!ftrace_match(str, &func_g))
 					continue;
 			}
-
+			count++;
 			remove_hash_entry(hash, entry);
 			hlist_add_head(&entry->hlist, &hhd);
 		}
 	}
 
 	/* Nothing found? */
-	if (hlist_empty(&hhd)) {
+	if (!count) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
 
 	mutex_lock(&ftrace_lock);
 
-	if (ftrace_hash_empty(hash)) {
-		ftrace_shutdown(&ops->ops, 0);
-		list_del_init(&ops->list);
-	}
+	WARN_ON(probe->ref < count);
+	probe->ref -= count;
 
-	ret = ftrace_hash_move_and_update_ops(&ops->ops, orig_hash,
+	if (ftrace_hash_empty(hash))
+		ftrace_shutdown(&probe->ops, 0);
+
+	ret = ftrace_hash_move_and_update_ops(&probe->ops, orig_hash,
 					      hash, 1);
 
 	/* still need to update the function call sites */
 	if (ftrace_enabled && !ftrace_hash_empty(hash))
-		ftrace_run_modify_code(&ops->ops, FTRACE_UPDATE_CALLS,
+		ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS,
 				       &old_hash_ops);
 	synchronize_sched();
 
 	hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
 		hlist_del(&entry->hlist);
-		if (ops->free)
-			ops->free(ops, tr, entry->ip, NULL);
+		if (probe_ops->free)
+			probe_ops->free(probe_ops, tr, entry->ip, NULL);
 		kfree(entry);
 	}
 	mutex_unlock(&ftrace_lock);
 
  out_unlock:
-	mutex_unlock(&ops->ops.func_hash->regex_lock);
+	mutex_unlock(&probe->ops.func_hash->regex_lock);
 	free_ftrace_hash(hash);
+
+	release_probe(probe);
+
+	return ret;
+
+ err_unlock_ftrace:
+	mutex_unlock(&ftrace_lock);
 	return ret;
 }
--
cgit v1.2.3


From 6e4443199e5354255e8a4c1e8e5cfc8ef064c3ce Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Wed, 19 Apr 2017 22:39:44 -0400
Subject: tracing/ftrace: Add a better way to pass data via the probe functions

With the redesign of the registration and execution of the function probes
(triggers), data can now be passed from the setup of the probe to the probe
callers that are specific to the trace_array it is on. Although all probes
still only affect the toplevel trace array, this change will allow instances
to have their own probes separated from other instances and the top array.
That is, something like the stacktrace probe can be set to trace only in an
instance and not the toplevel trace array. This isn't implemented yet, but
this change lays the groundwork for it.

When a probe callback is triggered (someone writes the probe format into
set_ftrace_filter), it calls register_ftrace_function_probe(), passing in
init_data that will be used to initialize the probe. Then for every matching
function, register_ftrace_function_probe() will call the probe_ops->init()
function with the init data that was passed to it, as well as the address of
a place holder that is associated with the probe and the instance. The first
occurrence will have NULL in the pointer, and the init() function will then
initialize it. If other probes are added, or more functions are attached to
the probe, the place holder will be passed to the init() function with the
data it was initialized to the last time.

This place holder is then passed to each of the other probe_ops functions,
where it can be used in the function callback.

When the probe_ops free() function is called, it can be called either with
the rip of the function that is being removed from the probe, or zero,
indicating that there are no more functions attached to the probe and the
place holder is about to be freed. This gives the probe_ops a way to free
the data it assigned to the place holder if it was allocated during the
first init call.
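A toy model of this place holder handshake, as standalone C. The signatures
below only mimic the probe_ops init()/free() contract described above; they
are not the kernel's.

#include <stdio.h>
#include <stdlib.h>

struct count_data { long count; };

/* first init() call allocates the shared data, later calls see it */
static int probe_init(unsigned long ip, void *init_data, void **data)
{
	struct count_data *d = *data;

	(void)init_data;
	if (!d) {			/* first function attached */
		d = calloc(1, sizeof(*d));
		if (!d)
			return -1;
		*data = d;
	}
	d->count++;			/* one more function uses this data */
	return 0;
}

static void probe_free(unsigned long ip, void **data)
{
	struct count_data *d = *data;

	/* ip == 0 means: no functions left, release the shared data */
	if (!ip) {
		free(d);
		*data = NULL;
	}
}

int main(void)
{
	void *placeholder = NULL;	/* the "place holder" starts NULL */

	probe_init(0x1000, NULL, &placeholder);
	probe_init(0x2000, NULL, &placeholder);
	printf("functions sharing data: %ld\n",
	       ((struct count_data *)placeholder)->count);
	probe_free(0, &placeholder);
	return 0;
}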
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 8fdc18500c61..774e9108e5dc 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1106,6 +1106,7 @@ struct ftrace_func_probe {
 	struct ftrace_ops ops;
 	struct trace_array *tr;
 	struct list_head list;
+	void *data;
 	int ref;
 };
 
@@ -3187,7 +3188,7 @@ t_probe_show(struct seq_file *m, struct ftrace_iterator *iter)
 	probe_ops = probe->probe_ops;
 
 	if (probe_ops->print)
-		return probe_ops->print(m, probe_entry->ip, probe_ops, NULL);
+		return probe_ops->print(m, probe_entry->ip, probe_ops, probe->data);
 
 	seq_printf(m, "%ps:%ps\n", (void *)probe_entry->ip,
 		   (void *)probe_ops->func);
@@ -3814,7 +3815,7 @@ static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
 	 * on the hash. rcu_read_lock is too dangerous here.
 	 */
 	preempt_disable_notrace();
-	probe_ops->func(ip, parent_ip, probe->tr, probe_ops, NULL);
+	probe_ops->func(ip, parent_ip, probe->tr, probe_ops, probe->data);
 	preempt_enable_notrace();
 }
 
@@ -3972,6 +3973,12 @@ static void release_probe(struct ftrace_func_probe *probe)
 
 	if (!probe->ref) {
 		probe_ops = probe->probe_ops;
+		/*
+		 * Sending zero as ip tells probe_ops to free
+		 * the probe->data itself
+		 */
+		if (probe_ops->free)
+			probe_ops->free(probe_ops, probe->tr, 0, probe->data);
 		list_del(&probe->list);
 		kfree(probe);
 	}
@@ -4060,9 +4067,15 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 			 */
 			if (probe_ops->init) {
 				ret = probe_ops->init(probe_ops, tr,
-						      entry->ip, data);
-				if (ret < 0)
+						      entry->ip, data,
+						      &probe->data);
+				if (ret < 0) {
+					if (probe_ops->free && count)
+						probe_ops->free(probe_ops, tr,
+								0, probe->data);
+					probe->data = NULL;
 					goto out;
+				}
 			}
 			count++;
 		}
@@ -4109,7 +4122,7 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr,
 		hlist_for_each_entry(entry, &hash->buckets[i], hlist) {
 			if (ftrace_lookup_ip(old_hash, entry->ip))
 				continue;
-			probe_ops->free(probe_ops, tr, entry->ip, NULL);
+			probe_ops->free(probe_ops, tr, entry->ip, probe->data);
 		}
 	}
 	goto out_unlock;
@@ -4227,7 +4240,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr,
 	hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) {
 		hlist_del(&entry->hlist);
 		if (probe_ops->free)
-			probe_ops->free(probe_ops, tr, entry->ip, NULL);
+			probe_ops->free(probe_ops, tr, entry->ip, probe->data);
 		kfree(entry);
 	}
 	mutex_unlock(&ftrace_lock);
--
cgit v1.2.3


From d2afd57a4b96f3824220bbb8c067558ca215543f Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)"
Date: Thu, 20 Apr 2017 11:31:35 -0400
Subject: tracing/ftrace: Allow instances to have their own function probes

Pass around the local trace_array, the descriptor for a tracing instance,
when enabling and disabling probes. By default, this makes the enabling and
disabling of event probe triggers work with instances. The other probes will
need some more work to get them working with instances.
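For callers, the visible effect of this change is the extra trace_array
argument. Below is a hedged sketch of how a set_ftrace_filter command
callback would be wired up after this series. The callback body and
mycmd_probe_ops are hypothetical; only the register/unregister signatures
are the ones introduced above.

static int
mycmd_callback(struct trace_array *tr, struct ftrace_hash *hash,
	       char *glob, char *cmd, char *param, int enable)
{
	/* probes are now registered against the instance (tr) itself */
	if (!enable)
		return unregister_ftrace_function_probe_func(glob, tr,
							     &mycmd_probe_ops);

	return register_ftrace_function_probe(glob, tr, &mycmd_probe_ops,
					      NULL);
}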
Signed-off-by: Steven Rostedt (VMware)
---
 kernel/trace/ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/trace/ftrace.c')

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 774e9108e5dc..6615197e6597 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3077,7 +3077,7 @@ static void *
 t_probe_next(struct seq_file *m, loff_t *pos)
 {
 	struct ftrace_iterator *iter = m->private;
-	struct trace_array *tr = global_ops.private;
+	struct trace_array *tr = iter->ops->private;
 	struct list_head *func_probes;
 	struct ftrace_hash *hash;
 	struct list_head *next;
@@ -4311,7 +4311,7 @@ static int ftrace_process_regex(struct ftrace_iterator *iter,
 				char *buff, int len, int enable)
 {
 	struct ftrace_hash *hash = iter->hash;
-	struct trace_array *tr = global_ops.private;
+	struct trace_array *tr = iter->ops->private;
 	char *func, *command, *next = buff;
 	struct ftrace_func_command *p;
 	int ret = -EINVAL;
--
cgit v1.2.3