Diffstat (limited to 'kernel/trace/trace.c')
-rw-r--r--	kernel/trace/trace.c	275
1 file changed, 219 insertions(+), 56 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a2f0b9f33e9b..8a4bd6b68a0b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -253,6 +253,9 @@ unsigned long long ns2usecs(cycle_t nsec)
 #define TOP_LEVEL_TRACE_FLAGS	(TRACE_ITER_PRINTK |		\
 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
 
+/* trace_flags that are default zero for instances */
+#define ZEROED_TRACE_FLAGS \
+	TRACE_ITER_EVENT_FORK
 
 /*
  * The global_trace is the descriptor that holds the tracing
@@ -303,33 +306,18 @@ void trace_array_put(struct trace_array *this_tr)
 	mutex_unlock(&trace_types_lock);
 }
 
-int filter_check_discard(struct trace_event_file *file, void *rec,
-			 struct ring_buffer *buffer,
-			 struct ring_buffer_event *event)
-{
-	if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
-	    !filter_match_preds(file->filter, rec)) {
-		ring_buffer_discard_commit(buffer, event);
-		return 1;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(filter_check_discard);
-
 int call_filter_check_discard(struct trace_event_call *call, void *rec,
 			      struct ring_buffer *buffer,
 			      struct ring_buffer_event *event)
 {
 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
 	    !filter_match_preds(call->filter, rec)) {
-		ring_buffer_discard_commit(buffer, event);
+		__trace_event_discard_commit(buffer, event);
 		return 1;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(call_filter_check_discard);
 
 static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
 {
@@ -1672,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
 
+static __always_inline void
+trace_event_setup(struct ring_buffer_event *event,
+		  int type, unsigned long flags, int pc)
+{
+	struct trace_entry *ent = ring_buffer_event_data(event);
+
+	tracing_generic_entry_update(ent, flags, pc);
+	ent->type = type;
+}
+
 struct ring_buffer_event *
 trace_buffer_lock_reserve(struct ring_buffer *buffer,
 			  int type,
@@ -1681,34 +1679,137 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,
 	struct ring_buffer_event *event;
 
 	event = ring_buffer_lock_reserve(buffer, len);
-	if (event != NULL) {
-		struct trace_entry *ent = ring_buffer_event_data(event);
+	if (event != NULL)
+		trace_event_setup(event, type, flags, pc);
+
+	return event;
+}
+
+DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
+DEFINE_PER_CPU(int, trace_buffered_event_cnt);
+static int trace_buffered_event_ref;
+
+/**
+ * trace_buffered_event_enable - enable buffering events
+ *
+ * When events are being filtered, it is quicker to use a temporary
+ * buffer to write the event data into if there's a likely chance
+ * that it will not be committed. The discard of the ring buffer
+ * is not as fast as committing, and is much slower than copying
+ * a commit.
+ *
+ * When an event is to be filtered, allocate per cpu buffers to
+ * write the event data into, and if the event is filtered and discarded
+ * it is simply dropped, otherwise, the entire data is to be committed
+ * in one shot.
+ */
+void trace_buffered_event_enable(void)
+{
+	struct ring_buffer_event *event;
+	struct page *page;
+	int cpu;
 
-		tracing_generic_entry_update(ent, flags, pc);
-		ent->type = type;
+	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+	if (trace_buffered_event_ref++)
+		return;
+
+	for_each_tracing_cpu(cpu) {
+		page = alloc_pages_node(cpu_to_node(cpu),
+					GFP_KERNEL | __GFP_NORETRY, 0);
+		if (!page)
+			goto failed;
+
+		event = page_address(page);
+		memset(event, 0, sizeof(*event));
+
+		per_cpu(trace_buffered_event, cpu) = event;
+
+		preempt_disable();
+		if (cpu == smp_processor_id() &&
+		    this_cpu_read(trace_buffered_event) !=
+		    per_cpu(trace_buffered_event, cpu))
+			WARN_ON_ONCE(1);
+		preempt_enable();
 	}
 
-	return event;
+	return;
+ failed:
+	trace_buffered_event_disable();
 }
 
-void
-__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+static void enable_trace_buffered_event(void *data)
 {
-	__this_cpu_write(trace_cmdline_save, true);
-	ring_buffer_unlock_commit(buffer, event);
+	/* Probably not needed, but do it anyway */
+	smp_rmb();
+	this_cpu_dec(trace_buffered_event_cnt);
 }
 
-void trace_buffer_unlock_commit(struct trace_array *tr,
-				struct ring_buffer *buffer,
-				struct ring_buffer_event *event,
-				unsigned long flags, int pc)
+static void disable_trace_buffered_event(void *data)
 {
-	__buffer_unlock_commit(buffer, event);
+	this_cpu_inc(trace_buffered_event_cnt);
+}
 
-	ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
-	ftrace_trace_userstack(buffer, flags, pc);
+/**
+ * trace_buffered_event_disable - disable buffering events
+ *
+ * When a filter is removed, it is faster to not use the buffered
+ * events, and to commit directly into the ring buffer. Free up
+ * the temp buffers when there are no more users. This requires
+ * special synchronization with current events.
+ */
+void trace_buffered_event_disable(void)
+{
+	int cpu;
+
+	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
+
+	if (WARN_ON_ONCE(!trace_buffered_event_ref))
+		return;
+
+	if (--trace_buffered_event_ref)
+		return;
+
+	preempt_disable();
+	/* For each CPU, set the buffer as used. */
+	smp_call_function_many(tracing_buffer_mask,
+			       disable_trace_buffered_event, NULL, 1);
+	preempt_enable();
+
+	/* Wait for all current users to finish */
+	synchronize_sched();
+
+	for_each_tracing_cpu(cpu) {
+		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
+		per_cpu(trace_buffered_event, cpu) = NULL;
+	}
+	/*
+	 * Make sure trace_buffered_event is NULL before clearing
+	 * trace_buffered_event_cnt.
+	 */
+	smp_wmb();
+
+	preempt_disable();
+	/* Do the work on each cpu */
+	smp_call_function_many(tracing_buffer_mask,
+			       enable_trace_buffered_event, NULL, 1);
+	preempt_enable();
+}
+
+void
+__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
+{
+	__this_cpu_write(trace_cmdline_save, true);
+
+	/* If this is the temp buffer, we need to commit fully */
+	if (this_cpu_read(trace_buffered_event) == event) {
+		/* Length is in event->array[0] */
+		ring_buffer_write(buffer, event->array[0], &event->array[1]);
+		/* Release the temp buffer */
+		this_cpu_dec(trace_buffered_event_cnt);
+	} else
+		ring_buffer_unlock_commit(buffer, event);
 }
-EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
 
 static struct ring_buffer *temp_buffer;
 
@@ -1719,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 			  unsigned long flags, int pc)
 {
 	struct ring_buffer_event *entry;
+	int val;
 
 	*current_rb = trace_file->tr->trace_buffer.buffer;
+
+	if ((trace_file->flags &
+	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
+	    (entry = this_cpu_read(trace_buffered_event))) {
+		/* Try to use the per cpu buffer first */
+		val = this_cpu_inc_return(trace_buffered_event_cnt);
+		if (val == 1) {
+			trace_event_setup(entry, type, flags, pc);
+			entry->array[0] = len;
+			return entry;
+		}
+		this_cpu_dec(trace_buffered_event_cnt);
+	}
+
 	entry = trace_buffer_lock_reserve(*current_rb,
 					  type, len, flags, pc);
 	/*
@@ -1738,17 +1854,6 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
 }
 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
 
-struct ring_buffer_event *
-trace_current_buffer_lock_reserve(struct ring_buffer **current_rb,
-				  int type, unsigned long len,
-				  unsigned long flags, int pc)
-{
-	*current_rb = global_trace.trace_buffer.buffer;
-	return trace_buffer_lock_reserve(*current_rb,
-					 type, len, flags, pc);
-}
-EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve);
-
 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 				     struct ring_buffer *buffer,
 				     struct ring_buffer_event *event,
@@ -1760,14 +1865,6 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
 	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
 	ftrace_trace_userstack(buffer, flags, pc);
 }
-EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
-
-void trace_current_buffer_discard_commit(struct ring_buffer *buffer,
-					 struct ring_buffer_event *event)
-{
-	ring_buffer_discard_commit(buffer, event);
-}
-EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);
 
 void
 trace_function(struct trace_array *tr,
@@ -3571,6 +3668,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
 	if (mask == TRACE_ITER_RECORD_CMD)
 		trace_event_enable_cmd_record(enabled);
 
+	if (mask == TRACE_ITER_EVENT_FORK)
+		trace_event_follow_fork(tr, enabled);
+
 	if (mask == TRACE_ITER_OVERWRITE) {
 		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
 #ifdef CONFIG_TRACER_MAX_TRACE
@@ -3658,7 +3758,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
+	if (copy_from_user(buf, ubuf, cnt))
 		return -EFAULT;
 
 	buf[cnt] = 0;
@@ -3804,12 +3904,19 @@ static const char readme_msg[] =
 	"\t   trigger: traceon, traceoff\n"
 	"\t            enable_event:<system>:<event>\n"
 	"\t            disable_event:<system>:<event>\n"
+#ifdef CONFIG_HIST_TRIGGERS
+	"\t            enable_hist:<system>:<event>\n"
+	"\t            disable_hist:<system>:<event>\n"
+#endif
 #ifdef CONFIG_STACKTRACE
 	"\t\t    stacktrace\n"
 #endif
 #ifdef CONFIG_TRACER_SNAPSHOT
 	"\t\t    snapshot\n"
 #endif
+#ifdef CONFIG_HIST_TRIGGERS
+	"\t\t    hist (see below)\n"
+#endif
 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
@@ -3825,6 +3932,56 @@ static const char readme_msg[] =
 	"\t   To remove a trigger with a count:\n"
 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
 	"\t   Filters can be ignored when removing a trigger.\n"
+#ifdef CONFIG_HIST_TRIGGERS
+	"  hist trigger\t- If set, event hits are aggregated into a hash table\n"
+	"\t    Format: hist:keys=<field1[,field2,...]>\n"
+	"\t            [:values=<field1[,field2,...]>]\n"
+	"\t            [:sort=<field1[,field2,...]>]\n"
+	"\t            [:size=#entries]\n"
+	"\t            [:pause][:continue][:clear]\n"
+	"\t            [:name=histname1]\n"
+	"\t            [if <filter>]\n\n"
+	"\t    When a matching event is hit, an entry is added to a hash\n"
+	"\t    table using the key(s) and value(s) named, and the value of a\n"
+	"\t    sum called 'hitcount' is incremented. Keys and values\n"
+	"\t    correspond to fields in the event's format description. Keys\n"
+	"\t    can be any field, or the special string 'stacktrace'.\n"
+	"\t    Compound keys consisting of up to two fields can be specified\n"
+	"\t    by the 'keys' keyword. Values must correspond to numeric\n"
+	"\t    fields. Sort keys consisting of up to two fields can be\n"
+	"\t    specified using the 'sort' keyword. The sort direction can\n"
+	"\t    be modified by appending '.descending' or '.ascending' to a\n"
+	"\t    sort field. The 'size' parameter can be used to specify more\n"
+	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
+	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
+	"\t    its histogram data will be shared with other triggers of the\n"
+	"\t    same name, and trigger hits will update this common data.\n\n"
+	"\t    Reading the 'hist' file for the event will dump the hash\n"
+	"\t    table in its entirety to stdout. If there are multiple hist\n"
+	"\t    triggers attached to an event, there will be a table for each\n"
+	"\t    trigger in the output. The table displayed for a named\n"
+	"\t    trigger will be the same as any other instance having the\n"
+	"\t    same name. The default format used to display a given field\n"
+	"\t    can be modified by appending any of the following modifiers\n"
+	"\t    to the field name, as applicable:\n\n"
+	"\t            .hex        display a number as a hex value\n"
+	"\t            .sym        display an address as a symbol\n"
+	"\t            .sym-offset display an address as a symbol and offset\n"
+	"\t            .execname   display a common_pid as a program name\n"
+	"\t            .syscall    display a syscall id as a syscall name\n\n"
+	"\t            .log2       display log2 value rather than raw number\n\n"
+	"\t    The 'pause' parameter can be used to pause an existing hist\n"
+	"\t    trigger or to start a hist trigger but not log any events\n"
+	"\t    until told to do so. 'continue' can be used to start or\n"
+	"\t    restart a paused hist trigger.\n\n"
+	"\t    The 'clear' parameter will clear the contents of a running\n"
+	"\t    hist trigger and leave its current paused/active state\n"
+	"\t    unchanged.\n\n"
+	"\t    The enable_hist and disable_hist triggers can be used to\n"
+	"\t    have one event conditionally start and stop another event's\n"
+	"\t    already-attached hist trigger. The syntax is analogous to\n"
+	"\t    the enable_event and disable_event triggers.\n"
+#endif
 ;
 
 static ssize_t
@@ -4474,7 +4631,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,
 	if (cnt > MAX_TRACER_SIZE)
 		cnt = MAX_TRACER_SIZE;
 
-	if (copy_from_user(&buf, ubuf, cnt))
+	if (copy_from_user(buf, ubuf, cnt))
 		return -EFAULT;
 
 	buf[cnt] = 0;
@@ -5264,7 +5421,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
 	if (cnt >= sizeof(buf))
 		return -EINVAL;
 
-	if (copy_from_user(&buf, ubuf, cnt))
+	if (copy_from_user(buf, ubuf, cnt))
 		return -EFAULT;
 
 	buf[cnt] = 0;
@@ -6650,7 +6807,7 @@ static int instance_mkdir(const char *name)
 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
 		goto out_free_tr;
 
-	tr->trace_flags = global_trace.trace_flags;
+	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
 
 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
 
@@ -6724,6 +6881,12 @@ static int instance_rmdir(const char *name)
 
 	list_del(&tr->list);
 
+	/* Disable all the flags that were enabled coming in */
+	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
+		if ((1 << i) & ZEROED_TRACE_FLAGS)
+			set_tracer_flag(tr, 1 << i, 0);
+	}
+
 	tracing_set_nop(tr);
 	event_trace_del_tracer(tr);
 	ftrace_destroy_function_files(tr);
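
For readers tracing through the buffered-event hunks above, what follows is a minimal userspace sketch of the reserve/commit decision that trace_event_buffer_lock_reserve() and __buffer_unlock_commit() implement in this patch. It is an illustration under simplifying assumptions, not kernel code: the names sketch_event, reserve(), commit() and TEMP_BUF_SIZE are illustrative stand-ins, the single temp_event models one CPU's trace_buffered_event page, and temp_event_cnt models trace_buffered_event_cnt guarding against nested use. Per-cpu state, preemption, and the real ring-buffer API are deliberately left out.

	#include <stdio.h>
	#include <string.h>

	#define TEMP_BUF_SIZE 64

	struct sketch_event {
		size_t len;
		char data[TEMP_BUF_SIZE];
	};

	static struct sketch_event temp_event;	/* models one CPU's trace_buffered_event */
	static int temp_event_cnt;		/* models trace_buffered_event_cnt */

	/* Reserve space: prefer the temp buffer when the event may be filtered. */
	static struct sketch_event *reserve(int may_be_filtered, size_t len)
	{
		if (may_be_filtered && len <= TEMP_BUF_SIZE) {
			/* Mirrors the this_cpu_inc_return() check: only the
			 * first, non-nested user gets the temp buffer. */
			if (++temp_event_cnt == 1) {
				temp_event.len = len;
				return &temp_event;
			}
			temp_event_cnt--;	/* nested: fall back */
		}
		return NULL;	/* caller would reserve in the real ring buffer */
	}

	/* Commit: a temp-buffer event is either dropped or copied in one shot. */
	static void commit(struct sketch_event *ev, int discard)
	{
		if (ev != &temp_event)
			return;		/* real ring-buffer commit/discard path */

		if (!discard)
			printf("commit: copy %zu bytes in one shot\n", ev->len);
		/* A filtered event is simply dropped: nothing to undo. */
		temp_event_cnt--;	/* release the temp buffer either way */
	}

	int main(void)
	{
		struct sketch_event *ev;

		ev = reserve(1, 16);	/* event has a filter: temp buffer used */
		if (ev) {
			memcpy(ev->data, "example payload", 16);
			commit(ev, 0);	/* passed the filter: copy-commit */
		}

		ev = reserve(1, 16);
		if (ev)
			commit(ev, 1);	/* failed the filter: dropped cheaply */

		return 0;
	}

The design point the sketch tries to make visible: discarding a filtered event from the temp buffer is a plain counter decrement, whereas a ring-buffer discard-commit is comparatively expensive; an event that passes the filter pays only one extra copy, which is the ring_buffer_write() call in the new __buffer_unlock_commit() above.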