From b7e2ecef92d2e7785e6d76b41e5ba8bcbc45259d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 May 2010 10:52:27 +0200 Subject: perf, trace: Optimize tracepoints by removing IRQ-disable from perf/tracepoint interaction Improves performance. Acked-by: Frederic Weisbecker Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Mike Galbraith Cc: Steven Rostedt LKML-Reference: <1274259525.5605.10352.camel@twins> Signed-off-by: Ingo Molnar --- kernel/trace/trace_event_perf.c | 73 ++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 44 deletions(-) (limited to 'kernel/trace/trace_event_perf.c') diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c index 89b780a7c522..a1304f8c4440 100644 --- a/kernel/trace/trace_event_perf.c +++ b/kernel/trace/trace_event_perf.c @@ -9,13 +9,9 @@ #include #include "trace.h" -DEFINE_PER_CPU(struct pt_regs, perf_trace_regs); -EXPORT_PER_CPU_SYMBOL_GPL(perf_trace_regs); - EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); -static char *perf_trace_buf; -static char *perf_trace_buf_nmi; +static char *perf_trace_buf[4]; /* * Force it to be aligned to unsigned long to avoid misaligned accesses @@ -29,7 +25,6 @@ static int total_ref_count; static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) { - char *buf; int ret = -ENOMEM; if (event->perf_refcount++ > 0) { @@ -38,17 +33,16 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) } if (!total_ref_count) { - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf; - - rcu_assign_pointer(perf_trace_buf, buf); + char *buf; + int i; - buf = (char *)alloc_percpu(perf_trace_t); - if (!buf) - goto fail_buf_nmi; + for (i = 0; i < 4; i++) { + buf = (char *)alloc_percpu(perf_trace_t); + if (!buf) + goto fail_buf; - rcu_assign_pointer(perf_trace_buf_nmi, buf); + rcu_assign_pointer(perf_trace_buf[i], buf); + } } ret = event->perf_event_enable(event); @@ -58,14 +52,15 @@ static int perf_trace_event_enable(struct ftrace_event_call *event, void *data) return 0; } -fail_buf_nmi: +fail_buf: if (!total_ref_count) { - free_percpu(perf_trace_buf_nmi); - free_percpu(perf_trace_buf); - perf_trace_buf_nmi = NULL; - perf_trace_buf = NULL; + int i; + + for (i = 0; i < 4; i++) { + free_percpu(perf_trace_buf[i]); + perf_trace_buf[i] = NULL; + } } -fail_buf: event->perf_refcount--; return ret; @@ -91,19 +86,19 @@ int perf_trace_enable(int event_id, void *data) static void perf_trace_event_disable(struct ftrace_event_call *event) { - char *buf, *nmi_buf; - if (--event->perf_refcount > 0) return; event->perf_event_disable(event); if (!--total_ref_count) { - buf = perf_trace_buf; - rcu_assign_pointer(perf_trace_buf, NULL); + char *buf[4]; + int i; - nmi_buf = perf_trace_buf_nmi; - rcu_assign_pointer(perf_trace_buf_nmi, NULL); + for (i = 0; i < 4; i++) { + buf[i] = perf_trace_buf[i]; + rcu_assign_pointer(perf_trace_buf[i], NULL); + } /* * Ensure every events in profiling have finished before @@ -111,8 +106,8 @@ static void perf_trace_event_disable(struct ftrace_event_call *event) */ synchronize_sched(); - free_percpu(buf); - free_percpu(nmi_buf); + for (i = 0; i < 4; i++) + free_percpu(buf[i]); } } @@ -132,47 +127,37 @@ void perf_trace_disable(int event_id) } __kprobes void *perf_trace_buf_prepare(int size, unsigned short type, - int *rctxp, unsigned long *irq_flags) + struct pt_regs *regs, int *rctxp) { struct trace_entry *entry; char *trace_buf, *raw_data; - int pc, cpu; + int pc; BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long)); pc = preempt_count(); - /* Protect the per cpu buffer, begin the rcu read side */ - local_irq_save(*irq_flags); - *rctxp = perf_swevent_get_recursion_context(); if (*rctxp < 0) goto err_recursion; - cpu = smp_processor_id(); - - if (in_nmi()) - trace_buf = rcu_dereference_sched(perf_trace_buf_nmi); - else - trace_buf = rcu_dereference_sched(perf_trace_buf); - + trace_buf = rcu_dereference_sched(perf_trace_buf[*rctxp]); if (!trace_buf) goto err; - raw_data = per_cpu_ptr(trace_buf, cpu); + raw_data = per_cpu_ptr(trace_buf, smp_processor_id()); /* zero the dead bytes from align to not leak stack to user */ memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64)); entry = (struct trace_entry *)raw_data; - tracing_generic_entry_update(entry, *irq_flags, pc); + tracing_generic_entry_update(entry, regs->flags, pc); entry->type = type; return raw_data; err: perf_swevent_put_recursion_context(*rctxp); err_recursion: - local_irq_restore(*irq_flags); return NULL; } EXPORT_SYMBOL_GPL(perf_trace_buf_prepare); -- cgit v1.2.3