-rw-r--r--  arch/Kconfig | 7
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/include/asm/Kbuild | 1
-rw-r--r--  arch/x86/include/asm/a.out-core.h | 10
-rw-r--r--  arch/x86/include/asm/debugreg.h | 33
-rw-r--r--  arch/x86/include/asm/hw_breakpoint.h | 73
-rw-r--r--  arch/x86/include/asm/processor.h | 14
-rw-r--r--  arch/x86/kernel/Makefile | 2
-rw-r--r--  arch/x86/kernel/cpu/Makefile | 1
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c | 549
-rw-r--r--  arch/x86/kernel/kgdb.c | 6
-rw-r--r--  arch/x86/kernel/kprobes.c | 9
-rw-r--r--  arch/x86/kernel/machine_kexec_32.c | 2
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 21
-rw-r--r--  arch/x86/kernel/process_32.c | 6
-rw-r--r--  arch/x86/kernel/process_64.c | 7
-rw-r--r--  arch/x86/kernel/ptrace.c | 293
-rw-r--r--  arch/x86/kernel/signal.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 73
-rw-r--r--  arch/x86/kvm/x86.c | 18
-rw-r--r--  arch/x86/mm/kmmio.c | 8
-rw-r--r--  arch/x86/power/cpu.c | 26
-rw-r--r--  arch/x86/tools/test_get_len.c | 17
-rw-r--r--  include/linux/ftrace_event.h | 9
-rw-r--r--  include/linux/hw_breakpoint.h | 140
-rw-r--r--  include/linux/perf_event.h | 48
-rw-r--r--  include/trace/ftrace.h | 22
-rw-r--r--  kernel/Makefile | 2
-rw-r--r--  kernel/exit.c | 5
-rw-r--r--  kernel/hw_breakpoint.c | 501
-rw-r--r--  kernel/kallsyms.c | 1
-rw-r--r--  kernel/perf_event.c | 517
-rw-r--r--  kernel/trace/Kconfig | 21
-rw-r--r--  kernel/trace/Makefile | 1
-rw-r--r--  kernel/trace/trace.h | 7
-rw-r--r--  kernel/trace/trace_entries.h | 16
-rw-r--r--  kernel/trace/trace_event_profile.c | 14
-rw-r--r--  kernel/trace/trace_kprobe.c | 50
-rw-r--r--  kernel/trace/trace_ksym.c | 554
-rw-r--r--  kernel/trace/trace_selftest.c | 55
-rw-r--r--  kernel/trace/trace_syscalls.c | 59
-rw-r--r--  samples/Kconfig | 6
-rw-r--r--  samples/Makefile | 3
-rw-r--r--  samples/hw_breakpoint/Makefile | 1
-rw-r--r--  samples/hw_breakpoint/data_breakpoint.c | 90
-rw-r--r--  tools/perf/Documentation/perf-kmem.txt | 44
-rw-r--r--  tools/perf/Documentation/perf-record.txt | 16
-rw-r--r--  tools/perf/Makefile | 49
-rw-r--r--  tools/perf/builtin-annotate.c | 12
-rw-r--r--  tools/perf/builtin-kmem.c | 833
-rw-r--r--  tools/perf/builtin-record.c | 29
-rw-r--r--  tools/perf/builtin-report.c | 21
-rw-r--r--  tools/perf/builtin-sched.c | 3
-rw-r--r--  tools/perf/builtin-timechart.c | 17
-rw-r--r--  tools/perf/builtin-top.c | 30
-rw-r--r--  tools/perf/builtin-trace.c | 3
-rw-r--r--  tools/perf/builtin.h | 1
-rw-r--r--  tools/perf/command-list.txt | 1
-rw-r--r--  tools/perf/perf.c | 27
-rw-r--r--  tools/perf/util/ctype.c | 8
-rw-r--r--  tools/perf/util/data_map.c | 86
-rw-r--r--  tools/perf/util/data_map.h | 2
-rw-r--r--  tools/perf/util/event.h | 10
-rw-r--r--  tools/perf/util/header.c | 163
-rw-r--r--  tools/perf/util/header.h | 9
-rw-r--r--  tools/perf/util/include/linux/bitops.h | 2
-rw-r--r--  tools/perf/util/map.c | 23
-rw-r--r--  tools/perf/util/parse-events.c | 84
-rw-r--r--  tools/perf/util/symbol.c | 505
-rw-r--r--  tools/perf/util/symbol.h | 11
-rw-r--r--  tools/perf/util/thread.h | 3
-rw-r--r--  tools/perf/util/trace-event-info.c | 22
-rw-r--r--  tools/perf/util/trace-event-read.c | 4
-rw-r--r--  tools/perf/util/trace-event.h | 2
-rw-r--r--  tools/perf/util/util.h | 3
76 files changed, 4431 insertions, 902 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 7f418bbc261a..eef3bbb97075 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -126,4 +126,11 @@ config HAVE_DMA_API_DEBUG
config HAVE_DEFAULT_NO_SPIN_MUTEXES
bool
+config HAVE_HW_BREAKPOINT
+ bool
+ depends on HAVE_PERF_EVENTS
+ select ANON_INODES
+ select PERF_EVENTS
+
+
source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 72ace9515a07..178084b4377c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -49,6 +49,7 @@ config X86
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_LZMA
+ select HAVE_HW_BREAKPOINT
select HAVE_ARCH_KMEMCHECK
config OUTPUT_FORMAT
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index 4a8e80cdcfa5..9f828f87ca35 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -10,6 +10,7 @@ header-y += ptrace-abi.h
header-y += sigcontext32.h
header-y += ucontext.h
header-y += processor-flags.h
+header-y += hw_breakpoint.h
unifdef-y += e820.h
unifdef-y += ist.h
diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h
index bb70e397aa84..7a15588e45d4 100644
--- a/arch/x86/include/asm/a.out-core.h
+++ b/arch/x86/include/asm/a.out-core.h
@@ -17,6 +17,7 @@
#include <linux/user.h>
#include <linux/elfcore.h>
+#include <asm/debugreg.h>
/*
* fill in the user structure for an a.out core dump
@@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump)
>> PAGE_SHIFT;
dump->u_dsize -= dump->u_tsize;
dump->u_ssize = 0;
- dump->u_debugreg[0] = current->thread.debugreg0;
- dump->u_debugreg[1] = current->thread.debugreg1;
- dump->u_debugreg[2] = current->thread.debugreg2;
- dump->u_debugreg[3] = current->thread.debugreg3;
- dump->u_debugreg[4] = 0;
- dump->u_debugreg[5] = 0;
- dump->u_debugreg[6] = current->thread.debugreg6;
- dump->u_debugreg[7] = current->thread.debugreg7;
+ aout_dump_debugregs(dump);
if (dump->start_stack < TASK_SIZE)
dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack))
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 3ea6f37be9e2..fdabd8435765 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -18,6 +18,7 @@
#define DR_TRAP1 (0x2) /* db1 */
#define DR_TRAP2 (0x4) /* db2 */
#define DR_TRAP3 (0x8) /* db3 */
+#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP (0x4000) /* single-step */
#define DR_SWITCH (0x8000) /* task switch */
@@ -49,6 +50,8 @@
#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
+#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
+#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
@@ -67,4 +70,34 @@
#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+/*
+ * HW breakpoint additions
+ */
+#ifdef __KERNEL__
+
+DECLARE_PER_CPU(unsigned long, dr7);
+
+static inline void hw_breakpoint_disable(void)
+{
+ /* Zero the control register for HW Breakpoint */
+ set_debugreg(0UL, 7);
+
+ /* Zero-out the individual HW breakpoint address registers */
+ set_debugreg(0UL, 0);
+ set_debugreg(0UL, 1);
+ set_debugreg(0UL, 2);
+ set_debugreg(0UL, 3);
+}
+
+static inline int hw_breakpoint_active(void)
+{
+ return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK;
+}
+
+extern void aout_dump_debugregs(struct user *dump);
+
+extern void hw_breakpoint_restore(void);
+
+#endif /* __KERNEL__ */
+
#endif /* _ASM_X86_DEBUGREG_H */
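
The new helpers above support lazy debug-register handling: paths that may clobber the registers (kexec, KVM guest entry, suspend/resume) check the per-cpu dr7 mirror and rewrite the hardware only when breakpoints are actually armed. A minimal sketch of that pattern (the wrapper function name is illustrative, not part of the patch):

    #include <asm/debugreg.h>

    static void after_debugreg_clobbering_path(void)
    {
            /*
             * dr7 was zeroed (or left in an unknown state) while we were
             * away; restore from the per-cpu mirrors only if something
             * is armed.
             */
            if (hw_breakpoint_active())
                    hw_breakpoint_restore();
    }

The KVM hunk later in this patch uses exactly this sequence after returning from the guest.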
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
new file mode 100644
index 000000000000..0675a7c4c20e
--- /dev/null
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -0,0 +1,73 @@
+#ifndef _I386_HW_BREAKPOINT_H
+#define _I386_HW_BREAKPOINT_H
+
+#ifdef __KERNEL__
+#define __ARCH_HW_BREAKPOINT_H
+
+/*
+ * Resolving and displaying the symbol name should
+ * probably be handled at a higher level, when
+ * dealing with the user (display/resolving).
+ */
+struct arch_hw_breakpoint {
+ char *name; /* Contains name of the symbol to set bkpt */
+ unsigned long address;
+ u8 len;
+ u8 type;
+};
+
+#include <linux/kdebug.h>
+#include <linux/percpu.h>
+#include <linux/list.h>
+
+/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_1 0x40
+#define X86_BREAKPOINT_LEN_2 0x44
+#define X86_BREAKPOINT_LEN_4 0x4c
+#define X86_BREAKPOINT_LEN_EXECUTE 0x40
+
+#ifdef CONFIG_X86_64
+#define X86_BREAKPOINT_LEN_8 0x48
+#endif
+
+/* Available HW breakpoint type encodings */
+
+/* trigger on instruction execute */
+#define X86_BREAKPOINT_EXECUTE 0x80
+/* trigger on memory write */
+#define X86_BREAKPOINT_WRITE 0x81
+/* trigger on memory read or write */
+#define X86_BREAKPOINT_RW 0x83
+
+/* Total number of available HW breakpoint registers */
+#define HBP_NUM 4
+
+struct perf_event;
+struct pmu;
+
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+ struct task_struct *tsk);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+ unsigned long val, void *data);
+
+
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp);
+
+extern void
+arch_fill_perf_breakpoint(struct perf_event *bp);
+
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type);
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type);
+
+extern int arch_bp_generic_fields(int x86_len, int x86_type,
+ int *gen_len, int *gen_type);
+
+extern struct pmu perf_ops_bp;
+
+#endif /* __KERNEL__ */
+#endif /* _I386_HW_BREAKPOINT_H */
+
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c9786480f0fe..6f8ec1c37e0a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -30,6 +30,7 @@ struct mm_struct;
#include <linux/math64.h>
#include <linux/init.h>
+#define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -422,6 +423,8 @@ extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
+struct perf_event;
+
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -443,13 +446,10 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
- /* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
+ /* Saved intermediate states of the ptrace breakpoints */
+ struct perf_event *ptrace_bps[HBP_NUM];
+ /* Debug status used for traps, single steps, etc... */
+ unsigned long debugreg6;
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d8e5d0cdd678..4f2e66e29ecc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
obj-y += bootflag.o e820.o
obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
-obj-y += alternative.o i8253.o pci-nommu.o
+obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o
obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 68537e957a9b..1d2cb383410e 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -5,6 +5,7 @@
# Don't trace early stages of a secondary CPU boot
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_common.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
endif
# Make sure load_percpu_segment has no stackprotector
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..4d267fb77828
--- /dev/null
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -0,0 +1,549 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) 2009 IBM Corporation
+ * Copyright (C) 2009 Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ * Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/irqflags.h>
+#include <linux/notifier.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/debugreg.h>
+
+/* Per cpu debug control register value */
+DEFINE_PER_CPU(unsigned long, dr7);
+EXPORT_PER_CPU_SYMBOL(dr7);
+
+/* Per cpu debug address register values */
+static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]);
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for each cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]);
+
+
+/*
+ * Encode the length, type, Exact, and Enable bits for a particular breakpoint
+ * as stored in debug register 7.
+ */
+unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type)
+{
+ unsigned long bp_info;
+
+ bp_info = (len | type) & 0xf;
+ bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE);
+ bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) |
+ DR_GLOBAL_SLOWDOWN;
+ return bp_info;
+}
+
+/*
+ * Decode the length and type bits for a particular breakpoint as
+ * stored in debug register 7. Return the "enabled" status.
+ */
+int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type)
+{
+ int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE);
+
+ *len = (bp_info & 0xc) | 0x40;
+ *type = (bp_info & 0x3) | 0x80;
+
+ return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3;
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint. Eventually we enable it in the debug control register.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (!*slot) {
+ *slot = bp;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return -EBUSY;
+
+ set_debugreg(info->address, i);
+ __get_cpu_var(cpu_debugreg[i]) = info->address;
+
+ dr7 = &__get_cpu_var(dr7);
+ *dr7 |= encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+
+ return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned long *dr7;
+ int i;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]);
+
+ if (*slot == bp) {
+ *slot = NULL;
+ break;
+ }
+ }
+
+ if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot"))
+ return;
+
+ dr7 = &__get_cpu_var(dr7);
+ *dr7 &= ~encode_dr7(i, info->len, info->type);
+
+ set_debugreg(*dr7, 7);
+}
+
+static int get_hbp_len(u8 hbp_len)
+{
+ unsigned int len_in_bytes = 0;
+
+ switch (hbp_len) {
+ case X86_BREAKPOINT_LEN_1:
+ len_in_bytes = 1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ len_in_bytes = 2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ len_in_bytes = 4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ len_in_bytes = 8;
+ break;
+#endif
+ }
+ return len_in_bytes;
+}
+
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va <= TASK_SIZE - len);
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
+{
+ unsigned int len;
+
+ len = get_hbp_len(hbp_len);
+
+ return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
+}
+
+/*
+ * Store a breakpoint's encoded address, length, and type.
+ */
+static int arch_store_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ /*
+ * For kernel addresses, either the address or symbol name can be
+ * specified.
+ */
+ if (info->name)
+ info->address = (unsigned long)
+ kallsyms_lookup_name(info->name);
+ if (info->address)
+ return 0;
+
+ return -EINVAL;
+}
+
+int arch_bp_generic_fields(int x86_len, int x86_type,
+ int *gen_len, int *gen_type)
+{
+ /* Len */
+ switch (x86_len) {
+ case X86_BREAKPOINT_LEN_1:
+ *gen_len = HW_BREAKPOINT_LEN_1;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ *gen_len = HW_BREAKPOINT_LEN_2;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ *gen_len = HW_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ *gen_len = HW_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (x86_type) {
+ case X86_BREAKPOINT_EXECUTE:
+ *gen_type = HW_BREAKPOINT_X;
+ break;
+ case X86_BREAKPOINT_WRITE:
+ *gen_type = HW_BREAKPOINT_W;
+ break;
+ case X86_BREAKPOINT_RW:
+ *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+
+static int arch_build_bp_info(struct perf_event *bp)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+ info->address = bp->attr.bp_addr;
+
+ /* Len */
+ switch (bp->attr.bp_len) {
+ case HW_BREAKPOINT_LEN_1:
+ info->len = X86_BREAKPOINT_LEN_1;
+ break;
+ case HW_BREAKPOINT_LEN_2:
+ info->len = X86_BREAKPOINT_LEN_2;
+ break;
+ case HW_BREAKPOINT_LEN_4:
+ info->len = X86_BREAKPOINT_LEN_4;
+ break;
+#ifdef CONFIG_X86_64
+ case HW_BREAKPOINT_LEN_8:
+ info->len = X86_BREAKPOINT_LEN_8;
+ break;
+#endif
+ default:
+ return -EINVAL;
+ }
+
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+ struct task_struct *tsk)
+{
+ struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+ unsigned int align;
+ int ret;
+
+
+ ret = arch_build_bp_info(bp);
+ if (ret)
+ return ret;
+
+ ret = -EINVAL;
+
+ if (info->type == X86_BREAKPOINT_EXECUTE)
+ /*
+ * Ptrace-refactoring code.
+ * For now, we allow instruction breakpoints only for user-space
+ * addresses.
+ */
+ if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+ info->len != X86_BREAKPOINT_EXECUTE)
+ return ret;
+
+ switch (info->len) {
+ case X86_BREAKPOINT_LEN_1:
+ align = 0;
+ break;
+ case X86_BREAKPOINT_LEN_2:
+ align = 1;
+ break;
+ case X86_BREAKPOINT_LEN_4:
+ align = 3;
+ break;
+#ifdef CONFIG_X86_64
+ case X86_BREAKPOINT_LEN_8:
+ align = 7;
+ break;
+#endif
+ default:
+ return ret;
+ }
+
+ if (bp->callback)
+ ret = arch_store_info(bp);
+
+ if (ret < 0)
+ return ret;
+ /*
+ * Check that the low-order bits of the address are appropriate
+ * for the alignment implied by len.
+ */
+ if (info->address & align)
+ return -EINVAL;
+
+ /* Check that the virtual address is in the proper range */
+ if (tsk) {
+ if (!arch_check_va_in_userspace(info->address, info->len))
+ return -EFAULT;
+ } else {
+ if (!arch_check_va_in_kernelspace(info->address, info->len))
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Dump the debug register contents to the user.
+ * We can't dump our per-cpu values because they
+ * may contain cpu-wide breakpoints, which don't
+ * belong to the current task.
+ *
+ * TODO: include non-ptrace user breakpoints (perf)
+ */
+void aout_dump_debugregs(struct user *dump)
+{
+ int i;
+ int dr7 = 0;
+ struct perf_event *bp;
+ struct arch_hw_breakpoint *info;
+ struct thread_struct *thread = &current->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ bp = thread->ptrace_bps[i];
+
+ if (bp && !bp->attr.disabled) {
+ dump->u_debugreg[i] = bp->attr.bp_addr;
+ info = counter_arch_bp(bp);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ } else {
+ dump->u_debugreg[i] = 0;
+ }
+ }
+
+ dump->u_debugreg[4] = 0;
+ dump->u_debugreg[5] = 0;
+ dump->u_debugreg[6] = current->thread.debugreg6;
+
+ dump->u_debugreg[7] = dr7;
+}
+EXPORT_SYMBOL_GPL(aout_dump_debugregs);
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+ int i;
+ struct thread_struct *t = &tsk->thread;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ unregister_hw_breakpoint(t->ptrace_bps[i]);
+ t->ptrace_bps[i] = NULL;
+ }
+}
+
+void hw_breakpoint_restore(void)
+{
+ set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
+ set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
+ set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
+ set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+ set_debugreg(current->thread.debugreg6, 6);
+ set_debugreg(__get_cpu_var(dr7), 7);
+}
+EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
+
+/*
+ * Handle debug exception notifications.
+ *
+ * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below.
+ *
+ * NOTIFY_DONE is returned if one of the following conditions is true.
+ * i) When the causative address is from user-space and the exception
+ * is a valid one, i.e. not triggered as a result of lazy debug register
+ * switching
+ * ii) When bits other than trap<n> are set in the DR6 register (such
+ * as BD, BS or BT), indicating that more than one debug condition is
+ * met and requires further action in do_debug().
+ *
+ * NOTIFY_STOP is returned in all other cases.
+ *
+ */
+static int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+ int i, cpu, rc = NOTIFY_STOP;
+ struct perf_event *bp;
+ unsigned long dr7, dr6;
+ unsigned long *dr6_p;
+
+ /* The DR6 value is pointed to by args->err */
+ dr6_p = (unsigned long *)ERR_PTR(args->err);
+ dr6 = *dr6_p;
+
+ /* Do an early return if no trap bits are set in DR6 */
+ if ((dr6 & DR_TRAP_BITS) == 0)
+ return NOTIFY_DONE;
+
+ get_debugreg(dr7, 7);
+ /* Disable breakpoints during exception handling */
+ set_debugreg(0UL, 7);
+ /*
+ * Assert that local interrupts are disabled.
+ * Reset the DRn bits in the virtualized register value.
+ * The ptrace trigger routine will add in whatever is needed.
+ */
+ current->thread.debugreg6 &= ~DR_TRAP_BITS;
+ cpu = get_cpu();
+
+ /* Handle all the breakpoints that were triggered */
+ for (i = 0; i < HBP_NUM; ++i) {
+ if (likely(!(dr6 & (DR_TRAP0 << i))))
+ continue;
+
+ /*
+ * The counter may be concurrently released, but that can only
+ * occur from a call_rcu() path. We can therefore safely fetch
+ * the breakpoint, use its callback and touch its counter
+ * while we are inside an rcu_read_lock() section.
+ */
+ rcu_read_lock();
+
+ bp = per_cpu(bp_per_reg[i], cpu);
+ if (bp)
+ rc = NOTIFY_DONE;
+ /*
+ * Reset the 'i'th TRAP bit in dr6 to denote completion of
+ * exception handling
+ */
+ (*dr6_p) &= ~(DR_TRAP0 << i);
+ /*
+ * bp can be NULL due to lazy debug register switching
+ * or due to concurrent perf counter removing.
+ */
+ if (!bp) {
+ rcu_read_unlock();
+ break;
+ }
+
+ (bp->callback)(bp, args->regs);
+
+ rcu_read_unlock();
+ }
+ if (dr6 & (~DR_TRAP_BITS))
+ rc = NOTIFY_DONE;
+
+ set_debugreg(dr7, 7);
+ put_cpu();
+
+ return rc;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+ struct notifier_block *unused, unsigned long val, void *data)
+{
+ if (val != DIE_DEBUG)
+ return NOTIFY_DONE;
+
+ return hw_breakpoint_handler(data);
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+ /* TODO */
+}
+
+void hw_breakpoint_pmu_unthrottle(struct perf_event *bp)
+{
+ /* TODO */
+}
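
To see how encode_dr7() and decode_dr7() cooperate, here is a worked round trip. It assumes the usual DR_CONTROL_SHIFT of 16 from debugreg.h; the slot and operand choices are illustrative:

    /*
     * Arm slot 0 as a 4-byte read/write watchpoint:
     * (X86_BREAKPOINT_LEN_4 | X86_BREAKPOINT_RW) & 0xf
     *   = (0x4c | 0x83) & 0xf = 0xf,
     * shifted up to bits 16-19, plus DR_GLOBAL_ENABLE (0x2) for slot 0
     * and DR_GLOBAL_SLOWDOWN (0x200):
     */
    unsigned long dr7 = encode_dr7(0, X86_BREAKPOINT_LEN_4, X86_BREAKPOINT_RW);
    /* dr7 == 0x000f0202 */

    unsigned int len, type;
    int enabled = decode_dr7(dr7, 0, &len, &type);
    /*
     * enabled == 0x2 (global enable bit),
     * len     == 0x4c (X86_BREAKPOINT_LEN_4),
     * type    == 0x83 (X86_BREAKPOINT_RW)
     */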
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 8d82a77a3f3b..34e86b67550c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -43,6 +43,7 @@
#include <linux/smp.h>
#include <linux/nmi.h>
+#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/system.h>
@@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args)
"resuming...\n");
kgdb_arch_handle_exception(args->trapnr, args->signr,
args->err, "c", "", regs);
+ /*
+ * Reset the BS bit in dr6 (pointed to by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
return NOTIFY_STOP;
}
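
do_debug() now passes the address of its on-stack dr6 through notify_die()'s error-code slot, encoded with ERR_PTR()/PTR_ERR() so a long can carry a pointer. Any DIE_DEBUG consumer that handles a condition is expected to clear the matching DR6 bit, as kgdb does above for DR_STEP (and kprobes and kmmio do below). A minimal sketch of a hypothetical consumer:

    static int my_debug_notify(struct notifier_block *nb, unsigned long val,
                               void *data)
    {
            struct die_args *args = data;
            unsigned long *dr6_p;

            if (val != DIE_DEBUG)
                    return NOTIFY_DONE;

            /* Recover the pointer smuggled through the error-code slot */
            dr6_p = (unsigned long *)ERR_PTR(args->err);

            if (*dr6_p & DR_STEP) {
                    /* ... consume the single-step ... */
                    *dr6_p &= ~DR_STEP;     /* tell do_debug() it is handled */
                    return NOTIFY_STOP;
            }

            return NOTIFY_DONE;
    }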
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index c5f1f117e0c0..3fe86d706a14 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -56,6 +56,7 @@
#include <asm/uaccess.h>
#include <asm/alternative.h>
#include <asm/insn.h>
+#include <asm/debugreg.h>
void jprobe_return_end(void);
@@ -945,8 +946,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
ret = NOTIFY_STOP;
break;
case DIE_DEBUG:
- if (post_kprobe_handler(args->regs))
+ if (post_kprobe_handler(args->regs)) {
+ /*
+ * Reset the BS bit in dr6 (pointed to by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP;
ret = NOTIFY_STOP;
+ }
break;
case DIE_GPF:
/*
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index c1c429d00130..c843f8406da2 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -25,6 +25,7 @@
#include <asm/desc.h>
#include <asm/system.h>
#include <asm/cacheflush.h>
+#include <asm/debugreg.h>
static void set_idt(void *newidt, __u16 limit)
{
@@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 84c3bf209e98..4a8bb82248ae 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,6 +18,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
+#include <asm/debugreg.h>
static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
unsigned long addr)
@@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image)
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ hw_breakpoint_disable();
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5284cd2b5776..744508e7cfdd 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -10,6 +10,7 @@
#include <linux/clockchips.h>
#include <linux/random.h>
#include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -17,6 +18,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
unsigned long idle_halt;
EXPORT_SYMBOL(idle_halt);
@@ -103,14 +105,7 @@ void flush_thread(void)
}
#endif
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
-
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
+ flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -192,16 +187,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4cf79567cdab..d5bd3132ee70 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -58,6 +58,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -259,7 +260,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
tsk = current;
+ err = -ENOMEM;
+
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
IO_BITMAP_BYTES, GFP_KERNEL);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index eb62cbcaa490..70cf15873f3d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,6 +52,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
@@ -297,12 +298,16 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -341,6 +346,7 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+
return err;
}
@@ -495,6 +501,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
if (preload_fpu)
__math_state_restore();
+
return prev_p;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4f76d275ee4..b25f8947ed7a 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -22,6 +22,8 @@
#include <linux/seccomp.h>
#include <linux/signal.h>
#include <linux/workqueue.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -34,6 +36,7 @@
#include <asm/prctl.h>
#include <asm/proto.h>
#include <asm/ds.h>
+#include <asm/hw_breakpoint.h>
#include "tls.h"
@@ -249,11 +252,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
- return TASK_SIZE - 3;
-}
-
#else /* CONFIG_X86_64 */
#define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT)
@@ -378,15 +376,6 @@ static int set_segment_reg(struct task_struct *task,
return 0;
}
-static unsigned long debugreg_addr_limit(struct task_struct *task)
-{
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- return IA32_PAGE_OFFSET - 3;
-#endif
- return TASK_SIZE_MAX - 7;
-}
-
#endif /* CONFIG_X86_32 */
static unsigned long get_flags(struct task_struct *task)
@@ -566,99 +555,229 @@ static int genregs_set(struct task_struct *target,
return ret;
}
+static void ptrace_triggered(struct perf_event *bp, void *data)
+{
+ int i;
+ struct thread_struct *thread = &(current->thread);
+
+ /*
+ * Store in the virtual DR6 register the fact that the breakpoint
+ * was hit so the thread's debugger will see it.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ if (thread->ptrace_bps[i] == bp)
+ break;
+ }
+
+ thread->debugreg6 |= (DR_TRAP0 << i);
+}
+
/*
- * This function is trivial and will be inlined by the compiler.
- * Having it separates the implementation details of debug
- * registers from the interface details of ptrace.
+ * Walk through every ptrace breakpoint for this thread and
+ * build the dr7 value on top of their attributes.
+ *
*/
-static unsigned long ptrace_get_debugreg(struct task_struct *child, int n)
+static unsigned long ptrace_get_dr7(struct perf_event *bp[])
{
- switch (n) {
- case 0: return child->thread.debugreg0;
- case 1: return child->thread.debugreg1;
- case 2: return child->thread.debugreg2;
- case 3: return child->thread.debugreg3;
- case 6: return child->thread.debugreg6;
- case 7: return child->thread.debugreg7;
+ int i;
+ int dr7 = 0;
+ struct arch_hw_breakpoint *info;
+
+ for (i = 0; i < HBP_NUM; i++) {
+ if (bp[i] && !bp[i]->attr.disabled) {
+ info = counter_arch_bp(bp[i]);
+ dr7 |= encode_dr7(i, info->len, info->type);
+ }
}
- return 0;
+
+ return dr7;
}
-static int ptrace_set_debugreg(struct task_struct *child,
- int n, unsigned long data)
+/*
+ * Handle ptrace writes to debug register 7.
+ */
+static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data)
{
- int i;
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long old_dr7;
+ int i, orig_ret = 0, rc = 0;
+ int enabled, second_pass = 0;
+ unsigned len, type;
+ int gen_len, gen_type;
+ struct perf_event *bp;
+
+ data &= ~DR_CONTROL_RESERVED;
+ old_dr7 = ptrace_get_dr7(thread->ptrace_bps);
+restore:
+ /*
+ * Loop through all the hardware breakpoints, making the
+ * appropriate changes to each.
+ */
+ for (i = 0; i < HBP_NUM; i++) {
+ enabled = decode_dr7(data, i, &len, &type);
+ bp = thread->ptrace_bps[i];
+
+ if (!enabled) {
+ if (bp) {
+ /*
+ * Don't unregister the breakpoints right away,
+ * unless all register_user_hw_breakpoint()
+ * requests have succeeded. This prevents
+ * any window of opportunity for debug
+ * register grabbing by other users.
+ */
+ if (!second_pass)
+ continue;
+ thread->ptrace_bps[i] = NULL;
+ unregister_hw_breakpoint(bp);
+ }
+ continue;
+ }
- if (unlikely(n == 4 || n == 5))
- return -EIO;
+ /*
+ * We should have at least an inactive breakpoint at this
+ * slot; it means the user is writing dr7 without having
+ * written the address register first.
+ */
+ if (!bp) {
+ rc = -EINVAL;
+ break;
+ }
- if (n < 4 && unlikely(data >= debugreg_addr_limit(child)))
- return -EIO;
+ rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type);
+ if (rc)
+ break;
- switch (n) {
- case 0: child->thread.debugreg0 = data; break;
- case 1: child->thread.debugreg1 = data; break;
- case 2: child->thread.debugreg2 = data; break;
- case 3: child->thread.debugreg3 = data; break;
+ /*
+ * This is a temporary thing: the bp is unregistered and
+ * re-registered to simulate a modification.
+ */
+ bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len,
+ gen_type, bp->callback,
+ tsk, true);
+ thread->ptrace_bps[i] = NULL;
- case 6:
- if ((data & ~0xffffffffUL) != 0)
- return -EIO;
- child->thread.debugreg6 = data;
- break;
+ if (!bp) { /* incorrect bp, or we have a bug in bp API */
+ rc = -EINVAL;
+ break;
+ }
+ if (IS_ERR(bp)) {
+ rc = PTR_ERR(bp);
+ bp = NULL;
+ break;
+ }
+ thread->ptrace_bps[i] = bp;
+ }
+ /*
+ * Make a second pass to free the remaining unused breakpoints
+ * or to restore the original breakpoints if an error occurred.
+ */
+ if (!second_pass) {
+ second_pass = 1;
+ if (rc < 0) {
+ orig_ret = rc;
+ data = old_dr7;
+ }
+ goto restore;
+ }
+ return ((orig_ret < 0) ? orig_ret : rc);
+}
- case 7:
+/*
+ * Handle PTRACE_PEEKUSR calls for the debug register area.
+ */
+static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ unsigned long val = 0;
+
+ if (n < HBP_NUM) {
+ struct perf_event *bp;
+ bp = thread->ptrace_bps[n];
+ if (!bp)
+ return 0;
+ val = bp->hw.info.address;
+ } else if (n == 6) {
+ val = thread->debugreg6;
+ } else if (n == 7) {
+ val = ptrace_get_dr7(thread->ptrace_bps);
+ }
+ return val;
+}
+
+static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
+ unsigned long addr)
+{
+ struct perf_event *bp;
+ struct thread_struct *t = &tsk->thread;
+
+ if (!t->ptrace_bps[nr]) {
/*
- * Sanity-check data. Take one half-byte at once with
- * check = (val >> (16 + 4*i)) & 0xf. It contains the
- * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits
- * 2 and 3 are LENi. Given a list of invalid values,
- * we do mask |= 1 << invalid_value, so that
- * (mask >> check) & 1 is a correct test for invalid
- * values.
- *
- * R/Wi contains the type of the breakpoint /
- * watchpoint, LENi contains the length of the watched
- * data in the watchpoint case.
- *
- * The invalid values are:
- * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit]
- * - R/Wi == 0x10 (break on I/O reads or writes), so
- * mask |= 0x4444.
- * - R/Wi == 0x00 && LENi != 0x00, so we have mask |=
- * 0x1110.
- *
- * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54.
- *
- * See the Intel Manual "System Programming Guide",
- * 15.2.4
- *
- * Note that LENi == 0x10 is defined on x86_64 in long
- * mode (i.e. even for 32-bit userspace software, but
- * 64-bit kernel), so the x86_64 mask value is 0x5454.
- * See the AMD manual no. 24593 (AMD64 System Programming)
+ * Use a stub len and type to register (reserve) an inactive
+ * but correct bp.
*/
-#ifdef CONFIG_X86_32
-#define DR7_MASK 0x5f54
-#else
-#define DR7_MASK 0x5554
-#endif
- data &= ~DR_CONTROL_RESERVED;
- for (i = 0; i < 4; i++)
- if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1)
- return -EIO;
- child->thread.debugreg7 = data;
- if (data)
- set_tsk_thread_flag(child, TIF_DEBUG);
- else
- clear_tsk_thread_flag(child, TIF_DEBUG);
- break;
+ bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1,
+ HW_BREAKPOINT_W,
+ ptrace_triggered, tsk,
+ false);
+ } else {
+ bp = t->ptrace_bps[nr];
+ t->ptrace_bps[nr] = NULL;
+ bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len,
+ bp->attr.bp_type,
+ bp->callback,
+ tsk,
+ bp->attr.disabled);
}
+ if (!bp)
+ return -EIO;
+ /*
+ * CHECKME: the previous code returned -EIO if the addr wasn't a
+ * valid task virtual addr. The new one returns -EINVAL in this
+ * case.
+ * -EINVAL may be what we want for in-kernel breakpoint users, but
+ * -EIO looks better for ptrace, since we are refusing a register
+ * write for the user. And anyway this was the previous behaviour.
+ */
+ if (IS_ERR(bp))
+ return PTR_ERR(bp);
+
+ t->ptrace_bps[nr] = bp;
+
return 0;
}
/*
+ * Handle PTRACE_POKEUSR calls for the debug register area.
+ */
+int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val)
+{
+ struct thread_struct *thread = &(tsk->thread);
+ int rc = 0;
+
+ /* There are no DR4 or DR5 registers */
+ if (n == 4 || n == 5)
+ return -EIO;
+
+ if (n == 6) {
+ thread->debugreg6 = val;
+ goto ret_path;
+ }
+ if (n < HBP_NUM) {
+ rc = ptrace_set_breakpoint_addr(tsk, n, val);
+ if (rc)
+ return rc;
+ }
+ /* All that's left is DR7 */
+ if (n == 7)
+ rc = ptrace_write_dr7(tsk, val);
+
+ret_path:
+ return rc;
+}
+
+/*
* These access the current or another (stopped) task's io permission
* bitmap for debugging or core dump.
*/
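
The ptrace ABI is unchanged: a debugger still pokes u_debugreg[0..7], and the kernel now turns those writes into perf breakpoint registrations behind the scenes. A hedged user-space sketch (pid and watch_addr are placeholders, and the tracee is assumed to be stopped): writing the address register first reserves an inactive slot, and the subsequent DR7 write activates it.

    #include <stddef.h>
    #include <sys/ptrace.h>
    #include <sys/types.h>
    #include <sys/user.h>

    /* Arm a 1-byte write watchpoint in slot 0 of a stopped tracee. */
    static int arm_watchpoint(pid_t pid, unsigned long watch_addr)
    {
            /* 1. Address first: ptrace_set_breakpoint_addr() reserves slot 0 */
            if (ptrace(PTRACE_POKEUSER, pid,
                       offsetof(struct user, u_debugreg[0]), watch_addr))
                    return -1;

            /* 2. Enable: L0=1, R/W0=01 (write), LEN0=00 (1 byte) -> 0x10001 */
            return ptrace(PTRACE_POKEUSER, pid,
                          offsetof(struct user, u_debugreg[7]), 0x10001UL);
    }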
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 6a44a76055ad..fbf3b07c8567 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -799,15 +799,6 @@ static void do_signal(struct pt_regs *regs)
signr = get_signal_to_deliver(&info, &ka, regs, NULL);
if (signr > 0) {
- /*
- * Re-enable any watchpoints before delivering the
- * signal to user space. The processor register will
- * have been cleared if the watchpoint triggered
- * inside the kernel.
- */
- if (current->thread.debugreg7)
- set_debugreg(current->thread.debugreg7, 7);
-
/* Whee! Actually deliver the signal. */
if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 7e37dcee0cc3..33399176512a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -529,77 +529,56 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
+ /* DR6 may or may not be cleared by the CPU */
+ set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ /* Store the virtualized DR6 value */
+ tsk->thread.debugreg6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
/*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.debugreg6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
}
-
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
- /*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
- */
-clear_dr7:
- set_debugreg(0, 7);
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
- return;
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- preempt_conditional_cli(regs);
return;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ae07d261527c..4fc80174191c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -42,6 +42,7 @@
#define CREATE_TRACE_POINTS
#include "trace.h"
+#include <asm/debugreg.h>
#include <asm/uaccess.h>
#include <asm/msr.h>
#include <asm/desc.h>
@@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
trace_kvm_entry(vcpu->vcpu_id);
kvm_x86_ops->run(vcpu, kvm_run);
- if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) {
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
- }
+ /*
+ * If the guest has used debug registers, at least dr7
+ * will be disabled while returning to the host.
+ * If we don't have active breakpoints in the host, we don't
+ * care about the messed up debug address registers. But if
+ * we have some of them active, restore the old state.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_restore();
set_bit(KVM_REQ_KICK, &vcpu->requests);
local_irq_enable();
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 16ccbd77917f..11a4ad4d6253 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
struct die_args *arg = args;
if (val == DIE_DEBUG && (arg->err & DR_STEP))
- if (post_kmmio_handler(arg->err, arg->regs) == 1)
+ if (post_kmmio_handler(arg->err, arg->regs) == 1) {
+ /*
+ * Reset the BS bit in dr6 (pointed to by args->err) to
+ * denote completion of processing
+ */
+ (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP;
return NOTIFY_STOP;
+ }
return NOTIFY_DONE;
}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 8aa85f17667e..0a979f3e5b8a 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -18,6 +18,7 @@
#include <asm/mce.h>
#include <asm/xcr.h>
#include <asm/suspend.h>
+#include <asm/debugreg.h>
#ifdef CONFIG_X86_32
static struct saved_context saved_context;
@@ -142,31 +143,6 @@ static void fix_processor_context(void)
#endif
load_TR_desc(); /* This does ltr */
load_LDT(&current->active_mm->context); /* This does lldt */
-
- /*
- * Now maybe reload the debug registers
- */
- if (current->thread.debugreg7) {
-#ifdef CONFIG_X86_32
- set_debugreg(current->thread.debugreg0, 0);
- set_debugreg(current->thread.debugreg1, 1);
- set_debugreg(current->thread.debugreg2, 2);
- set_debugreg(current->thread.debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(current->thread.debugreg7, 7);
-#else
- /* CONFIG_X86_64 */
- loaddebug(&current->thread, 0);
- loaddebug(&current->thread, 1);
- loaddebug(&current->thread, 2);
- loaddebug(&current->thread, 3);
- /* no 4 and 5 */
- loaddebug(&current->thread, 6);
- loaddebug(&current->thread, 7);
-#endif
- }
-
}
/**
diff --git a/arch/x86/tools/test_get_len.c b/arch/x86/tools/test_get_len.c
index af75e07217ba..d8214dc03fa7 100644
--- a/arch/x86/tools/test_get_len.c
+++ b/arch/x86/tools/test_get_len.c
@@ -114,6 +114,7 @@ int main(int argc, char **argv)
unsigned char insn_buf[16];
struct insn insn;
int insns = 0, c;
+ int warnings = 0;
parse_args(argc, argv);
@@ -151,18 +152,22 @@ int main(int argc, char **argv)
insn_init(&insn, insn_buf, x86_64);
insn_get_length(&insn);
if (insn.length != nb) {
- fprintf(stderr, "Error: %s found a difference at %s\n",
+ warnings++;
+ fprintf(stderr, "Warning: %s found difference at %s\n",
prog, sym);
- fprintf(stderr, "Error: %s", line);
- fprintf(stderr, "Error: objdump says %d bytes, but "
+ fprintf(stderr, "Warning: %s", line);
+ fprintf(stderr, "Warning: objdump says %d bytes, but "
"insn_get_length() says %d\n", nb,
insn.length);
if (verbose)
dump_insn(stderr, &insn);
- exit(2);
}
}
- fprintf(stderr, "Succeed: decoded and checked %d instructions\n",
- insns);
+ if (warnings)
+ fprintf(stderr, "Warning: decoded and checked %d"
+ " instructions with %d warnings\n", insns, warnings);
+ else
+ fprintf(stderr, "Succeed: decoded and checked %d"
+ " instructions\n", insns);
return 0;
}
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 43360c1d8f70..47bbdf9c38d0 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -137,13 +137,8 @@ struct ftrace_event_call {
#define FTRACE_MAX_PROFILE_SIZE 2048
-struct perf_trace_buf {
- char buf[FTRACE_MAX_PROFILE_SIZE];
- int recursion;
-};
-
-extern struct perf_trace_buf *perf_trace_buf;
-extern struct perf_trace_buf *perf_trace_buf_nmi;
+extern char *perf_trace_buf;
+extern char *perf_trace_buf_nmi;
#define MAX_FILTER_PRED 32
#define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
new file mode 100644
index 000000000000..c9f7f7c7b0e0
--- /dev/null
+++ b/include/linux/hw_breakpoint.h
@@ -0,0 +1,140 @@
+#ifndef _LINUX_HW_BREAKPOINT_H
+#define _LINUX_HW_BREAKPOINT_H
+
+enum {
+ HW_BREAKPOINT_LEN_1 = 1,
+ HW_BREAKPOINT_LEN_2 = 2,
+ HW_BREAKPOINT_LEN_4 = 4,
+ HW_BREAKPOINT_LEN_8 = 8,
+};
+
+enum {
+ HW_BREAKPOINT_R = 1,
+ HW_BREAKPOINT_W = 2,
+ HW_BREAKPOINT_X = 4,
+};
+
+#ifdef __KERNEL__
+
+#include <linux/perf_event.h>
+
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
+{
+ return bp->attr.bp_addr;
+}
+
+static inline int hw_breakpoint_type(struct perf_event *bp)
+{
+ return bp->attr.bp_type;
+}
+
+static inline int hw_breakpoint_len(struct perf_event *bp)
+{
+ return bp->attr.bp_len;
+}
+
+extern struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active);
+
+/* FIXME: only change from the attr, and don't unregister */
+extern struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+ unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active);
+
+/*
+ * Kernel breakpoints are not associated with any particular thread.
+ */
+extern struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ int cpu,
+ bool active);
+
+extern struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ bool active);
+
+extern int register_perf_hw_breakpoint(struct perf_event *bp);
+extern int __register_perf_hw_breakpoint(struct perf_event *bp);
+extern void unregister_hw_breakpoint(struct perf_event *bp);
+extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events);
+
+extern int reserve_bp_slot(struct perf_event *bp);
+extern void release_bp_slot(struct perf_event *bp);
+
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+ return &bp->hw.info;
+}
+
+#else /* !CONFIG_HAVE_HW_BREAKPOINT */
+
+static inline struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active) { return NULL; }
+static inline struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+ unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active) { return NULL; }
+static inline struct perf_event *
+register_wide_hw_breakpoint_cpu(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ int cpu,
+ bool active) { return NULL; }
+static inline struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ bool active) { return NULL; }
+static inline int
+register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
+static inline int
+__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; }
+static inline void unregister_hw_breakpoint(struct perf_event *bp) { }
+static inline void
+unregister_wide_hw_breakpoint(struct perf_event **cpu_events) { }
+static inline int
+reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; }
+static inline void release_bp_slot(struct perf_event *bp) { }
+
+static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { }
+
+static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
+{
+ return NULL;
+}
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_HW_BREAKPOINT_H */
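
A hedged kernel-side sketch of the API declared above, in the spirit of the samples/hw_breakpoint/data_breakpoint.c module from the diffstat. The watched symbol and message are illustrative only, and error handling is assumed to follow the usual ERR_PTR convention:

    #include <linux/module.h>
    #include <linux/kallsyms.h>
    #include <linux/hw_breakpoint.h>

    static struct perf_event **wide_bp;

    static void bp_triggered(struct perf_event *bp, void *data)
    {
            pr_info("watched kernel variable was written to\n");
            dump_stack();
    }

    static int __init bp_sample_init(void)
    {
            /* One breakpoint per cpu, armed ("active") immediately */
            wide_bp = register_wide_hw_breakpoint(
                            kallsyms_lookup_name("pid_max"),
                            HW_BREAKPOINT_LEN_4, HW_BREAKPOINT_W,
                            bp_triggered, true);

            return IS_ERR(wide_bp) ? PTR_ERR(wide_bp) : 0;
    }

    static void __exit bp_sample_exit(void)
    {
            unregister_wide_hw_breakpoint(wide_bp);
    }

    module_init(bp_sample_init);
    module_exit(bp_sample_exit);
    MODULE_LICENSE("GPL");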
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7f87563c8485..43adbd7f0010 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -18,6 +18,10 @@
#include <linux/ioctl.h>
#include <asm/byteorder.h>
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#include <asm/hw_breakpoint.h>
+#endif
+
/*
* User-space ABI bits:
*/
@@ -31,6 +35,7 @@ enum perf_type_id {
PERF_TYPE_TRACEPOINT = 2,
PERF_TYPE_HW_CACHE = 3,
PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
PERF_TYPE_MAX, /* non-ABI */
};
@@ -209,6 +214,15 @@ struct perf_event_attr {
__u32 wakeup_events; /* wakeup every n events */
__u32 wakeup_watermark; /* bytes before wakeup */
};
+
+ union {
+ struct { /* Hardware breakpoint info */
+ __u64 bp_addr;
+ __u32 bp_type;
+ __u32 bp_len;
+ };
+ };
+
__u32 __reserved_2;
__u64 __reserved_3;
@@ -478,6 +492,11 @@ struct hw_perf_event {
s64 remaining;
struct hrtimer hrtimer;
};
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+ union { /* breakpoint */
+ struct arch_hw_breakpoint info;
+ };
+#endif
};
atomic64_t prev_count;
u64 sample_period;
@@ -546,6 +565,10 @@ struct perf_pending_entry {
void (*func)(struct perf_pending_entry *);
};
+typedef void (*perf_callback_t)(struct perf_event *, void *);
+
+struct perf_sample_data;
+
/**
* struct perf_event - performance event kernel representation:
*/
@@ -588,7 +611,7 @@ struct perf_event {
u64 tstamp_running;
u64 tstamp_stopped;
- struct perf_event_attr attr;
+ struct perf_event_attr attr;
struct hw_perf_event hw;
struct perf_event_context *ctx;
@@ -637,10 +660,18 @@ struct perf_event {
struct pid_namespace *ns;
u64 id;
+ void (*overflow_handler)(struct perf_event *event,
+ int nmi, struct perf_sample_data *data,
+ struct pt_regs *regs);
+
#ifdef CONFIG_EVENT_PROFILE
struct event_filter *filter;
#endif
+ perf_callback_t callback;
+
+ perf_callback_t event_callback;
+
#endif /* CONFIG_PERF_EVENTS */
};
@@ -745,6 +776,14 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader,
struct perf_cpu_context *cpuctx,
struct perf_event_context *ctx, int cpu);
extern void perf_event_update_userpage(struct perf_event *event);
+extern int perf_event_release_kernel(struct perf_event *event);
+extern struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr,
+ int cpu,
+ pid_t pid,
+ perf_callback_t callback);
+extern u64 perf_event_read_value(struct perf_event *event,
+ u64 *enabled, u64 *running);
struct perf_sample_data {
u64 type;
@@ -821,6 +860,7 @@ extern int sysctl_perf_event_sample_rate;
extern void perf_event_init(void);
extern void perf_tp_event(int event_id, u64 addr, u64 count,
void *record, int entry_size);
+extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
#define perf_misc_flags(regs) (user_mode(regs) ? PERF_RECORD_MISC_USER : \
@@ -834,6 +874,8 @@ extern int perf_output_begin(struct perf_output_handle *handle,
extern void perf_output_end(struct perf_output_handle *handle);
extern void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len);
+extern int perf_swevent_get_recursion_context(void);
+extern void perf_swevent_put_recursion_context(int rctx);
#else
static inline void
perf_event_task_sched_in(struct task_struct *task, int cpu) { }
@@ -855,11 +897,15 @@ static inline int perf_event_task_enable(void) { return -EINVAL; }
static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr) { }
+static inline void
+perf_bp_event(struct perf_event *event, void *data) { }
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_comm(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
+static inline int perf_swevent_get_recursion_context(void) { return -1; }
+static inline void perf_swevent_put_recursion_context(int rctx) { }
#endif
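
Breakpoints ride on the new PERF_TYPE_BREAKPOINT type and the bp_* attr fields, so an in-kernel user can also go through perf_event_create_kernel_counter() directly instead of the hw_breakpoint wrappers. A minimal sketch, assuming kernel context; addr, my_callback and the pid convention (-1 taken here to mean a cpu-wide, non-per-task counter) are assumptions for illustration:

    struct perf_event_attr attr = {
            .type          = PERF_TYPE_BREAKPOINT,
            .size          = sizeof(attr),
            .bp_addr       = addr,                  /* placeholder */
            .bp_type       = HW_BREAKPOINT_W,
            .bp_len        = HW_BREAKPOINT_LEN_4,
            .sample_period = 1,
    };
    struct perf_event *bp;

    /* cpu 0 pins the counter to one cpu; my_callback is a perf_callback_t */
    bp = perf_event_create_kernel_counter(&attr, 0, -1, my_callback);
    if (IS_ERR(bp))
            return PTR_ERR(bp);

    /* ... use the counter ... */

    perf_event_release_kernel(bp);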
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 4945d1c99864..c3417c13e3ed 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -724,17 +724,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
static void ftrace_profile_##call(proto) \
{ \
struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
+ extern int perf_swevent_get_recursion_context(void); \
+ extern void perf_swevent_put_recursion_context(int rctx); \
struct ftrace_event_call *event_call = &event_##call; \
extern void perf_tp_event(int, u64, u64, void *, int); \
struct ftrace_raw_##call *entry; \
- struct perf_trace_buf *trace_buf; \
u64 __addr = 0, __count = 1; \
unsigned long irq_flags; \
struct trace_entry *ent; \
int __entry_size; \
int __data_size; \
+ char *trace_buf; \
char *raw_data; \
int __cpu; \
+ int rctx; \
int pc; \
\
pc = preempt_count(); \
@@ -749,6 +752,11 @@ static void ftrace_profile_##call(proto) \
return; \
\
local_irq_save(irq_flags); \
+ \
+ rctx = perf_swevent_get_recursion_context(); \
+ if (rctx < 0) \
+ goto end_recursion; \
+ \
__cpu = smp_processor_id(); \
\
if (in_nmi()) \
@@ -759,13 +767,7 @@ static void ftrace_profile_##call(proto) \
if (!trace_buf) \
goto end; \
\
- trace_buf = per_cpu_ptr(trace_buf, __cpu); \
- if (trace_buf->recursion++) \
- goto end_recursion; \
- \
- barrier(); \
- \
- raw_data = trace_buf->buf; \
+ raw_data = per_cpu_ptr(trace_buf, __cpu); \
\
*(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
entry = (struct ftrace_raw_##call *)raw_data; \
@@ -780,9 +782,9 @@ static void ftrace_profile_##call(proto) \
perf_tp_event(event_call->id, __addr, __count, entry, \
__entry_size); \
\
-end_recursion: \
- trace_buf->recursion--; \
end: \
+ perf_swevent_put_recursion_context(rctx); \
+end_recursion: \
local_irq_restore(irq_flags); \
\
}
diff --git a/kernel/Makefile b/kernel/Makefile
index b8d4cd8ac0b9..6b7ce8173dfd 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -21,6 +21,7 @@ CFLAGS_REMOVE_mutex-debug.o = -pg
CFLAGS_REMOVE_rtmutex-debug.o = -pg
CFLAGS_REMOVE_cgroup-debug.o = -pg
CFLAGS_REMOVE_sched_clock.o = -pg
+CFLAGS_REMOVE_perf_event.o = -pg
endif
obj-$(CONFIG_FREEZER) += freezer.o
@@ -95,6 +96,7 @@ obj-$(CONFIG_RING_BUFFER) += trace/
obj-$(CONFIG_SMP) += sched_cpupri.o
obj-$(CONFIG_SLOW_WORK) += slow-work.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/exit.c b/kernel/exit.c
index f7864ac2ecc1..3f45e3cf931d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -49,6 +49,7 @@
#include <linux/init_task.h>
#include <linux/perf_event.h>
#include <trace/events/sched.h>
+#include <linux/hw_breakpoint.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
@@ -978,6 +979,10 @@ NORET_TYPE void do_exit(long code)
proc_exit_connector(tsk);
/*
+ * FIXME: do that only when needed, using sched_exit tracepoint
+ */
+ flush_ptrace_hw_breakpoint(tsk);
+ /*
* Flush inherited counters to the parent - before the parent
* gets woken up by child-exit notifications.
*/
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
new file mode 100644
index 000000000000..06d372fc026d
--- /dev/null
+++ b/kernel/hw_breakpoint.c
@@ -0,0 +1,501 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2007 Alan Stern
+ * Copyright (C) IBM Corporation, 2009
+ * Copyright (C) 2009, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Thanks to Ingo Molnar for his many suggestions.
+ *
+ * Authors: Alan Stern <stern@rowland.harvard.edu>
+ * K.Prasad <prasad@linux.vnet.ibm.com>
+ * Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers.
+ * This file contains the arch-independent routines.
+ */
+
+#include <linux/irqflags.h>
+#include <linux/kallsyms.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/kdebug.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <linux/hw_breakpoint.h>
+
+/*
+ * Constraints data
+ */
+
+/* Number of pinned cpu breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+
+/* Number of pinned task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]);
+
+/* Number of non-pinned cpu/task breakpoints in a cpu */
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+
+/* Gather the total number of pinned and un-pinned breakpoints in a cpuset */
+struct bp_busy_slots {
+ unsigned int pinned;
+ unsigned int flexible;
+};
+
+/* Serialize accesses to the above constraints */
+static DEFINE_MUTEX(nr_bp_mutex);
+
+/*
+ * Report the maximum number of pinned breakpoints a task
+ * can have on this cpu. tsk_pinned[n] counts the tasks that currently
+ * carry n + 1 pinned breakpoints here, so the highest non-empty bucket
+ * gives the answer.
+ */
+static unsigned int max_task_bp_pinned(int cpu)
+{
+ int i;
+ unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu);
+
+ for (i = HBP_NUM - 1; i >= 0; i--) {
+ if (tsk_pinned[i] > 0)
+ return i + 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Report the number of pinned/un-pinned breakpoints we have on
+ * a given cpu (cpu >= 0) or across all of them (cpu = -1).
+ */
+static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu)
+{
+ if (cpu >= 0) {
+ slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+ slots->pinned += max_task_bp_pinned(cpu);
+ slots->flexible = per_cpu(nr_bp_flexible, cpu);
+
+ return;
+ }
+
+ for_each_online_cpu(cpu) {
+ unsigned int nr;
+
+ nr = per_cpu(nr_cpu_bp_pinned, cpu);
+ nr += max_task_bp_pinned(cpu);
+
+ if (nr > slots->pinned)
+ slots->pinned = nr;
+
+ nr = per_cpu(nr_bp_flexible, cpu);
+
+ if (nr > slots->flexible)
+ slots->flexible = nr;
+ }
+}
+
+/*
+ * Add or remove a pinned breakpoint for the given task in our
+ * constraint table, depending on @enable.
+ */
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+{
+ int count = 0;
+ struct perf_event *bp;
+ struct perf_event_context *ctx = tsk->perf_event_ctxp;
+ unsigned int *task_bp_pinned;
+ struct list_head *list;
+ unsigned long flags;
+
+ if (WARN_ONCE(!ctx, "No perf context for this task"))
+ return;
+
+ list = &ctx->event_list;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+
+ /*
+ * The current breakpoint counter is not included in the list
+ * at the open() callback time
+ */
+ list_for_each_entry(bp, list, event_entry) {
+ if (bp->attr.type == PERF_TYPE_BREAKPOINT)
+ count++;
+ }
+
+ spin_unlock_irqrestore(&ctx->lock, flags);
+
+ if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list"))
+ return;
+
+ task_bp_pinned = per_cpu(task_bp_pinned, cpu);
+ if (enable) {
+ task_bp_pinned[count]++;
+ if (count > 0)
+ task_bp_pinned[count-1]--;
+ } else {
+ task_bp_pinned[count]--;
+ if (count > 0)
+ task_bp_pinned[count-1]++;
+ }
+}
+
+/*
+ * Add/remove the given breakpoint in our constraint table
+ */
+static void toggle_bp_slot(struct perf_event *bp, bool enable)
+{
+ int cpu = bp->cpu;
+ struct task_struct *tsk = bp->ctx->task;
+
+ /* Pinned counter task profiling */
+ if (tsk) {
+ if (cpu >= 0) {
+ toggle_bp_task_slot(tsk, cpu, enable);
+ return;
+ }
+
+ for_each_online_cpu(cpu)
+ toggle_bp_task_slot(tsk, cpu, enable);
+ return;
+ }
+
+ /* Pinned counter cpu profiling */
+ if (enable)
+ per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+ else
+ per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+}
+
+/*
+ * Constraints to check before allowing this new breakpoint counter:
+ *
+ * == Non-pinned counter == (Considered as pinned for now)
+ *
+ * - If attached to a single cpu, check:
+ *
+ * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu)
+ * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM
+ *
+ * -> If there are already non-pinned counters in this cpu, it means
+ * there is already a free slot for them.
+ * Otherwise, we check that the maximum number of per task
+ * breakpoints (for this cpu) plus the number of per cpu breakpoints
+ * (for this cpu) doesn't cover every register.
+ *
+ * - If attached to every cpu, check:
+ *
+ * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *))
+ * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM
+ *
+ * -> This is roughly the same, except we check the number of per cpu
+ * bp for every cpu and we keep the max one. Same for the per task
+ * breakpoints.
+ *
+ *
+ * == Pinned counter ==
+ *
+ * - If attached to a single cpu, check:
+ *
+ * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu)
+ * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM
+ *
+ * -> Same checks as before. But now the nr_bp_flexible, if any, must
+ * keep at least one register free (or they will never be fed).
+ *
+ * - If attached to every cpu, check:
+ *
+ * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *))
+ * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM
+ *
+ * For example, with HBP_NUM == 4 debug registers: 3 pinned slots in use
+ * plus at least one flexible counter makes 3 + 1 == 4, so a new pinned
+ * request is refused with -ENOSPC by reserve_bp_slot() below.
+ */
+int reserve_bp_slot(struct perf_event *bp)
+{
+ struct bp_busy_slots slots = {0};
+ int ret = 0;
+
+ mutex_lock(&nr_bp_mutex);
+
+ fetch_bp_busy_slots(&slots, bp->cpu);
+
+ /* Flexible counters need to keep at least one slot */
+ if (slots.pinned + (!!slots.flexible) == HBP_NUM) {
+ ret = -ENOSPC;
+ goto end;
+ }
+
+ toggle_bp_slot(bp, true);
+
+end:
+ mutex_unlock(&nr_bp_mutex);
+
+ return ret;
+}
+
+void release_bp_slot(struct perf_event *bp)
+{
+ mutex_lock(&nr_bp_mutex);
+
+ toggle_bp_slot(bp, false);
+
+ mutex_unlock(&nr_bp_mutex);
+}
+
+
+int __register_perf_hw_breakpoint(struct perf_event *bp)
+{
+ int ret;
+
+ ret = reserve_bp_slot(bp);
+ if (ret)
+ return ret;
+
+ /*
+ * Ptrace breakpoints can be temporary perf events only
+ * meant to reserve a slot. In that case they are created disabled and
+ * we don't want to check the params right now (as we put a null addr).
+ * But perf tools also create events as disabled, and for those we do
+ * want to check the params.
+ * This is a quick hack that will be removed soon, once we remove
+ * the tmp breakpoints from ptrace.
+ */
+ if (!bp->attr.disabled || bp->callback == perf_bp_event)
+ ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
+
+ return ret;
+}
+
+int register_perf_hw_breakpoint(struct perf_event *bp)
+{
+ bp->callback = perf_bp_event;
+
+ return __register_perf_hw_breakpoint(bp);
+}
+
+/*
+ * Register a breakpoint bound to a task and a given cpu.
+ * If cpu is -1, the breakpoint is active for the task on every cpu.
+ * If pid is -1, the breakpoint is active for every task on the given
+ * cpu.
+ */
+static struct perf_event *
+register_user_hw_breakpoint_cpu(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ pid_t pid,
+ int cpu,
+ bool active)
+{
+ struct perf_event_attr *attr;
+ struct perf_event *bp;
+
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+ if (!attr)
+ return ERR_PTR(-ENOMEM);
+
+ attr->type = PERF_TYPE_BREAKPOINT;
+ attr->size = sizeof(*attr);
+ attr->bp_addr = addr;
+ attr->bp_len = len;
+ attr->bp_type = type;
+ /*
+ * Such breakpoints are used by debuggers to trigger signals when
+ * we hit the expected memory op. We can't miss such events, so they
+ * must be pinned.
+ */
+ attr->pinned = 1;
+
+ if (!active)
+ attr->disabled = 1;
+
+ bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered);
+ kfree(attr);
+
+ return bp;
+}
+
+/**
+ * register_user_hw_breakpoint - register a hardware breakpoint for user space
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @active: should we activate it while registering it
+ *
+ */
+struct perf_event *
+register_user_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active)
+{
+ return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+ tsk->pid, -1, active);
+}
+EXPORT_SYMBOL_GPL(register_user_hw_breakpoint);
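
A sketch of the intended caller, a ptrace-style debugger watching a user address (the surrounding code is illustrative; the length and type constants come from this series):

	static void triggered(struct perf_event *bp, void *data)
	{
		struct pt_regs *regs = data;	/* regs at the watched access */
		/* e.g. queue a SIGTRAP for the traced task */
	}

	/* watch 4 bytes at addr in tsk for writes, enabled immediately */
	bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
					 HW_BREAKPOINT_W, triggered,
					 tsk, true);
	if (IS_ERR(bp) || !bp)
		return bp ? PTR_ERR(bp) : -ENOMEM;
	...
	unregister_hw_breakpoint(bp);
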
+
+/**
+ * modify_user_hw_breakpoint - modify a user-space hardware breakpoint
+ * @bp: the breakpoint structure to modify
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @tsk: pointer to 'task_struct' of the process to which the address belongs
+ * @active: should we activate it while registering it
+ */
+struct perf_event *
+modify_user_hw_breakpoint(struct perf_event *bp,
+ unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ struct task_struct *tsk,
+ bool active)
+{
+ /*
+ * FIXME: do it without unregistering
+ * - We don't want to lose our slot
+ * - If the new bp is incorrect, don't lose the older one
+ */
+ unregister_hw_breakpoint(bp);
+
+ return register_user_hw_breakpoint(addr, len, type, triggered,
+ tsk, active);
+}
+EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint);
+
+/**
+ * unregister_hw_breakpoint - unregister a user-space hardware breakpoint
+ * @bp: the breakpoint structure to unregister
+ */
+void unregister_hw_breakpoint(struct perf_event *bp)
+{
+ if (!bp)
+ return;
+ perf_event_release_kernel(bp);
+}
+EXPORT_SYMBOL_GPL(unregister_hw_breakpoint);
+
+static struct perf_event *
+register_kernel_hw_breakpoint_cpu(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ int cpu,
+ bool active)
+{
+ return register_user_hw_breakpoint_cpu(addr, len, type, triggered,
+ -1, cpu, active);
+}
+
+/**
+ * register_wide_hw_breakpoint - register a wide breakpoint in the kernel
+ * @addr: is the memory address that triggers the breakpoint
+ * @len: the length of the access to the memory (1 byte, 2 bytes etc...)
+ * @type: the type of the access to the memory (read/write/exec)
+ * @triggered: callback to trigger when we hit the breakpoint
+ * @active: should we activate it while registering it
+ *
+ * @return a set of per_cpu pointers to perf events
+ */
+struct perf_event **
+register_wide_hw_breakpoint(unsigned long addr,
+ int len,
+ int type,
+ perf_callback_t triggered,
+ bool active)
+{
+ struct perf_event **cpu_events, **pevent, *bp;
+ long err;
+ int cpu;
+
+ cpu_events = alloc_percpu(typeof(*cpu_events));
+ if (!cpu_events)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_possible_cpu(cpu) {
+ pevent = per_cpu_ptr(cpu_events, cpu);
+ bp = register_kernel_hw_breakpoint_cpu(addr, len, type,
+ triggered, cpu, active);
+
+ *pevent = bp;
+
+ if (IS_ERR(bp) || !bp) {
+ err = PTR_ERR(bp);
+ goto fail;
+ }
+ }
+
+ return cpu_events;
+
+fail:
+ for_each_possible_cpu(cpu) {
+ pevent = per_cpu_ptr(cpu_events, cpu);
+ if (IS_ERR(*pevent) || !*pevent)
+ break;
+ unregister_hw_breakpoint(*pevent);
+ }
+ free_percpu(cpu_events);
+ /* return the error if any */
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint);
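
Combined with the kallsyms_lookup_name() export further down in this patch, a wide breakpoint on a kernel variable becomes a few lines (the symbol name is only an example; samples/hw_breakpoint/data_breakpoint.c in this series does essentially this):

	unsigned long addr = kallsyms_lookup_name("pid_max");
	struct perf_event **wide;

	if (!addr)
		return -ENXIO;

	wide = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
					   HW_BREAKPOINT_W | HW_BREAKPOINT_R,
					   my_handler, true);
	if (IS_ERR(wide))
		return PTR_ERR(wide);
	...
	unregister_wide_hw_breakpoint(wide);
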
+
+/**
+ * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel
+ * @cpu_events: the per cpu set of events to unregister
+ */
+void unregister_wide_hw_breakpoint(struct perf_event **cpu_events)
+{
+ int cpu;
+ struct perf_event **pevent;
+
+ for_each_possible_cpu(cpu) {
+ pevent = per_cpu_ptr(cpu_events, cpu);
+ unregister_hw_breakpoint(*pevent);
+ }
+ free_percpu(cpu_events);
+}
+EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint);
+
+static struct notifier_block hw_breakpoint_exceptions_nb = {
+ .notifier_call = hw_breakpoint_exceptions_notify,
+ /* we need to be notified first */
+ .priority = 0x7fffffff
+};
+
+static int __init init_hw_breakpoint(void)
+{
+ return register_die_notifier(&hw_breakpoint_exceptions_nb);
+}
+core_initcall(init_hw_breakpoint);
+
+struct pmu perf_ops_bp = {
+ .enable = arch_install_hw_breakpoint,
+ .disable = arch_uninstall_hw_breakpoint,
+ .read = hw_breakpoint_pmu_read,
+ .unthrottle = hw_breakpoint_pmu_unthrottle
+};
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 8b6b8b697c68..8e5288a8a355 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -181,6 +181,7 @@ unsigned long kallsyms_lookup_name(const char *name)
}
return module_kallsyms_lookup_name(name);
}
+EXPORT_SYMBOL_GPL(kallsyms_lookup_name);
int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
unsigned long),
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3256e36ad251..accfd7bfe387 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -29,6 +29,7 @@
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
@@ -245,6 +246,49 @@ static void perf_unpin_context(struct perf_event_context *ctx)
put_ctx(ctx);
}
+static inline u64 perf_clock(void)
+{
+ return cpu_clock(smp_processor_id());
+}
+
+/*
+ * Update the record of the current time in a context.
+ */
+static void update_context_time(struct perf_event_context *ctx)
+{
+ u64 now = perf_clock();
+
+ ctx->time += now - ctx->timestamp;
+ ctx->timestamp = now;
+}
+
+/*
+ * Update the total_time_enabled and total_time_running fields for an event.
+ */
+static void update_event_times(struct perf_event *event)
+{
+ struct perf_event_context *ctx = event->ctx;
+ u64 run_end;
+
+ if (event->state < PERF_EVENT_STATE_INACTIVE ||
+ event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
+ return;
+
+ if (ctx->is_active)
+ run_end = ctx->time;
+ else
+ run_end = event->tstamp_stopped;
+
+ event->total_time_enabled = run_end - event->tstamp_enabled;
+
+ if (event->state == PERF_EVENT_STATE_INACTIVE)
+ run_end = event->tstamp_stopped;
+ else
+ run_end = ctx->time;
+
+ event->total_time_running = run_end - event->tstamp_running;
+}
+
/*
* Add an event to the lists of its context.
* Must be called with ctx->mutex and ctx->lock held.
@@ -293,6 +337,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
if (event->group_leader != event)
event->group_leader->nr_siblings--;
+ update_event_times(event);
+ event->state = PERF_EVENT_STATE_OFF;
+
/*
* If this was a group event with sibling events then
* upgrade the siblings to singleton events by adding them
@@ -446,50 +493,11 @@ retry:
* can remove the event safely, if the call above did not
* succeed.
*/
- if (!list_empty(&event->group_entry)) {
+ if (!list_empty(&event->group_entry))
list_del_event(event, ctx);
- }
spin_unlock_irq(&ctx->lock);
}
-static inline u64 perf_clock(void)
-{
- return cpu_clock(smp_processor_id());
-}
-
-/*
- * Update the record of the current time in a context.
- */
-static void update_context_time(struct perf_event_context *ctx)
-{
- u64 now = perf_clock();
-
- ctx->time += now - ctx->timestamp;
- ctx->timestamp = now;
-}
-
-/*
- * Update the total_time_enabled and total_time_running fields for a event.
- */
-static void update_event_times(struct perf_event *event)
-{
- struct perf_event_context *ctx = event->ctx;
- u64 run_end;
-
- if (event->state < PERF_EVENT_STATE_INACTIVE ||
- event->group_leader->state < PERF_EVENT_STATE_INACTIVE)
- return;
-
- event->total_time_enabled = ctx->time - event->tstamp_enabled;
-
- if (event->state == PERF_EVENT_STATE_INACTIVE)
- run_end = event->tstamp_stopped;
- else
- run_end = ctx->time;
-
- event->total_time_running = run_end - event->tstamp_running;
-}
-
/*
* Update total_time_enabled and total_time_running for all events in a group.
*/
@@ -1032,10 +1040,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx,
update_context_time(ctx);
perf_disable();
- if (ctx->nr_active)
+ if (ctx->nr_active) {
list_for_each_entry(event, &ctx->group_list, group_entry)
group_sched_out(event, cpuctx, ctx);
-
+ }
perf_enable();
out:
spin_unlock(&ctx->lock);
@@ -1060,8 +1068,6 @@ static int context_equiv(struct perf_event_context *ctx1,
&& !ctx1->pin_count && !ctx2->pin_count;
}
-static void __perf_event_read(void *event);
-
static void __perf_event_sync_stat(struct perf_event *event,
struct perf_event *next_event)
{
@@ -1079,8 +1085,8 @@ static void __perf_event_sync_stat(struct perf_event *event,
*/
switch (event->state) {
case PERF_EVENT_STATE_ACTIVE:
- __perf_event_read(event);
- break;
+ event->pmu->read(event);
+ /* fall-through */
case PERF_EVENT_STATE_INACTIVE:
update_event_times(event);
@@ -1119,6 +1125,8 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
if (!ctx->nr_stat)
return;
+ update_context_time(ctx);
+
event = list_first_entry(&ctx->event_list,
struct perf_event, event_entry);
@@ -1162,8 +1170,6 @@ void perf_event_task_sched_out(struct task_struct *task,
if (likely(!ctx || !cpuctx->task_ctx))
return;
- update_context_time(ctx);
-
rcu_read_lock();
parent = rcu_dereference(ctx->parent_ctx);
next_ctx = next->perf_event_ctxp;
@@ -1516,7 +1522,6 @@ static void __perf_event_read(void *info)
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
- unsigned long flags;
/*
* If this is a task context, we need to check whether it is
@@ -1528,12 +1533,12 @@ static void __perf_event_read(void *info)
if (ctx->task && cpuctx->task_ctx != ctx)
return;
- local_irq_save(flags);
- if (ctx->is_active)
- update_context_time(ctx);
- event->pmu->read(event);
+ spin_lock(&ctx->lock);
+ update_context_time(ctx);
update_event_times(event);
- local_irq_restore(flags);
+ spin_unlock(&ctx->lock);
+
+ event->pmu->read(event);
}
static u64 perf_event_read(struct perf_event *event)
@@ -1546,7 +1551,13 @@ static u64 perf_event_read(struct perf_event *event)
smp_call_function_single(event->oncpu,
__perf_event_read, event, 1);
} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
+ struct perf_event_context *ctx = event->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->lock, flags);
+ update_context_time(ctx);
update_event_times(event);
+ spin_unlock_irqrestore(&ctx->lock, flags);
}
return atomic64_read(&event->count);
@@ -1700,16 +1711,10 @@ static void free_event(struct perf_event *event)
call_rcu(&event->rcu_head, free_event_rcu);
}
-/*
- * Called when the last reference to the file is gone.
- */
-static int perf_release(struct inode *inode, struct file *file)
+int perf_event_release_kernel(struct perf_event *event)
{
- struct perf_event *event = file->private_data;
struct perf_event_context *ctx = event->ctx;
- file->private_data = NULL;
-
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_event_remove_from_context(event);
@@ -1724,6 +1729,19 @@ static int perf_release(struct inode *inode, struct file *file)
return 0;
}
+EXPORT_SYMBOL_GPL(perf_event_release_kernel);
+
+/*
+ * Called when the last reference to the file is gone.
+ */
+static int perf_release(struct inode *inode, struct file *file)
+{
+ struct perf_event *event = file->private_data;
+
+ file->private_data = NULL;
+
+ return perf_event_release_kernel(event);
+}
static int perf_event_read_size(struct perf_event *event)
{
@@ -1750,91 +1768,94 @@ static int perf_event_read_size(struct perf_event *event)
return size;
}
-static u64 perf_event_read_value(struct perf_event *event)
+u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running)
{
struct perf_event *child;
u64 total = 0;
+ *enabled = 0;
+ *running = 0;
+
+ mutex_lock(&event->child_mutex);
total += perf_event_read(event);
- list_for_each_entry(child, &event->child_list, child_list)
+ *enabled += event->total_time_enabled +
+ atomic64_read(&event->child_total_time_enabled);
+ *running += event->total_time_running +
+ atomic64_read(&event->child_total_time_running);
+
+ list_for_each_entry(child, &event->child_list, child_list) {
total += perf_event_read(child);
+ *enabled += child->total_time_enabled;
+ *running += child->total_time_running;
+ }
+ mutex_unlock(&event->child_mutex);
return total;
}
-
-static int perf_event_read_entry(struct perf_event *event,
- u64 read_format, char __user *buf)
-{
- int n = 0, count = 0;
- u64 values[2];
-
- values[n++] = perf_event_read_value(event);
- if (read_format & PERF_FORMAT_ID)
- values[n++] = primary_event_id(event);
-
- count = n * sizeof(u64);
-
- if (copy_to_user(buf, values, count))
- return -EFAULT;
-
- return count;
-}
+EXPORT_SYMBOL_GPL(perf_event_read_value);
static int perf_event_read_group(struct perf_event *event,
u64 read_format, char __user *buf)
{
struct perf_event *leader = event->group_leader, *sub;
- int n = 0, size = 0, err = -EFAULT;
- u64 values[3];
+ int n = 0, size = 0, ret = -EFAULT;
+ struct perf_event_context *ctx = leader->ctx;
+ u64 values[5];
+ u64 count, enabled, running;
+
+ mutex_lock(&ctx->mutex);
+ count = perf_event_read_value(leader, &enabled, &running);
values[n++] = 1 + leader->nr_siblings;
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- values[n++] = leader->total_time_enabled +
- atomic64_read(&leader->child_total_time_enabled);
- }
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- values[n++] = leader->total_time_running +
- atomic64_read(&leader->child_total_time_running);
- }
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = enabled;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = running;
+ values[n++] = count;
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_event_id(leader);
size = n * sizeof(u64);
if (copy_to_user(buf, values, size))
- return -EFAULT;
-
- err = perf_event_read_entry(leader, read_format, buf + size);
- if (err < 0)
- return err;
+ goto unlock;
- size += err;
+ ret = size;
list_for_each_entry(sub, &leader->sibling_list, group_entry) {
- err = perf_event_read_entry(sub, read_format,
- buf + size);
- if (err < 0)
- return err;
+ n = 0;
- size += err;
+ values[n++] = perf_event_read_value(sub, &enabled, &running);
+ if (read_format & PERF_FORMAT_ID)
+ values[n++] = primary_event_id(sub);
+
+ size = n * sizeof(u64);
+
+ if (copy_to_user(buf + ret, values, size)) {
+ ret = -EFAULT;
+ goto unlock;
+ }
+
+ ret += size;
}
+unlock:
+ mutex_unlock(&ctx->mutex);
- return size;
+ return ret;
}
static int perf_event_read_one(struct perf_event *event,
u64 read_format, char __user *buf)
{
+ u64 enabled, running;
u64 values[4];
int n = 0;
- values[n++] = perf_event_read_value(event);
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
- values[n++] = event->total_time_enabled +
- atomic64_read(&event->child_total_time_enabled);
- }
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
- values[n++] = event->total_time_running +
- atomic64_read(&event->child_total_time_running);
- }
+ values[n++] = perf_event_read_value(event, &enabled, &running);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ values[n++] = enabled;
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ values[n++] = running;
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(event);
@@ -1865,12 +1886,10 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
return -ENOSPC;
WARN_ON_ONCE(event->ctx->parent_ctx);
- mutex_lock(&event->child_mutex);
if (read_format & PERF_FORMAT_GROUP)
ret = perf_event_read_group(event, read_format, buf);
else
ret = perf_event_read_one(event, read_format, buf);
- mutex_unlock(&event->child_mutex);
return ret;
}
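
The buffer a group read() now produces, as laid out by perf_event_read_group() above with all format bits set:

	/*
	 * PERF_FORMAT_GROUP | PERF_FORMAT_TOTAL_TIME_ENABLED |
	 * PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID yields:
	 *
	 *	u64 nr;			// 1 + leader->nr_siblings
	 *	u64 time_enabled;
	 *	u64 time_running;
	 *	u64 leader_count;
	 *	u64 leader_id;
	 *	struct {
	 *		u64 count;
	 *		u64 id;
	 *	} sibling[nr - 1];
	 */
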
@@ -2315,7 +2334,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
}
if (!data->watermark)
- data->watermark = max_t(long, PAGE_SIZE, max_size / 2);
+ data->watermark = max_size / 2;
rcu_assign_pointer(event->data, data);
@@ -3245,15 +3264,10 @@ static void perf_event_task_ctx(struct perf_event_context *ctx,
{
struct perf_event *event;
- if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
- return;
-
- rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (perf_event_task_match(event))
perf_event_task_output(event, task_event);
}
- rcu_read_unlock();
}
static void perf_event_task_event(struct perf_task_event *task_event)
@@ -3261,11 +3275,11 @@ static void perf_event_task_event(struct perf_task_event *task_event)
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx = task_event->task_ctx;
+ rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_task_ctx(&cpuctx->ctx, task_event);
put_cpu_var(perf_cpu_context);
- rcu_read_lock();
if (!ctx)
ctx = rcu_dereference(task_event->task->perf_event_ctxp);
if (ctx)
@@ -3357,15 +3371,10 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx,
{
struct perf_event *event;
- if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
- return;
-
- rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (perf_event_comm_match(event))
perf_event_comm_output(event, comm_event);
}
- rcu_read_unlock();
}
static void perf_event_comm_event(struct perf_comm_event *comm_event)
@@ -3376,7 +3385,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
char comm[TASK_COMM_LEN];
memset(comm, 0, sizeof(comm));
- strncpy(comm, comm_event->task->comm, sizeof(comm));
+ strlcpy(comm, comm_event->task->comm, sizeof(comm));
size = ALIGN(strlen(comm)+1, sizeof(u64));
comm_event->comm = comm;
@@ -3384,11 +3393,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
comm_event->event_id.header.size = sizeof(comm_event->event_id) + size;
+ rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
put_cpu_var(perf_cpu_context);
- rcu_read_lock();
/*
* doesn't really matter which of the child contexts the
* events ends up in.
@@ -3481,15 +3490,10 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx,
{
struct perf_event *event;
- if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
- return;
-
- rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (perf_event_mmap_match(event, mmap_event))
perf_event_mmap_output(event, mmap_event);
}
- rcu_read_unlock();
}
static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
@@ -3545,11 +3549,11 @@ got_name:
mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size;
+ rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
put_cpu_var(perf_cpu_context);
- rcu_read_lock();
/*
* doesn't really matter which of the child contexts the
* events ends up in.
@@ -3688,7 +3692,11 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
perf_event_disable(event);
}
- perf_event_output(event, nmi, data, regs);
+ if (event->overflow_handler)
+ event->overflow_handler(event, nmi, data, regs);
+ else
+ perf_event_output(event, nmi, data, regs);
+
return ret;
}
@@ -3733,16 +3741,16 @@ again:
return nr;
}
-static void perf_swevent_overflow(struct perf_event *event,
+static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
int nmi, struct perf_sample_data *data,
struct pt_regs *regs)
{
struct hw_perf_event *hwc = &event->hw;
int throttle = 0;
- u64 overflow;
data->period = event->hw.last_period;
- overflow = perf_swevent_set_period(event);
+ if (!overflow)
+ overflow = perf_swevent_set_period(event);
if (hwc->interrupts == MAX_INTERRUPTS)
return;
@@ -3775,14 +3783,19 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
atomic64_add(nr, &event->count);
+ if (!regs)
+ return;
+
if (!hwc->sample_period)
return;
- if (!regs)
+ if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
+ return perf_swevent_overflow(event, 1, nmi, data, regs);
+
+ if (atomic64_add_negative(nr, &hwc->period_left))
return;
- if (!atomic64_add_negative(nr, &hwc->period_left))
- perf_swevent_overflow(event, nmi, data, regs);
+ perf_swevent_overflow(event, 0, nmi, data, regs);
}
static int perf_swevent_is_counting(struct perf_event *event)
@@ -3818,6 +3831,20 @@ static int perf_swevent_is_counting(struct perf_event *event)
static int perf_tp_event_match(struct perf_event *event,
struct perf_sample_data *data);
+static int perf_exclude_event(struct perf_event *event,
+ struct pt_regs *regs)
+{
+ if (regs) {
+ if (event->attr.exclude_user && user_mode(regs))
+ return 1;
+
+ if (event->attr.exclude_kernel && !user_mode(regs))
+ return 1;
+ }
+
+ return 0;
+}
+
static int perf_swevent_match(struct perf_event *event,
enum perf_type_id type,
u32 event_id,
@@ -3829,16 +3856,12 @@ static int perf_swevent_match(struct perf_event *event,
if (event->attr.type != type)
return 0;
+
if (event->attr.config != event_id)
return 0;
- if (regs) {
- if (event->attr.exclude_user && user_mode(regs))
- return 0;
-
- if (event->attr.exclude_kernel && !user_mode(regs))
- return 0;
- }
+ if (perf_exclude_event(event, regs))
+ return 0;
if (event->attr.type == PERF_TYPE_TRACEPOINT &&
!perf_tp_event_match(event, data))
@@ -3855,49 +3878,59 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx,
{
struct perf_event *event;
- if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list))
- return;
-
- rcu_read_lock();
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
if (perf_swevent_match(event, type, event_id, data, regs))
perf_swevent_add(event, nr, nmi, data, regs);
}
- rcu_read_unlock();
}
-static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx)
+int perf_swevent_get_recursion_context(void)
{
+ struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
+ int rctx;
+
if (in_nmi())
- return &cpuctx->recursion[3];
+ rctx = 3;
+ else if (in_irq())
+ rctx = 2;
+ else if (in_softirq())
+ rctx = 1;
+ else
+ rctx = 0;
+
+ if (cpuctx->recursion[rctx]) {
+ put_cpu_var(perf_cpu_context);
+ return -1;
+ }
- if (in_irq())
- return &cpuctx->recursion[2];
+ cpuctx->recursion[rctx]++;
+ barrier();
- if (in_softirq())
- return &cpuctx->recursion[1];
+ return rctx;
+}
+EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
- return &cpuctx->recursion[0];
+void perf_swevent_put_recursion_context(int rctx)
+{
+ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ barrier();
+ cpuctx->recursion[rctx]--;
+ put_cpu_var(perf_cpu_context);
}
+EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
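
The calling convention these two helpers establish, distilled from the tracepoint and kprobe profile paths elsewhere in this patch (sketch):

	int rctx;

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		return;		/* already inside a swevent at this level */

	/* ... build and emit the event; preemption stays disabled ... */

	perf_swevent_put_recursion_context(rctx);
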
static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
u64 nr, int nmi,
struct perf_sample_data *data,
struct pt_regs *regs)
{
- struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
- int *recursion = perf_swevent_recursion_context(cpuctx);
+ struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
- if (*recursion)
- goto out;
-
- (*recursion)++;
- barrier();
-
+ cpuctx = &__get_cpu_var(perf_cpu_context);
+ rcu_read_lock();
perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
nr, nmi, data, regs);
- rcu_read_lock();
/*
* doesn't really matter which of the child contexts the
* events ends up in.
@@ -3906,23 +3939,24 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
if (ctx)
perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
rcu_read_unlock();
-
- barrier();
- (*recursion)--;
-
-out:
- put_cpu_var(perf_cpu_context);
}
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr)
{
- struct perf_sample_data data = {
- .addr = addr,
- };
+ struct perf_sample_data data;
+ int rctx;
- do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi,
- &data, regs);
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
+ return;
+
+ data.addr = addr;
+ data.raw = NULL;
+
+ do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs);
+
+ perf_swevent_put_recursion_context(rctx);
}
static void perf_swevent_read(struct perf_event *event)
@@ -4145,6 +4179,7 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record,
if (!regs)
regs = task_pt_regs(current);
+ /* Trace events already protected against recursion */
do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1,
&data, regs);
}
@@ -4231,6 +4266,57 @@ static void perf_event_free_filter(struct perf_event *event)
#endif /* CONFIG_EVENT_PROFILE */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+ release_bp_slot(event);
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+ int err;
+
+ /*
+ * The breakpoint is already filled if we haven't created the counter
+ * through the perf syscall.
+ * FIXME: manage to get triggered to NULL if it comes from syscalls
+ */
+ if (!bp->callback)
+ err = register_perf_hw_breakpoint(bp);
+ else
+ err = __register_perf_hw_breakpoint(bp);
+ if (err)
+ return ERR_PTR(err);
+
+ bp->destroy = bp_perf_event_destroy;
+
+ return &perf_ops_bp;
+}
+
+void perf_bp_event(struct perf_event *bp, void *data)
+{
+ struct perf_sample_data sample;
+ struct pt_regs *regs = data;
+
+ sample.addr = bp->attr.bp_addr;
+
+ if (!perf_exclude_event(bp, regs))
+ perf_swevent_add(bp, 1, 1, &sample, regs);
+}
+#else
+static void bp_perf_event_destroy(struct perf_event *event)
+{
+}
+
+static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+{
+ return NULL;
+}
+
+void perf_bp_event(struct perf_event *bp, void *regs)
+{
+}
+#endif
+
atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
static void sw_perf_event_destroy(struct perf_event *event)
@@ -4297,6 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr,
struct perf_event_context *ctx,
struct perf_event *group_leader,
struct perf_event *parent_event,
+ perf_callback_t callback,
gfp_t gfpflags)
{
const struct pmu *pmu;
@@ -4339,6 +4426,11 @@ perf_event_alloc(struct perf_event_attr *attr,
event->state = PERF_EVENT_STATE_INACTIVE;
+ if (!callback && parent_event)
+ callback = parent_event->callback;
+
+ event->callback = callback;
+
if (attr->disabled)
event->state = PERF_EVENT_STATE_OFF;
@@ -4373,6 +4465,11 @@ perf_event_alloc(struct perf_event_attr *attr,
pmu = tp_perf_event_init(event);
break;
+ case PERF_TYPE_BREAKPOINT:
+ pmu = bp_perf_event_init(event);
+ break;
+
default:
break;
}
@@ -4615,7 +4712,7 @@ SYSCALL_DEFINE5(perf_event_open,
}
event = perf_event_alloc(&attr, cpu, ctx, group_leader,
- NULL, GFP_KERNEL);
+ NULL, NULL, GFP_KERNEL);
err = PTR_ERR(event);
if (IS_ERR(event))
goto err_put_context;
@@ -4663,6 +4760,58 @@ err_put_context:
return err;
}
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu on which the counter is bound
+ * @pid: task to profile
+ * @callback: callback to run when the counter fires (may be NULL)
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+ pid_t pid, perf_callback_t callback)
+{
+ struct perf_event *event;
+ struct perf_event_context *ctx;
+ int err;
+
+ /*
+ * Get the target context (task or percpu):
+ */
+
+ ctx = find_get_context(pid, cpu);
+ if (IS_ERR(ctx))
+ return NULL;
+
+ event = perf_event_alloc(attr, cpu, ctx, NULL,
+ NULL, callback, GFP_KERNEL);
+ err = PTR_ERR(event);
+ if (IS_ERR(event))
+ goto err_put_context;
+
+ event->filp = NULL;
+ WARN_ON_ONCE(ctx->parent_ctx);
+ mutex_lock(&ctx->mutex);
+ perf_install_in_context(ctx, event, cpu);
+ ++ctx->generation;
+ mutex_unlock(&ctx->mutex);
+
+ event->owner = current;
+ get_task_struct(current);
+ mutex_lock(&current->perf_event_mutex);
+ list_add_tail(&event->owner_entry, &current->perf_event_list);
+ mutex_unlock(&current->perf_event_mutex);
+
+ return event;
+
+err_put_context:
+ if (err < 0)
+ put_ctx(ctx);
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+
/*
* inherit an event from parent task to child task:
*/
@@ -4688,7 +4837,7 @@ inherit_event(struct perf_event *parent_event,
child_event = perf_event_alloc(&parent_event->attr,
parent_event->cpu, child_ctx,
group_leader, parent_event,
- GFP_KERNEL);
+ NULL, GFP_KERNEL);
if (IS_ERR(child_event))
return child_event;
get_ctx(child_ctx);
@@ -4706,6 +4855,8 @@ inherit_event(struct perf_event *parent_event,
if (parent_event->attr.freq)
child_event->hw.sample_period = parent_event->hw.sample_period;
+ child_event->overflow_handler = parent_event->overflow_handler;
+
/*
* Link it up in the child's context:
*/
@@ -4795,7 +4946,6 @@ __perf_event_exit_task(struct perf_event *child_event,
{
struct perf_event *parent_event;
- update_event_times(child_event);
perf_event_remove_from_context(child_event);
parent_event = child_event->parent;
@@ -4847,6 +4997,7 @@ void perf_event_exit_task(struct task_struct *child)
* the events from it.
*/
unclone_ctx(child_ctx);
+ update_context_time(child_ctx);
spin_unlock_irqrestore(&child_ctx->lock, flags);
/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index f05671609a89..d006554888dc 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -339,6 +339,27 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config KSYM_TRACER
+ bool "Trace read and write access on kernel memory locations"
+ depends on HAVE_HW_BREAKPOINT
+ select TRACING
+ help
+ This tracer helps find read and write operations on any given kernel
+ symbol, i.e. one listed in /proc/kallsyms.
+
+config PROFILE_KSYM_TRACER
+ bool "Profile all kernel memory accesses on 'watched' variables"
+ depends on KSYM_TRACER
+ help
+ This tracer profiles kernel accesses on variables watched through the
+ ksym tracer ftrace plugin. Depending upon the hardware, read and
+ write operations on the watched kernel variables can be monitored
+ and counted.
+
+ The results will be displayed in:
+ /debugfs/tracing/profile_ksym
+
+ Say N if unsure.
config STACK_TRACER
bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index edc3a3cca1a1..cd9ecd89ec77 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o
obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b4e4212e66d7..4da6ede74401 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
#include <linux/ftrace.h>
#include <trace/boot.h>
#include <linux/kmemtrace.h>
+#include <linux/hw_breakpoint.h>
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
@@ -37,6 +38,7 @@ enum trace_type {
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_BLK,
+ TRACE_KSYM,
__TRACE_LAST_TYPE,
};
@@ -232,6 +234,7 @@ extern void __ftrace_bad_type(void);
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
TRACE_KMEM_FREE); \
+ IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
@@ -387,6 +390,8 @@ int register_tracer(struct tracer *type);
void unregister_tracer(struct tracer *type);
int is_tracing_stopped(void);
+extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
+
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -461,6 +466,8 @@ extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_hw_branches(struct tracer *trace,
struct trace_array *tr);
+extern int trace_selftest_startup_ksym(struct tracer *trace,
+ struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ead3d724599d..c16a08f399df 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -364,3 +364,19 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
F_printk("type:%u call_site:%lx ptr:%p",
__entry->type_id, __entry->call_site, __entry->ptr)
);
+
+FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
+
+ TRACE_KSYM,
+
+ F_STRUCT(
+ __field( unsigned long, ip )
+ __field( unsigned char, type )
+ __array( char , cmd, TASK_COMM_LEN )
+ __field( unsigned long, addr )
+ ),
+
+ F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
+ (void *)__entry->ip, (unsigned int)__entry->type,
+ (void *)__entry->addr, __entry->cmd)
+);
diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c
index e0d351b01f5a..d9c60f80aa0d 100644
--- a/kernel/trace/trace_event_profile.c
+++ b/kernel/trace/trace_event_profile.c
@@ -9,31 +9,33 @@
#include "trace.h"
-struct perf_trace_buf *perf_trace_buf;
+char *perf_trace_buf;
EXPORT_SYMBOL_GPL(perf_trace_buf);
-struct perf_trace_buf *perf_trace_buf_nmi;
+char *perf_trace_buf_nmi;
EXPORT_SYMBOL_GPL(perf_trace_buf_nmi);
+typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t;
+
/* Count the events in use (per event id, not per instance) */
static int total_profile_count;
static int ftrace_profile_enable_event(struct ftrace_event_call *event)
{
- struct perf_trace_buf *buf;
+ char *buf;
int ret = -ENOMEM;
if (atomic_inc_return(&event->profile_count))
return 0;
if (!total_profile_count) {
- buf = alloc_percpu(struct perf_trace_buf);
+ buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf;
rcu_assign_pointer(perf_trace_buf, buf);
- buf = alloc_percpu(struct perf_trace_buf);
+ buf = (char *)alloc_percpu(perf_trace_t);
if (!buf)
goto fail_buf_nmi;
@@ -79,7 +81,7 @@ int ftrace_profile_enable(int event_id)
static void ftrace_profile_disable_event(struct ftrace_event_call *event)
{
- struct perf_trace_buf *buf, *nmi_buf;
+ char *buf, *nmi_buf;
if (!atomic_add_negative(-1, &event->profile_count))
return;
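
alloc_percpu() takes a type, not a size, hence the perf_trace_t typedef above; the resulting allocation pattern looks like this (a sketch, with the typedef in its plainer spelling):

	typedef char perf_trace_t[FTRACE_MAX_PROFILE_SIZE];

	char *buf = (char *)alloc_percpu(perf_trace_t);
	if (buf)	/* one FTRACE_MAX_PROFILE_SIZE buffer per cpu */
		raw_data = per_cpu_ptr(buf, smp_processor_id());
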
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 3696476f307d..79ce6a2bd74f 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1208,11 +1208,12 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
struct ftrace_event_call *call = &tp->call;
struct kprobe_trace_entry *entry;
- struct perf_trace_buf *trace_buf;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
unsigned long irq_flags;
+ char *trace_buf;
char *raw_data;
+ int rctx;
pc = preempt_count();
__size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1227,6 +1228,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
+
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
+ goto end_recursion;
+
__cpu = smp_processor_id();
if (in_nmi())
@@ -1237,18 +1243,7 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
if (!trace_buf)
goto end;
- trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
- if (trace_buf->recursion++)
- goto end_recursion;
-
- /*
- * Make recursion update visible before entering perf_tp_event
- * so that we protect from perf recursions.
- */
- barrier();
-
- raw_data = trace_buf->buf;
+ raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1263,9 +1258,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp,
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ip, 1, entry, size);
-end_recursion:
- trace_buf->recursion--;
end:
+ perf_swevent_put_recursion_context(rctx);
+end_recursion:
local_irq_restore(irq_flags);
return 0;
@@ -1278,11 +1273,12 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
struct ftrace_event_call *call = &tp->call;
struct kretprobe_trace_entry *entry;
- struct perf_trace_buf *trace_buf;
struct trace_entry *ent;
int size, __size, i, pc, __cpu;
unsigned long irq_flags;
+ char *trace_buf;
char *raw_data;
+ int rctx;
pc = preempt_count();
__size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args);
@@ -1297,6 +1293,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
* This also protects the rcu read side
*/
local_irq_save(irq_flags);
+
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
+ goto end_recursion;
+
__cpu = smp_processor_id();
if (in_nmi())
@@ -1307,18 +1308,7 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
if (!trace_buf)
goto end;
- trace_buf = per_cpu_ptr(trace_buf, __cpu);
-
- if (trace_buf->recursion++)
- goto end_recursion;
-
- /*
- * Make recursion update visible before entering perf_tp_event
- * so that we protect from perf recursions.
- */
- barrier();
-
- raw_data = trace_buf->buf;
+ raw_data = per_cpu_ptr(trace_buf, __cpu);
/* Zero dead bytes from alignment to avoid buffer leak to userspace */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -1334,9 +1324,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri,
entry->args[i] = call_fetch(&tp->args[i].fetch, regs);
perf_tp_event(call->id, entry->ret_ip, 1, entry, size);
-end_recursion:
- trace_buf->recursion--;
end:
+ perf_swevent_put_recursion_context(rctx);
+end_recursion:
local_irq_restore(irq_flags);
return 0;
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
new file mode 100644
index 000000000000..11935b53a6cb
--- /dev/null
+++ b/kernel/trace/trace_ksym.c
@@ -0,0 +1,554 @@
+/*
+ * trace_ksym.c - Kernel Symbol Tracer
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/debugfs.h>
+#include <linux/ftrace.h>
+#include <linux/module.h>
+#include <linux/fs.h>
+
+#include "trace_output.h"
+#include "trace_stat.h"
+#include "trace.h"
+
+#include <linux/hw_breakpoint.h>
+#include <asm/hw_breakpoint.h>
+
+/*
+ * For now, let us restrict the number of symbols traced simultaneously to
+ * the number of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HBP_NUM
+
+#define KSYM_TRACER_OP_LEN 3 /* rw- */
+
+struct trace_ksym {
+ struct perf_event **ksym_hbp;
+ unsigned long ksym_addr;
+ int type;
+ int len;
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ unsigned long counter;
+#endif
+ struct hlist_node ksym_hlist;
+};
+
+static struct trace_array *ksym_trace_array;
+
+static unsigned int ksym_filter_entry_count;
+static unsigned int ksym_tracing_enabled;
+
+static HLIST_HEAD(ksym_filter_head);
+
+static DEFINE_MUTEX(ksym_tracer_mutex);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+
+#define MAX_UL_INT 0xffffffff
+
+void ksym_collect_stats(unsigned long hbp_hit_addr)
+{
+ struct hlist_node *node;
+ struct trace_ksym *entry;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
+ if ((entry->ksym_addr == hbp_hit_addr) &&
+ (entry->counter <= MAX_UL_INT)) {
+ entry->counter++;
+ break;
+ }
+ }
+ rcu_read_unlock();
+}
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+void ksym_hbp_handler(struct perf_event *hbp, void *data)
+{
+ struct ring_buffer_event *event;
+ struct ksym_trace_entry *entry;
+ struct pt_regs *regs = data;
+ struct ring_buffer *buffer;
+ int pc;
+
+ if (!ksym_tracing_enabled)
+ return;
+
+ buffer = ksym_trace_array->buffer;
+
+ pc = preempt_count();
+
+ event = trace_buffer_lock_reserve(buffer, TRACE_KSYM,
+ sizeof(*entry), 0, pc);
+ if (!event)
+ return;
+
+ entry = ring_buffer_event_data(event);
+ entry->ip = instruction_pointer(regs);
+ entry->type = hw_breakpoint_type(hbp);
+ entry->addr = hw_breakpoint_addr(hbp);
+ strlcpy(entry->cmd, current->comm, TASK_COMM_LEN);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+ ksym_collect_stats(hw_breakpoint_addr(hbp));
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
+
+ trace_buffer_unlock_commit(buffer, event, 0, pc);
+}
+
+/* Valid access types are represented as
+ *
+ * rw- : Set Read/Write Access Breakpoint
+ * -w- : Set Write Access Breakpoint
+ * --- : Clear Breakpoints
+ * --x : Set Execution Breakpoints (Not available yet)
+ *
+ */
+static int ksym_trace_get_access_type(char *str)
+{
+ int access = 0;
+
+ if (str[0] == 'r')
+ access |= HW_BREAKPOINT_R;
+
+ if (str[1] == 'w')
+ access |= HW_BREAKPOINT_W;
+
+ if (str[2] == 'x')
+ access |= HW_BREAKPOINT_X;
+
+ switch (access) {
+ case HW_BREAKPOINT_R:
+ case HW_BREAKPOINT_W:
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ return access;
+ default:
+ return -EINVAL;
+ }
+}
+
+/*
+ * There can be several possible malformed requests and we attempt to capture
+ * all of them. We enumerate some of the rules:
+ * 1. We will not allow kernel symbols with ':' since it is used as a
+ * delimiter, i.e. multiple ':' symbols are disallowed. (A possible future
+ * use would be of the form <module>:<ksym_name>:<op>.)
+ * 2. No delimiter symbol ':' in the input string
+ * 3. Spurious operator symbols or symbols not in their respective positions
+ * 4. <ksym_name>:--- i.e. clear breakpoint request when ksym_name not in file
+ * 5. Kernel symbol not a part of /proc/kallsyms
+ * 6. Duplicate requests
+ *
+ * A well-formed request looks like, for example,
+ * echo "pid_max:rw-" > ksym_trace_filter
+ * to set a read/write breakpoint on the pid_max symbol.
+ */
+static int parse_ksym_trace_str(char *input_string, char **ksymname,
+ unsigned long *addr)
+{
+ int ret;
+
+ *ksymname = strsep(&input_string, ":");
+ *addr = kallsyms_lookup_name(*ksymname);
+
+ /* Check for malformed request: (2), (1) and (5) */
+ if ((!input_string) ||
+ (strlen(input_string) != KSYM_TRACER_OP_LEN) ||
+ (*addr == 0))
+ return -EINVAL;
+
+ ret = ksym_trace_get_access_type(input_string);
+
+ return ret;
+}
+
+int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
+{
+ struct trace_ksym *entry;
+ int ret = -ENOMEM;
+
+ if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+ printk(KERN_ERR "ksym_tracer: Maximum limit (%d) reached. No"
+ " new requests for tracing can be accepted now.\n",
+ KSYM_TRACER_MAX);
+ return -ENOSPC;
+ }
+
+ entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->type = op;
+ entry->ksym_addr = addr;
+ entry->len = HW_BREAKPOINT_LEN_4;
+
+ ret = -EAGAIN;
+ entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr,
+ entry->len, entry->type,
+ ksym_hbp_handler, true);
+ if (IS_ERR(entry->ksym_hbp)) {
+ /* read the error code before clearing the pointer */
+ ret = PTR_ERR(entry->ksym_hbp);
+ entry->ksym_hbp = NULL;
+ }
+
+ if (!entry->ksym_hbp) {
+ printk(KERN_INFO "ksym_tracer request failed. Try again"
+ " later!\n");
+ goto err;
+ }
+
+ hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
+ ksym_filter_entry_count++;
+
+ return 0;
+
+err:
+ kfree(entry);
+
+ return ret;
+}
+
+static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ struct trace_seq *s;
+ ssize_t cnt = 0;
+ int ret;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+ trace_seq_init(s);
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr);
+ if (entry->type == HW_BREAKPOINT_R)
+ ret = trace_seq_puts(s, "r--\n");
+ else if (entry->type == HW_BREAKPOINT_W)
+ ret = trace_seq_puts(s, "-w-\n");
+ else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R))
+ ret = trace_seq_puts(s, "rw-\n");
+ WARN_ON_ONCE(!ret);
+ }
+
+ cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
+
+ mutex_unlock(&ksym_tracer_mutex);
+
+ kfree(s);
+
+ return cnt;
+}
+
+static void __ksym_trace_reset(void)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node, *node1;
+
+ mutex_lock(&ksym_tracer_mutex);
+ hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
+ ksym_hlist) {
+ unregister_wide_hw_breakpoint(entry->ksym_hbp);
+ ksym_filter_entry_count--;
+ hlist_del_rcu(&(entry->ksym_hlist));
+ synchronize_rcu();
+ kfree(entry);
+ }
+ mutex_unlock(&ksym_tracer_mutex);
+}
+
+static ssize_t ksym_trace_filter_write(struct file *file,
+ const char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct trace_ksym *entry;
+ struct hlist_node *node;
+ char *input_string, *ksymname = NULL;
+ unsigned long ksym_addr = 0;
+ int ret, op, changed = 0;
+
+ input_string = kzalloc(count + 1, GFP_KERNEL);
+ if (!input_string)
+ return -ENOMEM;
+
+ if (copy_from_user(input_string, buffer, count)) {
+ kfree(input_string);
+ return -EFAULT;
+ }
+ input_string[count] = '\0';
+
+ strstrip(input_string);
+
+ /*
+ * Clear all breakpoints if:
+ * 1: echo > ksym_trace_filter
+ * 2: echo 0 > ksym_trace_filter
+ * 3: echo "*:---" > ksym_trace_filter
+ */
+ if (!input_string[0] || !strcmp(input_string, "0") ||
+ !strcmp(input_string, "*:---")) {
+ __ksym_trace_reset();
+ kfree(input_string);
+ return count;
+ }
+
+ ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
+ if (ret < 0) {
+ kfree(input_string);
+ return ret;
+ }
+
+ mutex_lock(&ksym_tracer_mutex);
+
+ ret = -EINVAL;
+ hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
+ if (entry->ksym_addr == ksym_addr) {
+ /* Check for malformed request: (6) */
+ if (entry->type != op)
+ changed = 1;
+ else
+ goto out;
+ break;
+ }
+ }
+	if (changed) {
+		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		entry->type = op;
+		ret = 0;
+		if (op > 0) {
+			entry->ksym_hbp =
+				register_wide_hw_breakpoint(entry->ksym_addr,
+					entry->len, entry->type,
+					ksym_hbp_handler, true);
+			if (IS_ERR(entry->ksym_hbp))
+				ret = PTR_ERR(entry->ksym_hbp);
+			else
+				goto out;
+		}
+		/* Re-registration failed or this was a "symbol:---" request:
+		 * drop the entry */
+		ksym_filter_entry_count--;
+		hlist_del_rcu(&(entry->ksym_hlist));
+		synchronize_rcu();
+		kfree(entry);
+		goto out;
+ } else {
+ /* Check for malformed request: (4) */
+ if (op == 0)
+ goto out;
+ ret = process_new_ksym_entry(ksymname, op, ksym_addr);
+ }
+out:
+ mutex_unlock(&ksym_tracer_mutex);
+
+ kfree(input_string);
+
+ if (!ret)
+ ret = count;
+ return ret;
+}
+
+static const struct file_operations ksym_tracing_fops = {
+ .open = tracing_open_generic,
+ .read = ksym_trace_filter_read,
+ .write = ksym_trace_filter_write,
+};
+
+static void ksym_trace_reset(struct trace_array *tr)
+{
+ ksym_tracing_enabled = 0;
+ __ksym_trace_reset();
+}
+
+static int ksym_trace_init(struct trace_array *tr)
+{
+ int cpu, ret = 0;
+
+ for_each_online_cpu(cpu)
+ tracing_reset(tr, cpu);
+ ksym_tracing_enabled = 1;
+ ksym_trace_array = tr;
+
+ return ret;
+}
+
+static void ksym_trace_print_header(struct seq_file *m)
+{
+ seq_puts(m,
+ "# TASK-PID CPU# Symbol "
+ "Type Function\n");
+ seq_puts(m,
+ "# | | | "
+ " | |\n");
+}
+
+static enum print_line_t ksym_trace_output(struct trace_iterator *iter)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct ksym_trace_entry *field;
+ char str[KSYM_SYMBOL_LEN];
+ int ret;
+
+ if (entry->type != TRACE_KSYM)
+ return TRACE_TYPE_UNHANDLED;
+
+ trace_assign_type(field, entry);
+
+ ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd,
+ entry->pid, iter->cpu, (char *)field->addr);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ switch (field->type) {
+ case HW_BREAKPOINT_R:
+ ret = trace_seq_printf(s, " R ");
+ break;
+ case HW_BREAKPOINT_W:
+ ret = trace_seq_printf(s, " W ");
+ break;
+ case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+ ret = trace_seq_printf(s, " RW ");
+ break;
+ default:
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ sprint_symbol(str, field->ip);
+ ret = trace_seq_printf(s, "%s\n", str);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
+}
+
+struct tracer ksym_tracer __read_mostly =
+{
+ .name = "ksym_tracer",
+ .init = ksym_trace_init,
+ .reset = ksym_trace_reset,
+#ifdef CONFIG_FTRACE_SELFTEST
+ .selftest = trace_selftest_startup_ksym,
+#endif
+ .print_header = ksym_trace_print_header,
+ .print_line = ksym_trace_output
+};
+
+static int __init init_ksym_trace(void)
+{
+ struct dentry *d_tracer;
+ struct dentry *entry;
+
+ d_tracer = tracing_init_dentry();
+ ksym_filter_entry_count = 0;
+
+ entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer,
+ NULL, &ksym_tracing_fops);
+ if (!entry)
+ pr_warning("Could not create debugfs "
+ "'ksym_trace_filter' file\n");
+
+ return register_tracer(&ksym_tracer);
+}
+device_initcall(init_ksym_trace);
+
+#ifdef CONFIG_PROFILE_KSYM_TRACER
+static int ksym_tracer_stat_headers(struct seq_file *m)
+{
+ seq_puts(m, " Access Type ");
+ seq_puts(m, " Symbol Counter\n");
+ seq_puts(m, " ----------- ");
+ seq_puts(m, " ------ -------\n");
+ return 0;
+}
+
+static int ksym_tracer_stat_show(struct seq_file *m, void *v)
+{
+ struct hlist_node *stat = v;
+ struct trace_ksym *entry;
+ int access_type = 0;
+ char fn_name[KSYM_NAME_LEN];
+
+ entry = hlist_entry(stat, struct trace_ksym, ksym_hlist);
+
+ access_type = entry->type;
+
+ switch (access_type) {
+ case HW_BREAKPOINT_R:
+ seq_puts(m, " R ");
+ break;
+ case HW_BREAKPOINT_W:
+ seq_puts(m, " W ");
+ break;
+ case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+ seq_puts(m, " RW ");
+ break;
+ default:
+ seq_puts(m, " NA ");
+ }
+
+ if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0)
+ seq_printf(m, " %-36s", fn_name);
+ else
+ seq_printf(m, " %-36s", "<NA>");
+ seq_printf(m, " %15lu\n", entry->counter);
+
+ return 0;
+}
+
+static void *ksym_tracer_stat_start(struct tracer_stat *trace)
+{
+ return ksym_filter_head.first;
+}
+
+static void *
+ksym_tracer_stat_next(void *v, int idx)
+{
+ struct hlist_node *stat = v;
+
+ return stat->next;
+}
+
+static struct tracer_stat ksym_tracer_stats = {
+ .name = "ksym_tracer",
+ .stat_start = ksym_tracer_stat_start,
+ .stat_next = ksym_tracer_stat_next,
+ .stat_headers = ksym_tracer_stat_headers,
+ .stat_show = ksym_tracer_stat_show
+};
+
+static int __init ksym_tracer_stat_init(void)
+{
+ int ret;
+
+ ret = register_stat_tracer(&ksym_tracer_stats);
+ if (ret) {
+ printk(KERN_WARNING "Warning: could not register "
+ "ksym tracer stats\n");
+ return 1;
+ }
+
+ return 0;
+}
+fs_initcall(ksym_tracer_stat_init);
+#endif /* CONFIG_PROFILE_KSYM_TRACER */
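
The filter file added above takes requests of the form <symbol>:<access>, where
<access> mirrors the strings the read side prints back ("r--", "-w-", "rw-";
"---" clears one symbol, and an empty, "0" or "*:---" write clears everything).
A minimal user-space sketch, assuming debugfs is mounted at /sys/kernel/debug
(the path and the symbol name are illustrative, not part of this patch):

	/* arm a write breakpoint on pid_max through ksym_trace_filter */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	int main(void)
	{
		const char *req = "pid_max:-w-";	/* <symbol>:<access> */
		int fd = open("/sys/kernel/debug/tracing/ksym_trace_filter",
			      O_WRONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (write(fd, req, strlen(req)) < 0)
			perror("write");
		close(fd);
		return 0;
	}

With CONFIG_PROFILE_KSYM_TRACER set, the per-symbol hit counters from the stat
tracer registered above should then appear under tracing/trace_stat/ksym_tracer.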
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index d2cdbabb4ead..dc98309e839a 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -17,6 +17,7 @@ static inline int trace_valid_entry(struct trace_entry *entry)
case TRACE_GRAPH_ENT:
case TRACE_GRAPH_RET:
case TRACE_HW_BRANCHES:
+ case TRACE_KSYM:
return 1;
}
return 0;
@@ -808,3 +809,57 @@ trace_selftest_startup_hw_branches(struct tracer *trace,
return ret;
}
#endif /* CONFIG_HW_BRANCH_TRACER */
+
+#ifdef CONFIG_KSYM_TRACER
+static int ksym_selftest_dummy;
+
+int
+trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr)
+{
+ unsigned long count;
+ int ret;
+
+ /* start the tracing */
+ ret = tracer_init(trace, tr);
+ if (ret) {
+ warn_failed_init_tracer(trace, ret);
+ return ret;
+ }
+
+ ksym_selftest_dummy = 0;
+ /* Register the read-write tracing request */
+
+ ret = process_new_ksym_entry("ksym_selftest_dummy",
+ HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+ (unsigned long)(&ksym_selftest_dummy));
+
+ if (ret < 0) {
+ printk(KERN_CONT "ksym_trace read-write startup test failed\n");
+ goto ret_path;
+ }
+	/*
+	 * Perform a read and a write operation on the dummy variable
+	 * to trigger the tracer.
+	 */
+ if (ksym_selftest_dummy == 0)
+ ksym_selftest_dummy++;
+
+ /* stop the tracing. */
+ tracing_stop();
+ /* check the trace buffer */
+ ret = trace_test_buffer(tr, &count);
+ trace->reset(tr);
+ tracing_start();
+
+	/*
+	 * One read and one write were performed on the dummy variable,
+	 * triggering two entries in the trace buffer.
+	 */
+ if (!ret && count != 2) {
+		printk(KERN_CONT "Ksym tracer startup test failed\n");
+ ret = -1;
+ }
+
+ret_path:
+ return ret;
+}
+#endif /* CONFIG_KSYM_TRACER */
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 51213b0aa81b..9189cbe86079 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -477,11 +477,12 @@ static int sys_prof_refcount_exit;
static void prof_syscall_enter(struct pt_regs *regs, long id)
{
struct syscall_metadata *sys_data;
- struct perf_trace_buf *trace_buf;
struct syscall_trace_enter *rec;
unsigned long flags;
+ char *trace_buf;
char *raw_data;
int syscall_nr;
+ int rctx;
int size;
int cpu;
@@ -505,28 +506,18 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
+ goto end_recursion;
+
cpu = smp_processor_id();
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
- trace_buf = per_cpu_ptr(trace_buf, cpu);
-
- if (trace_buf->recursion++)
- goto end_recursion;
-
- /*
- * Make recursion update visible before entering perf_tp_event
- * so that we protect from perf recursions.
- */
- barrier();
-
- raw_data = trace_buf->buf;
+ raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -539,9 +530,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id)
(unsigned long *)&rec->args);
perf_tp_event(sys_data->enter_id, 0, 1, rec, size);
-end_recursion:
- trace_buf->recursion--;
end:
+ perf_swevent_put_recursion_context(rctx);
+end_recursion:
local_irq_restore(flags);
}
@@ -588,10 +579,11 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
{
struct syscall_metadata *sys_data;
struct syscall_trace_exit *rec;
- struct perf_trace_buf *trace_buf;
unsigned long flags;
int syscall_nr;
+ char *trace_buf;
char *raw_data;
+ int rctx;
int size;
int cpu;
@@ -617,28 +609,19 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
/* Protect the per cpu buffer, begin the rcu read side */
local_irq_save(flags);
+
+ rctx = perf_swevent_get_recursion_context();
+ if (rctx < 0)
+ goto end_recursion;
+
cpu = smp_processor_id();
- if (in_nmi())
- trace_buf = rcu_dereference(perf_trace_buf_nmi);
- else
- trace_buf = rcu_dereference(perf_trace_buf);
+ trace_buf = rcu_dereference(perf_trace_buf);
if (!trace_buf)
goto end;
- trace_buf = per_cpu_ptr(trace_buf, cpu);
-
- if (trace_buf->recursion++)
- goto end_recursion;
-
- /*
- * Make recursion update visible before entering perf_tp_event
- * so that we protect from perf recursions.
- */
- barrier();
-
- raw_data = trace_buf->buf;
+ raw_data = per_cpu_ptr(trace_buf, cpu);
/* zero the dead bytes from align to not leak stack to user */
*(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL;
@@ -652,9 +635,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret)
perf_tp_event(sys_data->exit_id, 0, 1, rec, size);
-end_recursion:
- trace_buf->recursion--;
end:
+ perf_swevent_put_recursion_context(rctx);
+end_recursion:
local_irq_restore(flags);
}
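
Both syscall hooks now share the same shape: take the swevent recursion context
before touching the shared per-cpu buffer and release it on the way out, which
is what lets the separate perf_trace_buf_nmi copy go away. Condensed, using
only names that appear in this patch:

	rctx = perf_swevent_get_recursion_context();
	if (rctx < 0)
		goto end_recursion;
	/* ... locate the per-cpu buffer, build the record, perf_tp_event() ... */
	perf_swevent_put_recursion_context(rctx);
end_recursion:
	local_irq_restore(flags);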
diff --git a/samples/Kconfig b/samples/Kconfig
index b92bde3c6a89..e4be84ac3d38 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -40,5 +40,11 @@ config SAMPLE_KRETPROBES
default m
depends on SAMPLE_KPROBES && KRETPROBES
+config SAMPLE_HW_BREAKPOINT
+ tristate "Build kernel hardware breakpoint examples -- loadable module only"
+ depends on HAVE_HW_BREAKPOINT && m
+ help
+ This builds kernel hardware breakpoint example modules.
+
endif # SAMPLES
diff --git a/samples/Makefile b/samples/Makefile
index 43343a03b1f4..0f15e6d77fd6 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -1,3 +1,4 @@
# Makefile for Linux samples code
-obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/
+obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \
+ hw_breakpoint/
diff --git a/samples/hw_breakpoint/Makefile b/samples/hw_breakpoint/Makefile
new file mode 100644
index 000000000000..0f5c31c2fc47
--- /dev/null
+++ b/samples/hw_breakpoint/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o
diff --git a/samples/hw_breakpoint/data_breakpoint.c b/samples/hw_breakpoint/data_breakpoint.c
new file mode 100644
index 000000000000..95063818bcf4
--- /dev/null
+++ b/samples/hw_breakpoint/data_breakpoint.c
@@ -0,0 +1,90 @@
+/*
+ * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * usage: insmod data_breakpoint.ko ksym=<ksym_name>
+ *
+ * This file is a kernel module that places a breakpoint on the ksym_name
+ * kernel variable using a hardware breakpoint register. The corresponding
+ * handler, which prints a backtrace, is invoked every time a write operation
+ * is performed on that variable.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ *
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ */
+#include <linux/module.h> /* Needed by all modules */
+#include <linux/kernel.h> /* Needed for KERN_INFO */
+#include <linux/init.h> /* Needed for the macros */
+#include <linux/kallsyms.h>
+
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+
+struct perf_event **sample_hbp;
+
+static char ksym_name[KSYM_NAME_LEN] = "pid_max";
+module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO);
+MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any"
+ " write operations on the kernel symbol");
+
+static void sample_hbp_handler(struct perf_event *temp, void *data)
+{
+	printk(KERN_INFO "%s value changed\n", ksym_name);
+ dump_stack();
+ printk(KERN_INFO "Dump stack from sample_hbp_handler\n");
+}
+
+static int __init hw_break_module_init(void)
+{
+ int ret;
+ unsigned long addr;
+
+	addr = kallsyms_lookup_name(ksym_name);
+	if (!addr) {
+		printk(KERN_INFO "Symbol %s not found\n", ksym_name);
+		return -ENXIO;
+	}
+
+ sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4,
+ HW_BREAKPOINT_W | HW_BREAKPOINT_R,
+ sample_hbp_handler, true);
+ if (IS_ERR(sample_hbp)) {
+ ret = PTR_ERR(sample_hbp);
+ goto fail;
+ } else if (!sample_hbp) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name);
+
+ return 0;
+
+fail:
+ printk(KERN_INFO "Breakpoint registration failed\n");
+
+ return ret;
+}
+
+static void __exit hw_break_module_exit(void)
+{
+ unregister_wide_hw_breakpoint(sample_hbp);
+ printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name);
+}
+
+module_init(hw_break_module_init);
+module_exit(hw_break_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("K.Prasad");
+MODULE_DESCRIPTION("ksym breakpoint");
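
A quick way to exercise the sample (the symbol choice is illustrative; pid_max
is the module's default): load it with 'insmod data_breakpoint.ko ksym=pid_max',
write to the watched variable, e.g. 'echo 32768 > /proc/sys/kernel/pid_max',
and look for the handler's backtrace in dmesg.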
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 000000000000..44b0ce35c28a
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
+perf-kmem(1)
+============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory (slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+ Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+ Print n lines only
+
+--raw-ip::
+ Print raw ip instead of symbol
+
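+EXAMPLES
+--------
+A possible session (the workload name is illustrative):
+
+  perf kmem record ./myprog
+  perf kmem --stat=caller -s frag,hit -l 20
+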
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 0ff23de9e453..fc46c0b40f6e 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -26,11 +26,19 @@ OPTIONS
-e::
--event=::
- Select the PMU event. Selection can be a symbolic event name
- (use 'perf list' to list all events) or a raw PMU
- event (eventsel+umask) in the form of rNNN where NNN is a
- hexadecimal event descriptor.
+ Select the PMU event. Selection can be:
+ - a symbolic event name (use 'perf list' to list all events)
+
+ - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+ - a hardware breakpoint event in the form of '\mem:addr[:access]'
+          where addr is the address in memory you want to break on.
+          Access is the memory access type (read, write, execute), passed
+          as follows: '\mem:addr[:[r][w][x]]'.
+          For example, to profile read-write accesses at 0x1000, use
+          'mem:0x1000:rw'.
-a::
System-wide collection.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 53e663a5fa2f..f1537a94a05f 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -2,6 +2,7 @@
all::
# Define V=1 to have a more verbose compile.
+# Define V=2 to have an even more verbose compile.
#
# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf()
# or vsnprintf() return -1 instead of number of characters which would
@@ -147,6 +148,8 @@ all::
# broken, or spawning external process is slower than built-in grep perf has).
#
# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
@$(SHELL_PATH) util/PERF-VERSION-GEN
@@ -159,22 +162,6 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not')
uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not')
uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not')
-#
-# Add -m32 for cross-builds:
-#
-ifdef NO_64BIT
- MBITS := -m32
-else
- #
- # If we're on a 64-bit kernel (except ia64), use -m64:
- #
- ifneq ($(uname_M),ia64)
- ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M))
- MBITS := -m64
- endif
- endif
-endif
-
# CFLAGS and LDFLAGS are for the users to override from the command line.
#
@@ -211,7 +198,7 @@ ifndef PERF_DEBUG
CFLAGS_OPTIMIZE = -O6
endif
-CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS)
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
EXTLIBS = -lpthread -lrt -lelf -lm
ALL_CFLAGS = $(CFLAGS)
ALL_LDFLAGS = $(LDFLAGS)
@@ -263,7 +250,7 @@ PTHREAD_LIBS = -lpthread
# explicitly what architecture to check for. Fix this up for yours..
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y)
+ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y)
CFLAGS := $(CFLAGS) -fstack-protector-all
endif
@@ -445,9 +432,15 @@ BUILTIN_OBJS += builtin-timechart.o
BUILTIN_OBJS += builtin-top.o
BUILTIN_OBJS += builtin-trace.o
BUILTIN_OBJS += builtin-probe.o
+BUILTIN_OBJS += builtin-kmem.o
PERFLIBS = $(LIB_FILE)
+ifeq ($(V), 2)
+ QUIET_STDERR = ">/dev/null"
+else
+ QUIET_STDERR = ">/dev/null 2>&1"
+endif
#
# Platform specific tweaks
#
@@ -475,19 +468,19 @@ ifeq ($(uname_S),Darwin)
PTHREAD_LIBS =
endif
-ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
-ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
- msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+ifeq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <gnu/libc-version.h>'; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
+ msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
endif
- ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
+ ifneq ($(shell sh -c "(echo '\#include <libelf.h>'; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
BASIC_CFLAGS += -DLIBELF_NO_MMAP
endif
else
msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]);
endif
-ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y)
+ifneq ($(shell sh -c "(echo '\#include <libdwarf/dwarf.h>'; echo '\#include <libdwarf/libdwarf.h>'; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y)
msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. Please install libdwarf-dev/libdwarf-devel >= 20081231);
BASIC_CFLAGS += -DNO_LIBDWARF
else
@@ -499,25 +492,25 @@ endif
ifdef NO_DEMANGLE
BASIC_CFLAGS += -DNO_DEMANGLE
else
- has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y")
+ has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y")
ifeq ($(has_bfd),y)
EXTLIBS += -lbfd
else
- has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y")
+ has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y")
ifeq ($(has_bfd_iberty),y)
EXTLIBS += -lbfd -liberty
else
- has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
+ has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y")
ifeq ($(has_bfd_iberty_z),y)
EXTLIBS += -lbfd -liberty -lz
else
- has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y")
+ has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y")
ifeq ($(has_cplus_demangle),y)
EXTLIBS += -liberty
BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
else
- msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
+ msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
BASIC_CFLAGS += -DNO_DEMANGLE
endif
endif
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 77d50a6d6802..6b13a1ecf1e7 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -33,9 +33,11 @@ static int input;
static int full_paths;
static int print_line;
+static bool use_modules;
static unsigned long page_size;
static unsigned long mmap_window = 32;
+const char *vmlinux_name;
struct sym_hist {
u64 sum;
@@ -156,7 +158,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
level = 'k';
- sym = kernel_maps__find_symbol(ip, &map);
+ sym = kernel_maps__find_symbol(ip, &map, symbol_filter);
dump_printf(" ...... dso: %s\n",
map ? map->dso->long_name : "<not found>");
} else if (event->header.misc & PERF_RECORD_MISC_USER) {
@@ -636,9 +638,9 @@ static int __cmd_annotate(void)
exit(0);
}
- if (load_kernel(symbol_filter) < 0) {
- perror("failed to load kernel symbols");
- return EXIT_FAILURE;
+ if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) {
+		pr_err("failed to create kernel maps for symbol resolution\n");
+ return -1;
}
remap:
@@ -742,7 +744,7 @@ static const struct option options[] = {
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
- OPT_BOOLEAN('m', "modules", &modules,
+ OPT_BOOLEAN('m', "modules", &use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('l', "print-line", &print_line,
"print matching source lines (may be slow)"),
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
new file mode 100644
index 000000000000..173d6db42ecb
--- /dev/null
+++ b/tools/perf/builtin-kmem.c
@@ -0,0 +1,833 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/data_map.h"
+
+#include <linux/rbtree.h>
+
+struct alloc_stat;
+typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+
+static char const *input_name = "perf.data";
+
+static struct perf_header *header;
+static u64 sample_type;
+
+static int alloc_flag;
+static int caller_flag;
+
+static int alloc_lines = -1;
+static int caller_lines = -1;
+
+static bool raw_ip;
+
+static char default_sort_order[] = "frag,hit,bytes";
+
+static char *cwd;
+static int cwdlen;
+
+static int *cpunode_map;
+static int max_cpu_num;
+
+struct alloc_stat {
+ u64 call_site;
+ u64 ptr;
+ u64 bytes_req;
+ u64 bytes_alloc;
+ u32 hit;
+ u32 pingpong;
+
+ short alloc_cpu;
+
+ struct rb_node node;
+};
+
+static struct rb_root root_alloc_stat;
+static struct rb_root root_alloc_sorted;
+static struct rb_root root_caller_stat;
+static struct rb_root root_caller_sorted;
+
+static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;
+
+struct raw_event_sample {
+ u32 size;
+ char data[0];
+};
+
+#define PATH_SYS_NODE "/sys/devices/system/node"
+
+static void init_cpunode_map(void)
+{
+ FILE *fp;
+ int i;
+
+	/* fall back to a generous default if sysfs doesn't expose kernel_max */
+	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+	if (!fp) {
+		max_cpu_num = 4096;
+	} else {
+		if (fscanf(fp, "%d", &max_cpu_num) < 1)
+			die("Failed to read 'kernel_max' from sysfs");
+		max_cpu_num++;
+		fclose(fp);
+	}
+
+	cpunode_map = calloc(max_cpu_num, sizeof(int));
+	if (!cpunode_map)
+		die("calloc");
+	for (i = 0; i < max_cpu_num; i++)
+		cpunode_map[i] = -1;
+}
+
+static void setup_cpunode_map(void)
+{
+ struct dirent *dent1, *dent2;
+ DIR *dir1, *dir2;
+ unsigned int cpu, mem;
+ char buf[PATH_MAX];
+
+ init_cpunode_map();
+
+ dir1 = opendir(PATH_SYS_NODE);
+ if (!dir1)
+ return;
+
+ while (true) {
+ dent1 = readdir(dir1);
+ if (!dent1)
+ break;
+
+ if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+ continue;
+
+ snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+ dir2 = opendir(buf);
+ if (!dir2)
+ continue;
+ while (true) {
+ dent2 = readdir(dir2);
+ if (!dent2)
+ break;
+ if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ continue;
+ cpunode_map[cpu] = mem;
+		}
+		closedir(dir2);
+	}
+	closedir(dir1);
+}
+
+static int
+process_comm_event(event_t *event, unsigned long offset, unsigned long head)
+{
+ struct thread *thread = threads__findnew(event->comm.pid);
+
+ dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n",
+ (void *)(offset + head),
+ (void *)(long)(event->header.size),
+ event->comm.comm, event->comm.pid);
+
+ if (thread == NULL ||
+ thread__set_comm(thread, event->comm.comm)) {
+ dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
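+/*
+ * Allocation stats are kept in two rbtrees: one keyed by object pointer
+ * (to match kfree events back to their allocation) and one keyed by
+ * call site.
+ */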
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+ int bytes_req, int bytes_alloc, int cpu)
+{
+ struct rb_node **node = &root_alloc_stat.rb_node;
+ struct rb_node *parent = NULL;
+ struct alloc_stat *data = NULL;
+
+ while (*node) {
+ parent = *node;
+ data = rb_entry(*node, struct alloc_stat, node);
+
+ if (ptr > data->ptr)
+ node = &(*node)->rb_right;
+ else if (ptr < data->ptr)
+ node = &(*node)->rb_left;
+ else
+ break;
+ }
+
+ if (data && data->ptr == ptr) {
+ data->hit++;
+ data->bytes_req += bytes_req;
+		data->bytes_alloc += bytes_alloc;
+ } else {
+ data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
+ data->ptr = ptr;
+ data->pingpong = 0;
+ data->hit = 1;
+ data->bytes_req = bytes_req;
+ data->bytes_alloc = bytes_alloc;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &root_alloc_stat);
+ }
+ data->call_site = call_site;
+ data->alloc_cpu = cpu;
+}
+
+static void insert_caller_stat(unsigned long call_site,
+ int bytes_req, int bytes_alloc)
+{
+ struct rb_node **node = &root_caller_stat.rb_node;
+ struct rb_node *parent = NULL;
+ struct alloc_stat *data = NULL;
+
+ while (*node) {
+ parent = *node;
+ data = rb_entry(*node, struct alloc_stat, node);
+
+ if (call_site > data->call_site)
+ node = &(*node)->rb_right;
+ else if (call_site < data->call_site)
+ node = &(*node)->rb_left;
+ else
+ break;
+ }
+
+ if (data && data->call_site == call_site) {
+ data->hit++;
+ data->bytes_req += bytes_req;
+		data->bytes_alloc += bytes_alloc;
+ } else {
+ data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
+ data->call_site = call_site;
+ data->pingpong = 0;
+ data->hit = 1;
+ data->bytes_req = bytes_req;
+ data->bytes_alloc = bytes_alloc;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &root_caller_stat);
+ }
+}
+
+static void process_alloc_event(struct raw_event_sample *raw,
+ struct event *event,
+ int cpu,
+ u64 timestamp __used,
+ struct thread *thread __used,
+ int node)
+{
+ unsigned long call_site;
+ unsigned long ptr;
+ int bytes_req;
+ int bytes_alloc;
+ int node1, node2;
+
+ ptr = raw_field_value(event, "ptr", raw->data);
+ call_site = raw_field_value(event, "call_site", raw->data);
+ bytes_req = raw_field_value(event, "bytes_req", raw->data);
+ bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
+
+ insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
+ insert_caller_stat(call_site, bytes_req, bytes_alloc);
+
+ total_requested += bytes_req;
+ total_allocated += bytes_alloc;
+
+ if (node) {
+ node1 = cpunode_map[cpu];
+ node2 = raw_field_value(event, "node", raw->data);
+ if (node1 != node2)
+ nr_cross_allocs++;
+ }
+ nr_allocs++;
+}
+
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+ unsigned long call_site,
+ struct rb_root *root,
+ sort_fn_t sort_fn)
+{
+ struct rb_node *node = root->rb_node;
+ struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+
+ while (node) {
+ struct alloc_stat *data;
+ int cmp;
+
+ data = rb_entry(node, struct alloc_stat, node);
+
+ cmp = sort_fn(&key, data);
+ if (cmp < 0)
+ node = node->rb_left;
+ else if (cmp > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void process_free_event(struct raw_event_sample *raw,
+ struct event *event,
+ int cpu,
+ u64 timestamp __used,
+ struct thread *thread __used)
+{
+ unsigned long ptr;
+ struct alloc_stat *s_alloc, *s_caller;
+
+ ptr = raw_field_value(event, "ptr", raw->data);
+
+ s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+ if (!s_alloc)
+ return;
+
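+	/* freed on a different CPU than it was allocated on: a "ping-pong" */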
+ if (cpu != s_alloc->alloc_cpu) {
+ s_alloc->pingpong++;
+
+ s_caller = search_alloc_stat(0, s_alloc->call_site,
+ &root_caller_stat, callsite_cmp);
+ assert(s_caller);
+ s_caller->pingpong++;
+ }
+ s_alloc->alloc_cpu = -1;
+}
+
+static void
+process_raw_event(event_t *raw_event __used, void *more_data,
+ int cpu, u64 timestamp, struct thread *thread)
+{
+ struct raw_event_sample *raw = more_data;
+ struct event *event;
+ int type;
+
+ type = trace_parse_common_type(raw->data);
+	event = trace_find_event(type);
+	if (!event)
+		return;
+
+ if (!strcmp(event->name, "kmalloc") ||
+ !strcmp(event->name, "kmem_cache_alloc")) {
+ process_alloc_event(raw, event, cpu, timestamp, thread, 0);
+ return;
+ }
+
+ if (!strcmp(event->name, "kmalloc_node") ||
+ !strcmp(event->name, "kmem_cache_alloc_node")) {
+ process_alloc_event(raw, event, cpu, timestamp, thread, 1);
+ return;
+ }
+
+ if (!strcmp(event->name, "kfree") ||
+ !strcmp(event->name, "kmem_cache_free")) {
+ process_free_event(raw, event, cpu, timestamp, thread);
+ return;
+ }
+}
+
+static int
+process_sample_event(event_t *event, unsigned long offset, unsigned long head)
+{
+ u64 ip = event->ip.ip;
+ u64 timestamp = -1;
+ u32 cpu = -1;
+ u64 period = 1;
+ void *more_data = event->ip.__more_data;
+ struct thread *thread = threads__findnew(event->ip.pid);
+
+ if (sample_type & PERF_SAMPLE_TIME) {
+ timestamp = *(u64 *)more_data;
+ more_data += sizeof(u64);
+ }
+
+ if (sample_type & PERF_SAMPLE_CPU) {
+ cpu = *(u32 *)more_data;
+ more_data += sizeof(u32);
+ more_data += sizeof(u32); /* reserved */
+ }
+
+ if (sample_type & PERF_SAMPLE_PERIOD) {
+ period = *(u64 *)more_data;
+ more_data += sizeof(u64);
+ }
+
+ dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
+ (void *)(offset + head),
+ (void *)(long)(event->header.size),
+ event->header.misc,
+ event->ip.pid, event->ip.tid,
+ (void *)(long)ip,
+ (long long)period);
+
+ if (thread == NULL) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+
+ process_raw_event(event, more_data, cpu, timestamp, thread);
+
+ return 0;
+}
+
+static int sample_type_check(u64 type)
+{
+ sample_type = type;
+
+ if (!(sample_type & PERF_SAMPLE_RAW)) {
+ fprintf(stderr,
+ "No trace sample to read. Did you call perf record "
+			"without -R?\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct perf_file_handler file_handler = {
+ .process_sample_event = process_sample_event,
+ .process_comm_event = process_comm_event,
+ .sample_type_check = sample_type_check,
+};
+
+static int read_events(void)
+{
+ register_idle_thread();
+ register_perf_file_handler(&file_handler);
+
+ return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0,
+ &cwdlen, &cwd);
+}
+
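+/* e.g. 96 bytes requested out of 128 allocated -> 25% internal fragmentation */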
+static double fragmentation(unsigned long n_req, unsigned long n_alloc)
+{
+ if (n_alloc == 0)
+ return 0.0;
+ else
+ return 100.0 - (100.0 * n_req / n_alloc);
+}
+
+static void __print_result(struct rb_root *root, int n_lines, int is_caller)
+{
+ struct rb_node *next;
+
+ printf("%.102s\n", graph_dotted_line);
+ printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf("%.102s\n", graph_dotted_line);
+
+ next = rb_first(root);
+
+ while (next && n_lines--) {
+ struct alloc_stat *data = rb_entry(next, struct alloc_stat,
+ node);
+ struct symbol *sym = NULL;
+ char buf[BUFSIZ];
+ u64 addr;
+
+ if (is_caller) {
+ addr = data->call_site;
+ if (!raw_ip)
+ sym = kernel_maps__find_symbol(addr,
+ NULL, NULL);
+ } else
+ addr = data->ptr;
+
+ if (sym != NULL)
+ snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
+ addr - sym->start);
+ else
+ snprintf(buf, sizeof(buf), "%#Lx", addr);
+ printf(" %-34s |", buf);
+
+ printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n",
+ (unsigned long long)data->bytes_alloc,
+ (unsigned long)data->bytes_alloc / data->hit,
+ (unsigned long long)data->bytes_req,
+ (unsigned long)data->bytes_req / data->hit,
+ (unsigned long)data->hit,
+ (unsigned long)data->pingpong,
+ fragmentation(data->bytes_req, data->bytes_alloc));
+
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1)
+ printf(" ... | ... | ... | ... | ... | ... \n");
+
+ printf("%.102s\n", graph_dotted_line);
+}
+
+static void print_summary(void)
+{
+ printf("\nSUMMARY\n=======\n");
+ printf("Total bytes requested: %lu\n", total_requested);
+ printf("Total bytes allocated: %lu\n", total_allocated);
+ printf("Total bytes wasted on internal fragmentation: %lu\n",
+ total_allocated - total_requested);
+ printf("Internal fragmentation: %f%%\n",
+ fragmentation(total_requested, total_allocated));
+ printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
+}
+
+static void print_result(void)
+{
+ if (caller_flag)
+ __print_result(&root_caller_sorted, caller_lines, 1);
+ if (alloc_flag)
+ __print_result(&root_alloc_sorted, alloc_lines, 0);
+ print_summary();
+}
+
+struct sort_dimension {
+ const char name[20];
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(caller_sort);
+static LIST_HEAD(alloc_sort);
+
+static void sort_insert(struct rb_root *root, struct alloc_stat *data,
+ struct list_head *sort_list)
+{
+ struct rb_node **new = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct sort_dimension *sort;
+
+ while (*new) {
+ struct alloc_stat *this;
+ int cmp = 0;
+
+ this = rb_entry(*new, struct alloc_stat, node);
+ parent = *new;
+
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }
+
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
+ struct list_head *sort_list)
+{
+ struct rb_node *node;
+ struct alloc_stat *data;
+
+ for (;;) {
+ node = rb_first(root);
+ if (!node)
+ break;
+
+ rb_erase(node, root);
+ data = rb_entry(node, struct alloc_stat, node);
+ sort_insert(root_sorted, data, sort_list);
+ }
+}
+
+static void sort_result(void)
+{
+ __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
+ __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
+}
+
+static int __cmd_kmem(void)
+{
+ setup_pager();
+ read_events();
+ sort_result();
+ print_result();
+
+ return 0;
+}
+
+static const char * const kmem_usage[] = {
+ "perf kmem [<options>] {record}",
+ NULL
+};
+
+static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->ptr < r->ptr)
+ return -1;
+ else if (l->ptr > r->ptr)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension ptr_sort_dimension = {
+ .name = "ptr",
+ .cmp = ptr_cmp,
+};
+
+static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->call_site < r->call_site)
+ return -1;
+ else if (l->call_site > r->call_site)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = callsite_cmp,
+};
+
+static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->hit < r->hit)
+ return -1;
+ else if (l->hit > r->hit)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension hit_sort_dimension = {
+ .name = "hit",
+ .cmp = hit_cmp,
+};
+
+static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->bytes_alloc < r->bytes_alloc)
+ return -1;
+ else if (l->bytes_alloc > r->bytes_alloc)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = bytes_cmp,
+};
+
+static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ double x, y;
+
+ x = fragmentation(l->bytes_req, l->bytes_alloc);
+ y = fragmentation(r->bytes_req, r->bytes_alloc);
+
+ if (x < y)
+ return -1;
+ else if (x > y)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension frag_sort_dimension = {
+ .name = "frag",
+ .cmp = frag_cmp,
+};
+
+static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+{
+ if (l->pingpong < r->pingpong)
+ return -1;
+ else if (l->pingpong > r->pingpong)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension pingpong_sort_dimension = {
+ .name = "pingpong",
+ .cmp = pingpong_cmp,
+};
+
+static struct sort_dimension *avail_sorts[] = {
+ &ptr_sort_dimension,
+ &callsite_sort_dimension,
+ &hit_sort_dimension,
+ &bytes_sort_dimension,
+ &frag_sort_dimension,
+ &pingpong_sort_dimension,
+};
+
+#define NUM_AVAIL_SORTS \
+ (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < NUM_AVAIL_SORTS; i++) {
+ if (!strcmp(avail_sorts[i]->name, tok)) {
+ sort = malloc(sizeof(*sort));
+ if (!sort)
+ die("malloc");
+ memcpy(sort, avail_sorts[i], sizeof(*sort));
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int setup_sorting(struct list_head *sort_list, const char *arg)
+{
+	char *tok;
+	char *str = strdup(arg);
+	char *pos = str;
+
+	if (!str)
+		die("strdup");
+
+	while (true) {
+		tok = strsep(&pos, ",");
+		if (!tok)
+			break;
+		if (sort_dimension__add(tok, sort_list) < 0) {
+			error("Unknown --sort key: '%s'", tok);
+			free(str);
+			return -1;
+		}
+	}
+
+	/* strsep() advanced pos, so free the original strdup() pointer */
+	free(str);
+ return 0;
+}
+
+static int parse_sort_opt(const struct option *opt __used,
+ const char *arg, int unset __used)
+{
+ if (!arg)
+ return -1;
+
+ if (caller_flag > alloc_flag)
+ return setup_sorting(&caller_sort, arg);
+ else
+ return setup_sorting(&alloc_sort, arg);
+}
+
+static int parse_stat_opt(const struct option *opt __used,
+ const char *arg, int unset __used)
+{
+ if (!arg)
+ return -1;
+
+ if (strcmp(arg, "alloc") == 0)
+ alloc_flag = (caller_flag + 1);
+ else if (strcmp(arg, "caller") == 0)
+ caller_flag = (alloc_flag + 1);
+ else
+ return -1;
+ return 0;
+}
+
+static int parse_line_opt(const struct option *opt __used,
+ const char *arg, int unset __used)
+{
+ int lines;
+
+ if (!arg)
+ return -1;
+
+ lines = strtoul(arg, NULL, 10);
+
+ if (caller_flag > alloc_flag)
+ caller_lines = lines;
+ else
+ alloc_lines = lines;
+
+ return 0;
+}
+
+static const struct option kmem_options[] = {
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_CALLBACK(0, "stat", NULL, "<alloc>|<caller>",
+ "stat selector, Pass 'alloc' or 'caller'.",
+ parse_stat_opt),
+ OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag",
+ parse_sort_opt),
+ OPT_CALLBACK('l', "line", NULL, "num",
+		     "show n lines",
+ parse_line_opt),
+ OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+ OPT_END()
+};
+
+static const char *record_args[] = {
+ "record",
+ "-a",
+ "-R",
+ "-M",
+ "-f",
+ "-c", "1",
+ "-e", "kmem:kmalloc",
+ "-e", "kmem:kmalloc_node",
+ "-e", "kmem:kfree",
+ "-e", "kmem:kmem_cache_alloc",
+ "-e", "kmem:kmem_cache_alloc_node",
+ "-e", "kmem:kmem_cache_free",
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+
+ rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+	rec_argv = calloc(rec_argc + 1, sizeof(char *));
+	if (!rec_argv)
+		return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = strdup(record_args[i]);
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ return cmd_record(i, rec_argv, NULL);
+}
+
+int cmd_kmem(int argc, const char **argv, const char *prefix __used)
+{
+ symbol__init(0);
+
+ argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+
+ if (argc && !strncmp(argv[0], "rec", 3))
+ return __cmd_record(argc, argv);
+ else if (argc)
+ usage_with_options(kmem_usage, kmem_options);
+
+ if (list_empty(&caller_sort))
+ setup_sorting(&caller_sort, default_sort_order);
+ if (list_empty(&alloc_sort))
+ setup_sorting(&alloc_sort, default_sort_order);
+
+ setup_cpunode_map();
+
+ return __cmd_kmem();
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 82260c56db3d..0e519c667e3a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -307,6 +307,12 @@ try_again:
printf("\n");
error("perfcounter syscall returned with %d (%s)\n",
fd[nr_cpu][counter], strerror(err));
+
+#if defined(__i386__) || defined(__x86_64__)
+ if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP)
+ die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n");
+#endif
+
die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
exit(-1);
}
@@ -400,7 +406,7 @@ static int __cmd_record(int argc, const char **argv)
struct stat st;
pid_t pid = 0;
int flags;
- int ret;
+ int err;
unsigned long waking = 0;
page_size = sysconf(_SC_PAGE_SIZE);
@@ -434,16 +440,18 @@ static int __cmd_record(int argc, const char **argv)
exit(-1);
}
- if (!file_new)
- header = perf_header__read(output);
- else
- header = perf_header__new();
-
+ header = perf_header__new();
if (header == NULL) {
pr_err("Not enough memory for reading perf file header\n");
return -1;
}
+ if (!file_new) {
+ err = perf_header__read(header, output);
+ if (err < 0)
+ return err;
+ }
+
if (raw_samples) {
perf_header__set_feat(header, HEADER_TRACE_INFO);
} else {
@@ -472,8 +480,11 @@ static int __cmd_record(int argc, const char **argv)
}
}
- if (file_new)
- perf_header__write(header, output, false);
+ if (file_new) {
+ err = perf_header__write(header, output, false);
+ if (err < 0)
+ return err;
+ }
if (!system_wide)
event__synthesize_thread(pid, process_synthesized_event);
@@ -527,7 +538,7 @@ static int __cmd_record(int argc, const char **argv)
if (hits == samples) {
if (done)
break;
- ret = poll(event_array, nr_poll, -1);
+ err = poll(event_array, nr_poll, -1);
waking++;
}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 1a806d5f05cf..fe474b7f8ad0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -38,6 +38,7 @@ static char *dso_list_str, *comm_list_str, *sym_list_str,
static struct strlist *dso_list, *comm_list, *sym_list;
static int force;
+static bool use_modules;
static int full_paths;
static int show_nr_samples;
@@ -51,6 +52,7 @@ static char *pretty_printing_style = default_pretty_printing_style;
static int exclude_other = 1;
static char callchain_default_opt[] = "fractal,0.5";
+const char *vmlinux_name;
static char *cwd;
static int cwdlen;
@@ -448,7 +450,7 @@ got_map:
* trick of looking in the whole kernel symbol list.
*/
if ((long long)ip < 0)
- return kernel_maps__find_symbol(ip, mapp);
+ return kernel_maps__find_symbol(ip, mapp, NULL);
}
dump_printf(" ...... dso: %s\n",
map ? map->dso->long_name : "<not found>");
@@ -466,7 +468,7 @@ static int call__match(struct symbol *sym)
return 0;
}
-static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
+static struct symbol **resolve_callchain(struct thread *thread,
struct ip_callchain *chain,
struct symbol **parent)
{
@@ -495,10 +497,10 @@ static struct symbol **resolve_callchain(struct thread *thread, struct map *map,
case PERF_CONTEXT_HV:
break;
case PERF_CONTEXT_KERNEL:
- sym = kernel_maps__find_symbol(ip, &map);
+ sym = kernel_maps__find_symbol(ip, NULL, NULL);
break;
default:
- sym = resolve_symbol(thread, &map, &ip);
+ sym = resolve_symbol(thread, NULL, &ip);
break;
}
@@ -528,7 +530,7 @@ hist_entry__add(struct thread *thread, struct map *map,
struct hist_entry *he;
if ((sort__has_parent || callchain) && chain)
- syms = resolve_callchain(thread, map, chain, &parent);
+ syms = resolve_callchain(thread, chain, &parent);
he = __hist_entry__add(thread, map, sym, parent,
ip, count, level, &hit);
@@ -715,7 +717,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (cpumode == PERF_RECORD_MISC_KERNEL) {
level = 'k';
- sym = kernel_maps__find_symbol(ip, &map);
+ sym = kernel_maps__find_symbol(ip, &map, NULL);
dump_printf(" ...... dso: %s\n",
map ? map->dso->long_name : "<not found>");
} else if (cpumode == PERF_RECORD_MISC_USER) {
@@ -924,8 +926,9 @@ static int __cmd_report(void)
register_perf_file_handler(&file_handler);
- ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths,
- &cwdlen, &cwd);
+ ret = mmap_dispatch_perf_file(&header, input_name, vmlinux_name,
+ !vmlinux_name, force,
+ full_paths, &cwdlen, &cwd);
if (ret)
return ret;
@@ -1023,7 +1026,7 @@ static const struct option options[] = {
"dump raw trace in ASCII"),
OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
- OPT_BOOLEAN('m', "modules", &modules,
+ OPT_BOOLEAN('m', "modules", &use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples,
"Show a column with the number of samples"),
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index df44b756cecc..260f57a72ee0 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1718,7 +1718,8 @@ static int read_events(void)
register_idle_thread();
register_perf_file_handler(&file_handler);
- return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
+ return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0,
+ &cwdlen, &cwd);
}
static void print_bad_events(void)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 665877e4a944..dd4d82ac7aa4 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1093,7 +1093,7 @@ static void process_samples(void)
static int __cmd_timechart(void)
{
- int ret, rc = EXIT_FAILURE;
+ int err, rc = EXIT_FAILURE;
unsigned long offset = 0;
unsigned long head, shift;
struct stat statbuf;
@@ -1111,8 +1111,8 @@ static int __cmd_timechart(void)
exit(-1);
}
- ret = fstat(input, &statbuf);
- if (ret < 0) {
+ err = fstat(input, &statbuf);
+ if (err < 0) {
perror("failed to stat file");
exit(-1);
}
@@ -1122,7 +1122,16 @@ static int __cmd_timechart(void)
exit(0);
}
- header = perf_header__read(input);
+ header = perf_header__new();
+ if (header == NULL)
+ return -ENOMEM;
+
+ err = perf_header__read(header, input);
+ if (err < 0) {
+ perf_header__delete(header);
+ return err;
+ }
+
head = header->data_offset;
sample_type = perf_header__sample_type(header);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 07b92c378ae2..6a5de90e9b83 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -79,13 +79,7 @@ static int dump_symtab = 0;
static bool hide_kernel_symbols = false;
static bool hide_user_symbols = false;
static struct winsize winsize;
-static const char *graph_line =
- "_____________________________________________________________________"
- "_____________________________________________________________________";
-static const char *graph_dotted_line =
- "---------------------------------------------------------------------"
- "---------------------------------------------------------------------"
- "---------------------------------------------------------------------";
+const char *vmlinux_name;
/*
* Source
@@ -830,6 +824,8 @@ static void handle_keypress(int c)
case 'q':
case 'Q':
printf("exiting.\n");
+ if (dump_symtab)
+ dsos__fprintf(stderr);
exit(0);
case 's':
prompt_symbol(&sym_filter_entry, "Enter details symbol");
@@ -946,17 +942,6 @@ static int symbol_filter(struct map *map, struct symbol *sym)
return 0;
}
-static int parse_symbols(void)
-{
- if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0)
- return -1;
-
- if (dump_symtab)
- dsos__fprintf(stderr);
-
- return 0;
-}
-
static void event__process_sample(const event_t *self, int counter)
{
u64 ip = self->ip.ip;
@@ -999,7 +984,7 @@ static void event__process_sample(const event_t *self, int counter)
if (hide_kernel_symbols)
return;
- sym = kernel_maps__find_symbol(ip, &map);
+ sym = kernel_maps__find_symbol(ip, &map, symbol_filter);
if (sym == NULL)
return;
break;
@@ -1326,7 +1311,7 @@ static const struct option options[] = {
int cmd_top(int argc, const char **argv, const char *prefix __used)
{
- int counter;
+ int counter, err;
page_size = sysconf(_SC_PAGE_SIZE);
@@ -1350,10 +1335,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
if (delay_secs < 1)
delay_secs = 1;
- parse_symbols();
+ err = kernel_maps__init(vmlinux_name, !vmlinux_name, true);
+ if (err < 0)
+ return err;
parse_source(sym_filter_entry);
-
/*
* User specified count overrides default frequency.
*/
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d042d656c561..b71198e5dc14 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -131,7 +131,8 @@ static int __cmd_trace(void)
register_idle_thread();
register_perf_file_handler(&file_handler);
- return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd);
+ return mmap_dispatch_perf_file(&header, input_name, NULL, false,
+ 0, 0, &cwdlen, &cwd);
}
static const char * const annotate_usage[] = {
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 9b02d85091fe..a3d8bf65f26c 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -28,5 +28,6 @@ extern int cmd_top(int argc, const char **argv, const char *prefix);
extern int cmd_trace(int argc, const char **argv, const char *prefix);
extern int cmd_version(int argc, const char **argv, const char *prefix);
extern int cmd_probe(int argc, const char **argv, const char *prefix);
+extern int cmd_kmem(int argc, const char **argv, const char *prefix);
#endif
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
index d3a6e18e4a5e..02b09ea17a3e 100644
--- a/tools/perf/command-list.txt
+++ b/tools/perf/command-list.txt
@@ -14,3 +14,4 @@ perf-timechart mainporcelain common
perf-top mainporcelain common
perf-trace mainporcelain common
perf-probe mainporcelain common
+perf-kmem mainporcelain common
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 89b82acac7d9..cf64049bc9bd 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -285,20 +285,21 @@ static void handle_internal_command(int argc, const char **argv)
{
const char *cmd = argv[0];
static struct cmd_struct commands[] = {
- { "help", cmd_help, 0 },
- { "list", cmd_list, 0 },
{ "buildid-list", cmd_buildid_list, 0 },
- { "record", cmd_record, 0 },
- { "report", cmd_report, 0 },
- { "bench", cmd_bench, 0 },
- { "stat", cmd_stat, 0 },
- { "timechart", cmd_timechart, 0 },
- { "top", cmd_top, 0 },
- { "annotate", cmd_annotate, 0 },
- { "version", cmd_version, 0 },
- { "trace", cmd_trace, 0 },
- { "sched", cmd_sched, 0 },
- { "probe", cmd_probe, 0 },
+ { "help", cmd_help, 0 },
+ { "list", cmd_list, 0 },
+ { "record", cmd_record, 0 },
+ { "report", cmd_report, 0 },
+ { "bench", cmd_bench, 0 },
+ { "stat", cmd_stat, 0 },
+ { "timechart", cmd_timechart, 0 },
+ { "top", cmd_top, 0 },
+ { "annotate", cmd_annotate, 0 },
+ { "version", cmd_version, 0 },
+ { "trace", cmd_trace, 0 },
+ { "sched", cmd_sched, 0 },
+ { "probe", cmd_probe, 0 },
+ { "kmem", cmd_kmem, 0 },
};
unsigned int i;
static const char ext[] = STRIP_EXTENSION;
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 0b791bd346bc..35073621e5de 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -29,3 +29,11 @@ unsigned char sane_ctype[256] = {
A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
/* Nothing in the 128.. range */
};
+
+const char *graph_line =
+ "_____________________________________________________________________"
+ "_____________________________________________________________________";
+const char *graph_dotted_line =
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------";
diff --git a/tools/perf/util/data_map.c b/tools/perf/util/data_map.c
index 14cb8465eb08..f318d19b2562 100644
--- a/tools/perf/util/data_map.c
+++ b/tools/perf/util/data_map.c
@@ -101,12 +101,14 @@ out:
int mmap_dispatch_perf_file(struct perf_header **pheader,
const char *input_name,
+ const char *vmlinux_name,
+ bool try_vmlinux_path,
int force,
int full_paths,
int *cwdlen,
char **cwd)
{
- int ret, rc = EXIT_FAILURE;
+ int err;
struct perf_header *header;
unsigned long head, shift;
unsigned long offset = 0;
@@ -118,56 +120,69 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
int input;
char *buf;
- if (!curr_handler)
- die("Forgot to register perf file handler");
+ if (curr_handler == NULL) {
+ pr_debug("Forgot to register perf file handler\n");
+ return -EINVAL;
+ }
page_size = getpagesize();
input = open(input_name, O_RDONLY);
if (input < 0) {
- fprintf(stderr, " failed to open file: %s", input_name);
+ pr_err("Failed to open file: %s", input_name);
if (!strcmp(input_name, "perf.data"))
- fprintf(stderr, " (try 'perf record' first)");
- fprintf(stderr, "\n");
- exit(-1);
+ pr_err(" (try 'perf record' first)");
+ pr_err("\n");
+ return -errno;
}
- ret = fstat(input, &input_stat);
- if (ret < 0) {
- perror("failed to stat file");
- exit(-1);
+ if (fstat(input, &input_stat) < 0) {
+ pr_err("failed to stat file");
+ err = -errno;
+ goto out_close;
}
+ err = -EACCES;
if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) {
- fprintf(stderr, "file: %s not owned by current user or root\n",
+ pr_err("file: %s not owned by current user or root\n",
input_name);
- exit(-1);
+ goto out_close;
}
- if (!input_stat.st_size) {
- fprintf(stderr, "zero-sized file, nothing to do!\n");
- exit(0);
+ if (input_stat.st_size == 0) {
+ pr_info("zero-sized file, nothing to do!\n");
+ goto done;
}
- *pheader = perf_header__read(input);
- header = *pheader;
+ err = -ENOMEM;
+ header = perf_header__new();
+ if (header == NULL)
+ goto out_close;
+
+ err = perf_header__read(header, input);
+ if (err < 0)
+ goto out_delete;
+ *pheader = header;
head = header->data_offset;
sample_type = perf_header__sample_type(header);
- if (curr_handler->sample_type_check)
- if (curr_handler->sample_type_check(sample_type) < 0)
- exit(-1);
+ err = -EINVAL;
+ if (curr_handler->sample_type_check &&
+ curr_handler->sample_type_check(sample_type) < 0)
+ goto out_delete;
- if (load_kernel(NULL) < 0) {
- perror("failed to load kernel symbols");
- return EXIT_FAILURE;
+ err = -ENOMEM;
+ if (kernel_maps__init(vmlinux_name, try_vmlinux_path, true) < 0) {
+ pr_err("failed to setup the kernel maps to resolve symbols\n");
+ goto out_delete;
}
if (!full_paths) {
if (getcwd(__cwd, sizeof(__cwd)) == NULL) {
- perror("failed to get the current directory");
- return EXIT_FAILURE;
+ pr_err("failed to get the current directory\n");
+ err = -errno;
+ goto out_delete;
}
*cwd = __cwd;
*cwdlen = strlen(*cwd);
@@ -181,11 +196,12 @@ int mmap_dispatch_perf_file(struct perf_header **pheader,
head -= shift;
remap:
- buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
- MAP_SHARED, input, offset);
+ buf = mmap(NULL, page_size * mmap_window, PROT_READ,
+ MAP_SHARED, input, offset);
if (buf == MAP_FAILED) {
- perror("failed to mmap file");
- exit(-1);
+ pr_err("failed to mmap file\n");
+ err = -errno;
+ goto out_delete;
}
more:
@@ -242,10 +258,12 @@ more:
goto more;
done:
- rc = EXIT_SUCCESS;
+ err = 0;
+out_close:
close(input);
- return rc;
+ return err;
+out_delete:
+ perf_header__delete(header);
+ goto out_close;
}
-
-
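
The rework above is the recurring pattern of this series: die()/exit() calls become negative errno (or -EINVAL/-ENOMEM) return values, with a single cleanup path reached via goto, so callers of the library code can recover instead of having the whole tool killed. Reduced to a sketch (valid() is a hypothetical check standing in for the real ones):

	int do_something(const char *name)
	{
		int err, fd = open(name, O_RDONLY);

		if (fd < 0)
			return -errno;

		err = -EINVAL;
		if (!valid(fd))		/* hypothetical validation step */
			goto out_close;

		err = 0;	/* success */
	out_close:
		close(fd);
		return err;
	}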
diff --git a/tools/perf/util/data_map.h b/tools/perf/util/data_map.h
index ae036ecd7625..3f0d21b3819e 100644
--- a/tools/perf/util/data_map.h
+++ b/tools/perf/util/data_map.h
@@ -23,6 +23,8 @@ struct perf_file_handler {
void register_perf_file_handler(struct perf_file_handler *handler);
int mmap_dispatch_perf_file(struct perf_header **pheader,
const char *input_name,
+ const char *vmlinux_name,
+ bool try_vmlinux_path,
int force,
int full_paths,
int *cwdlen,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 1f771ce3a957..f1e392612652 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -69,13 +69,6 @@ struct build_id_event {
char filename[];
};
-struct build_id_list {
- struct build_id_event event;
- struct list_head list;
- const char *dso_name;
- int len;
-};
-
typedef union event_union {
struct perf_event_header header;
struct ip_event ip;
@@ -122,10 +115,13 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
void map__init(struct map *self, u64 start, u64 end, u64 pgoff,
struct dso *dso);
struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen);
+void map__delete(struct map *self);
struct map *map__clone(struct map *self);
int map__overlap(struct map *l, struct map *r);
size_t map__fprintf(struct map *self, FILE *fp);
struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter);
+void map__fixup_start(struct map *self);
+void map__fixup_end(struct map *self);
int event__synthesize_thread(pid_t pid, int (*process)(event_t *event));
void event__synthesize_threads(int (*process)(event_t *event));
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index b01a9537977f..1332f8ec04aa 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -78,16 +78,24 @@ struct perf_header *perf_header__new(void)
return self;
}
+void perf_header__delete(struct perf_header *self)
+{
+ int i;
+
+ for (i = 0; i < self->attrs; ++i)
+ perf_header_attr__delete(self->attr[i]);
+
+ free(self->attr);
+ free(self);
+}
+
int perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr)
{
- int pos = self->attrs;
-
if (self->frozen)
return -1;
- self->attrs++;
- if (self->attrs > self->size) {
+ if (self->attrs == self->size) {
int nsize = self->size * 2;
struct perf_header_attr **nattr;
@@ -98,7 +106,8 @@ int perf_header__add_attr(struct perf_header *self,
self->size = nsize;
self->attr = nattr;
}
- self->attr[pos] = attr;
+
+ self->attr[self->attrs++] = attr;
return 0;
}
@@ -167,7 +176,7 @@ static int do_write(int fd, const void *buf, size_t size)
int ret = write(fd, buf, size);
if (ret < 0)
- return -1;
+ return -errno;
size -= ret;
buf += ret;
@@ -176,43 +185,51 @@ static int do_write(int fd, const void *buf, size_t size)
return 0;
}
-static int write_buildid_table(int fd, struct list_head *id_head)
+static int dsos__write_buildid_table(int fd)
{
- struct build_id_list *iter, *next;
-
- list_for_each_entry_safe(iter, next, id_head, list) {
- struct build_id_event *b = &iter->event;
-
- if (do_write(fd, b, sizeof(*b)) < 0 ||
- do_write(fd, iter->dso_name, iter->len) < 0)
- return -1;
- list_del(&iter->list);
- free(iter);
+ struct dso *pos;
+
+ list_for_each_entry(pos, &dsos, node) {
+ int err;
+ struct build_id_event b;
+ size_t len;
+
+ if (!pos->has_build_id)
+ continue;
+ len = pos->long_name_len + 1;
+ len = ALIGN(len, 64);
+ memset(&b, 0, sizeof(b));
+ memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id));
+ b.header.size = sizeof(b) + len;
+ err = do_write(fd, &b, sizeof(b));
+ if (err < 0)
+ return err;
+ err = do_write(fd, pos->long_name, len);
+ if (err < 0)
+ return err;
}
return 0;
}
-static void
-perf_header__adds_write(struct perf_header *self, int fd)
+static int perf_header__adds_write(struct perf_header *self, int fd)
{
- LIST_HEAD(id_list);
int nr_sections;
struct perf_file_section *feat_sec;
int sec_size;
u64 sec_start;
- int idx = 0;
+ int idx = 0, err;
- if (fetch_build_id_table(&id_list))
+ if (dsos__read_build_ids())
perf_header__set_feat(self, HEADER_BUILD_ID);
nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS);
if (!nr_sections)
- return;
+ return 0;
feat_sec = calloc(sizeof(*feat_sec), nr_sections);
- if (!feat_sec)
- die("No memory");
+ if (feat_sec == NULL)
+ return -ENOMEM;
sec_size = sizeof(*feat_sec) * nr_sections;
@@ -236,25 +253,37 @@ perf_header__adds_write(struct perf_header *self, int fd)
buildid_sec = &feat_sec[idx++];
+ /*
+ * Read the kernel build_id and the list of loaded modules with
+ * their build_ids:
+ */
+ kernel_maps__init(NULL, false, true);
+
/* Write build-ids */
buildid_sec->offset = lseek(fd, 0, SEEK_CUR);
- if (write_buildid_table(fd, &id_list) < 0)
- die("failed to write buildid table");
+ err = dsos__write_buildid_table(fd);
+ if (err < 0) {
+ pr_debug("failed to write buildid table\n");
+ goto out_free;
+ }
buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset;
}
lseek(fd, sec_start, SEEK_SET);
- if (do_write(fd, feat_sec, sec_size) < 0)
- die("failed to write feature section");
+ err = do_write(fd, feat_sec, sec_size);
+ if (err < 0)
+ pr_debug("failed to write feature section\n");
+out_free:
free(feat_sec);
+ return err;
}
-void perf_header__write(struct perf_header *self, int fd, bool at_exit)
+int perf_header__write(struct perf_header *self, int fd, bool at_exit)
{
struct perf_file_header f_header;
struct perf_file_attr f_attr;
struct perf_header_attr *attr;
- int i;
+ int i, err;
lseek(fd, sizeof(f_header), SEEK_SET);
@@ -263,8 +292,11 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
attr = self->attr[i];
attr->id_offset = lseek(fd, 0, SEEK_CUR);
- if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0)
- die("failed to write perf header");
+ err = do_write(fd, attr->id, attr->ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
}
@@ -280,20 +312,30 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
.size = attr->ids * sizeof(u64),
}
};
- if (do_write(fd, &f_attr, sizeof(f_attr)) < 0)
- die("failed to write perf header attribute");
+ err = do_write(fd, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ return err;
+ }
}
self->event_offset = lseek(fd, 0, SEEK_CUR);
self->event_size = event_count * sizeof(struct perf_trace_event_type);
- if (events)
- if (do_write(fd, events, self->event_size) < 0)
- die("failed to write perf header events");
+ if (events) {
+ err = do_write(fd, events, self->event_size);
+ if (err < 0) {
+ pr_debug("failed to write perf header events\n");
+ return err;
+ }
+ }
self->data_offset = lseek(fd, 0, SEEK_CUR);
- if (at_exit)
- perf_header__adds_write(self, fd);
+ if (at_exit) {
+ err = perf_header__adds_write(self, fd);
+ if (err < 0)
+ return err;
+ }
f_header = (struct perf_file_header){
.magic = PERF_MAGIC,
@@ -316,11 +358,15 @@ void perf_header__write(struct perf_header *self, int fd, bool at_exit)
memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features));
lseek(fd, 0, SEEK_SET);
- if (do_write(fd, &f_header, sizeof(f_header)) < 0)
- die("failed to write perf header");
+ err = do_write(fd, &f_header, sizeof(f_header));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
lseek(fd, self->data_offset + self->data_size, SEEK_SET);
self->frozen = 1;
+ return 0;
}
static void do_read(int fd, void *buf, size_t size)
@@ -430,19 +476,17 @@ static int perf_file_section__process(struct perf_file_section *self,
return 0;
}
-struct perf_header *perf_header__read(int fd)
+int perf_header__read(struct perf_header *self, int fd)
{
- struct perf_header *self = perf_header__new();
struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
int nr_attrs, nr_ids, i, j;
- if (self == NULL)
- die("nomem");
-
- if (perf_file_header__read(&f_header, self, fd) < 0)
- die("incompatible file format");
+ if (perf_file_header__read(&f_header, self, fd) < 0) {
+ pr_debug("incompatible file format\n");
+ return -EINVAL;
+ }
nr_attrs = f_header.attrs.size / sizeof(f_attr);
lseek(fd, f_header.attrs.offset, SEEK_SET);
@@ -456,7 +500,7 @@ struct perf_header *perf_header__read(int fd)
attr = perf_header_attr__new(&f_attr.attr);
if (attr == NULL)
- die("nomem");
+ return -ENOMEM;
nr_ids = f_attr.ids.size / sizeof(u64);
lseek(fd, f_attr.ids.offset, SEEK_SET);
@@ -464,11 +508,15 @@ struct perf_header *perf_header__read(int fd)
for (j = 0; j < nr_ids; j++) {
do_read(fd, &f_id, sizeof(f_id));
- if (perf_header_attr__add_id(attr, f_id) < 0)
- die("nomem");
+ if (perf_header_attr__add_id(attr, f_id) < 0) {
+ perf_header_attr__delete(attr);
+ return -ENOMEM;
+ }
+ }
+ if (perf_header__add_attr(self, attr) < 0) {
+ perf_header_attr__delete(attr);
+ return -ENOMEM;
}
- if (perf_header__add_attr(self, attr) < 0)
- die("nomem");
lseek(fd, tmp, SEEK_SET);
}
@@ -476,8 +524,8 @@ struct perf_header *perf_header__read(int fd)
if (f_header.event_types.size) {
lseek(fd, f_header.event_types.offset, SEEK_SET);
events = malloc(f_header.event_types.size);
- if (!events)
- die("nomem");
+ if (events == NULL)
+ return -ENOMEM;
do_read(fd, events, f_header.event_types.size);
event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type);
}
@@ -487,8 +535,7 @@ struct perf_header *perf_header__read(int fd)
lseek(fd, self->data_offset, SEEK_SET);
self->frozen = 1;
-
- return self;
+ return 0;
}
u64 perf_header__sample_type(struct perf_header *header)
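
Since perf_header__read() no longer allocates, headers now follow an explicit new/read/delete lifecycle; the reworked mmap_dispatch_perf_file() above is the canonical caller. In sketch form (input_fd is assumed to be an open perf.data file descriptor):

	struct perf_header *header = perf_header__new();

	if (header == NULL)
		return -ENOMEM;
	if (perf_header__read(header, input_fd) < 0) {
		perf_header__delete(header);
		return -EINVAL;
	}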
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index f46a94e09eea..d1dbe2b79c42 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -55,8 +55,11 @@ struct perf_header {
DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
};
-struct perf_header *perf_header__read(int fd);
-void perf_header__write(struct perf_header *self, int fd, bool at_exit);
+struct perf_header *perf_header__new(void);
+void perf_header__delete(struct perf_header *self);
+
+int perf_header__read(struct perf_header *self, int fd);
+int perf_header__write(struct perf_header *self, int fd, bool at_exit);
int perf_header__add_attr(struct perf_header *self,
struct perf_header_attr *attr);
@@ -75,8 +78,6 @@ perf_header__find_attr(u64 id, struct perf_header *header);
void perf_header__set_feat(struct perf_header *self, int feat);
bool perf_header__has_feat(const struct perf_header *self, int feat);
-struct perf_header *perf_header__new(void);
-
int perf_header__process_sections(struct perf_header *self, int fd,
int (*process)(struct perf_file_section *self,
int feat, int fd));
diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
index ace57c36d1d0..8d63116e9435 100644
--- a/tools/perf/util/include/linux/bitops.h
+++ b/tools/perf/util/include/linux/bitops.h
@@ -7,6 +7,8 @@
#define CONFIG_GENERIC_FIND_FIRST_BIT
#include "../../../../include/linux/bitops.h"
+#undef __KERNEL__
+
static inline void set_bit(int nr, unsigned long *addr)
{
addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 94ca95073c40..09412321a80d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -75,6 +75,29 @@ out_delete:
return NULL;
}
+void map__delete(struct map *self)
+{
+ free(self);
+}
+
+void map__fixup_start(struct map *self)
+{
+ struct rb_node *nd = rb_first(&self->dso->syms);
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ self->start = sym->start;
+ }
+}
+
+void map__fixup_end(struct map *self)
+{
+ struct rb_node *nd = rb_last(&self->dso->syms);
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ self->end = sym->end;
+ }
+}
+
#define DSO__DELETED "(deleted)"
struct symbol *
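
map__fixup_start()/map__fixup_end() clamp a map to its first and last symbol once real symbols have been loaded; dso__load_kernel_sym() below uses them to replace the provisional ~0UL end that kernel_maps__fixup_end() now leaves on the last kernel map.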
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0faf4f2bb5ca..070027469270 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1,4 +1,4 @@
-
+#include "../../../include/linux/hw_breakpoint.h"
#include "util.h"
#include "../perf.h"
#include "parse-options.h"
@@ -540,6 +540,81 @@ static enum event_result parse_tracepoint_event(const char **strp,
attr, strp);
}
+static enum event_result
+parse_breakpoint_type(const char *type, const char **strp,
+ struct perf_event_attr *attr)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ if (!type[i])
+ break;
+
+ switch (type[i]) {
+ case 'r':
+ attr->bp_type |= HW_BREAKPOINT_R;
+ break;
+ case 'w':
+ attr->bp_type |= HW_BREAKPOINT_W;
+ break;
+ case 'x':
+ attr->bp_type |= HW_BREAKPOINT_X;
+ break;
+ default:
+ return EVT_FAILED;
+ }
+ }
+ if (!attr->bp_type) /* Default */
+ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+ *strp = type + i;
+
+ return EVT_HANDLED;
+}
+
+static enum event_result
+parse_breakpoint_event(const char **strp, struct perf_event_attr *attr)
+{
+ const char *target;
+ const char *type;
+ char *endaddr;
+ u64 addr;
+ enum event_result err;
+
+ target = strchr(*strp, ':');
+ if (!target)
+ return EVT_FAILED;
+
+ if (strncmp(*strp, "mem", target - *strp) != 0)
+ return EVT_FAILED;
+
+ target++;
+
+ addr = strtoull(target, &endaddr, 0);
+ if (target == endaddr)
+ return EVT_FAILED;
+
+ attr->bp_addr = addr;
+ *strp = endaddr;
+
+ type = strchr(target, ':');
+
+ /* If no type is defined, default to rw */
+ if (!type) {
+ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+ } else {
+ err = parse_breakpoint_type(++type, strp, attr);
+ if (err == EVT_FAILED)
+ return EVT_FAILED;
+ }
+
+ /* We should find a nice way to override the access type */
+ attr->bp_len = HW_BREAKPOINT_LEN_4;
+ attr->type = PERF_TYPE_BREAKPOINT;
+
+ return EVT_HANDLED;
+}
+
static int check_events(const char *str, unsigned int i)
{
int n;
@@ -673,6 +748,10 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr)
if (ret != EVT_FAILED)
goto modifier;
+ ret = parse_breakpoint_event(str, attr);
+ if (ret != EVT_FAILED)
+ goto modifier;
+
fprintf(stderr, "invalid or unsupported event: '%s'\n", *str);
fprintf(stderr, "Run 'perf list' for a list of valid events\n");
return EVT_FAILED;
@@ -859,6 +938,9 @@ void print_events(void)
"rNNN");
printf("\n");
+ printf(" %-42s [hardware breakpoint]\n", "mem:<addr>[:access]");
+ printf("\n");
+
print_tracepoint_events();
exit(129);
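
The parser above accepts events of the form mem:<addr>[:access], e.g. 'perf record -e mem:0x1000:w' (address made up for illustration): the address goes through strtoull() with base 0, so hex, octal and decimal all work, and the access string is any combination of 'r', 'w' and 'x', defaulting to rw. For that example the resulting attribute would be:

	/* what parse_breakpoint_event() fills in for "mem:0x1000:w" */
	attr->type    = PERF_TYPE_BREAKPOINT;
	attr->bp_type = HW_BREAKPOINT_W;
	attr->bp_addr = 0x1000;
	attr->bp_len  = HW_BREAKPOINT_LEN_4;	/* hardcoded for now, see comment above */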
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5cc96c86861b..44d81d5ae8cf 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -9,8 +9,13 @@
#include <libelf.h>
#include <gelf.h>
#include <elf.h>
+#include <limits.h>
#include <sys/utsname.h>
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
enum dso_origin {
DSO__ORIG_KERNEL = 0,
DSO__ORIG_JAVA_JIT,
@@ -26,7 +31,11 @@ static void dsos__add(struct dso *dso);
static struct dso *dsos__find(const char *name);
static struct map *map__new2(u64 start, struct dso *dso);
static void kernel_maps__insert(struct map *map);
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+ symbol_filter_t filter);
unsigned int symbol__priv_size;
+static int vmlinux_path__nr_entries;
+static char **vmlinux_path;
static struct rb_root kernel_maps;
@@ -69,11 +78,11 @@ static void kernel_maps__fixup_end(void)
prev->end = curr->start - 1;
}
- nd = rb_last(&curr->dso->syms);
- if (nd) {
- struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
- curr->end = sym->end;
- }
+ /*
+ * We still don't have the actual symbols, so guess the
+ * final address of the last map.
+ */
+ curr->end = ~0UL;
}
static struct symbol *symbol__new(u64 start, u64 len, const char *name)
@@ -111,6 +120,8 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp)
static void dso__set_long_name(struct dso *self, char *name)
{
+ if (name == NULL)
+ return;
self->long_name = name;
self->long_name_len = strlen(name);
}
@@ -323,7 +334,7 @@ out_failure:
* kernel range is broken in several maps, named [kernel].N, as we don't have
* the original ELF section names that vmlinux has.
*/
-static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
+static int kernel_maps__split_kallsyms(symbol_filter_t filter)
{
struct map *map = kernel_map;
struct symbol *pos;
@@ -339,9 +350,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
module = strchr(pos->name, '\t');
if (module) {
- if (!use_modules)
- goto delete_symbol;
-
*module++ = '\0';
if (strcmp(map->dso->name, module)) {
@@ -381,7 +389,6 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules)
}
if (filter && filter(map, pos)) {
-delete_symbol:
rb_erase(&pos->rb_node, &kernel_map->dso->syms);
symbol__delete(pos);
} else {
@@ -397,17 +404,18 @@ delete_symbol:
}
-static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules)
+static int kernel_maps__load_kallsyms(symbol_filter_t filter)
{
if (kernel_maps__load_all_kallsyms())
return -1;
dso__fixup_sym_end(kernel_map->dso);
+ kernel_map->dso->origin = DSO__ORIG_KERNEL;
- return kernel_maps__split_kallsyms(filter, use_modules);
+ return kernel_maps__split_kallsyms(filter);
}
-static size_t kernel_maps__fprintf(FILE *fp)
+size_t kernel_maps__fprintf(FILE *fp)
{
size_t printed = fprintf(fp, "Kernel maps:\n");
struct rb_node *nd;
@@ -883,47 +891,40 @@ out_close:
return err;
}
-bool fetch_build_id_table(struct list_head *head)
+static bool dso__build_id_equal(const struct dso *self, u8 *build_id)
{
- bool have_buildid = false;
- struct dso *pos;
-
- list_for_each_entry(pos, &dsos, node) {
- struct build_id_list *new;
- struct build_id_event b;
- size_t len;
-
- if (filename__read_build_id(pos->long_name,
- &b.build_id,
- sizeof(b.build_id)) < 0)
- continue;
- have_buildid = true;
- memset(&b.header, 0, sizeof(b.header));
- len = pos->long_name_len + 1;
- len = ALIGN(len, 64);
- b.header.size = sizeof(b) + len;
-
- new = malloc(sizeof(*new));
- if (!new)
- die("No memory\n");
+ return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0;
+}
- memcpy(&new->event, &b, sizeof(b));
- new->dso_name = pos->long_name;
- new->len = len;
+bool dsos__read_build_ids(void)
+{
+ bool have_build_id = false;
+ struct dso *pos;
- list_add_tail(&new->list, head);
- }
+ list_for_each_entry(pos, &dsos, node)
+ if (filename__read_build_id(pos->long_name, pos->build_id,
+ sizeof(pos->build_id)) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ }
- return have_buildid;
+ return have_build_id;
}
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
+
int filename__read_build_id(const char *filename, void *bf, size_t size)
{
int fd, err = -1;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
- Elf_Data *build_id_data;
+ Elf_Data *data;
Elf_Scn *sec;
+ Elf_Kind ek;
+ void *ptr;
Elf *elf;
if (size < BUILD_ID_SIZE)
@@ -939,6 +940,10 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
goto out_close;
}
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out_elf_end;
+
if (gelf_getehdr(elf, &ehdr) == NULL) {
pr_err("%s: cannot get elf header.\n", __func__);
goto out_elf_end;
@@ -946,14 +951,37 @@ int filename__read_build_id(const char *filename, void *bf, size_t size)
sec = elf_section_by_name(elf, &ehdr, &shdr,
".note.gnu.build-id", NULL);
- if (sec == NULL)
- goto out_elf_end;
+ if (sec == NULL) {
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".notes", NULL);
+ if (sec == NULL)
+ goto out_elf_end;
+ }
- build_id_data = elf_getdata(sec, NULL);
- if (build_id_data == NULL)
+ data = elf_getdata(sec, NULL);
+ if (data == NULL)
goto out_elf_end;
- memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE);
- err = BUILD_ID_SIZE;
+
+ ptr = data->d_buf;
+ while (ptr < (data->d_buf + data->d_size)) {
+ GElf_Nhdr *nhdr = ptr;
+ int namesz = NOTE_ALIGN(nhdr->n_namesz),
+ descsz = NOTE_ALIGN(nhdr->n_descsz);
+ const char *name;
+
+ ptr += sizeof(*nhdr);
+ name = ptr;
+ ptr += namesz;
+ if (nhdr->n_type == NT_GNU_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU")) {
+ if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+ memcpy(bf, ptr, BUILD_ID_SIZE);
+ err = BUILD_ID_SIZE;
+ break;
+ }
+ }
+ ptr += descsz;
+ }
out_elf_end:
elf_end(elf);
out_close:
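
The loop above walks the note section entry by entry: each note is a GElf_Nhdr followed by the name and then the descriptor, with both sizes rounded up to a 4-byte boundary by NOTE_ALIGN (e.g. NOTE_ALIGN(5) == 8, NOTE_ALIGN(20) == 20). For a GNU build-id note, n_namesz is 4 ("GNU" plus the NUL) and n_descsz is the build-id length (20 bytes for the usual SHA-1), so the descriptor copied into bf starts sizeof(*nhdr) + 4 bytes into the entry.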
@@ -962,23 +990,48 @@ out:
return err;
}
-static char *dso__read_build_id(struct dso *self)
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
{
- int len;
- char *build_id = NULL;
- unsigned char rawbf[BUILD_ID_SIZE];
+ int fd, err = -1;
- len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf));
- if (len < 0)
+ if (size < BUILD_ID_SIZE)
goto out;
- build_id = malloc(len * 2 + 1);
- if (build_id == NULL)
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
goto out;
- build_id__sprintf(rawbf, len, build_id);
+ while (1) {
+ char bf[BUFSIZ];
+ GElf_Nhdr nhdr;
+ int namesz, descsz;
+
+ if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+ break;
+
+ namesz = NOTE_ALIGN(nhdr.n_namesz);
+ descsz = NOTE_ALIGN(nhdr.n_descsz);
+ if (nhdr.n_type == NT_GNU_BUILD_ID &&
+ nhdr.n_namesz == sizeof("GNU")) {
+ if (read(fd, bf, namesz) != namesz)
+ break;
+ if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+ if (read(fd, build_id,
+ BUILD_ID_SIZE) == BUILD_ID_SIZE) {
+ err = 0;
+ break;
+ }
+ } else if (read(fd, bf, descsz) != descsz)
+ break;
+ } else {
+ int n = namesz + descsz;
+ if (read(fd, bf, n) != n)
+ break;
+ }
+ }
+ close(fd);
out:
- return build_id;
+ return err;
}
char dso__symtab_origin(const struct dso *self)
@@ -1001,12 +1054,17 @@ char dso__symtab_origin(const struct dso *self)
int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
{
int size = PATH_MAX;
- char *name = malloc(size), *build_id = NULL;
+ char *name;
+ u8 build_id[BUILD_ID_SIZE];
int ret = -1;
int fd;
self->loaded = 1;
+ if (self->kernel)
+ return dso__load_kernel_sym(self, map, filter);
+
+ name = malloc(size);
if (!name)
return -1;
@@ -1023,8 +1081,6 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter)
more:
do {
- int berr = 0;
-
self->origin++;
switch (self->origin) {
case DSO__ORIG_FEDORA:
@@ -1036,12 +1092,18 @@ more:
self->long_name);
break;
case DSO__ORIG_BUILDID:
- build_id = dso__read_build_id(self);
- if (build_id != NULL) {
+ if (filename__read_build_id(self->long_name, build_id,
+ sizeof(build_id))) {
+ char build_id_hex[BUILD_ID_SIZE * 2 + 1];
+
+ build_id__sprintf(build_id, sizeof(build_id),
+ build_id_hex);
snprintf(name, size,
"/usr/lib/debug/.build-id/%.2s/%s.debug",
- build_id, build_id + 2);
- goto compare_build_id;
+ build_id_hex, build_id_hex + 2);
+ if (self->has_build_id)
+ goto compare_build_id;
+ break;
}
self->origin++;
/* Fall thru */
@@ -1054,18 +1116,11 @@ more:
}
if (self->has_build_id) {
- bool match;
- build_id = malloc(BUILD_ID_SIZE);
- if (build_id == NULL)
+ if (filename__read_build_id(name, build_id,
+ sizeof(build_id)) < 0)
goto more;
- berr = filename__read_build_id(name, build_id,
- BUILD_ID_SIZE);
compare_build_id:
- match = berr > 0 && memcmp(build_id, self->build_id,
- sizeof(self->build_id)) == 0;
- free(build_id);
- build_id = NULL;
- if (!match)
+ if (!dso__build_id_equal(self, build_id))
goto more;
}
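
With this change dso__load() verifies candidates instead of trusting paths: whenever the dso carries a build-id, the build-id of each candidate debuginfo file is read with filename__read_build_id() and compared via dso__build_id_equal() before the file is used, so stale or mismatched debug files are skipped and the search simply moves on to the next origin.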
@@ -1100,7 +1155,8 @@ static void kernel_maps__insert(struct map *map)
maps__insert(&kernel_maps, map);
}
-struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp)
+struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp,
+ symbol_filter_t filter)
{
struct map *map = maps__find(&kernel_maps, ip);
@@ -1109,7 +1165,7 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp)
if (map) {
ip = map->map_ip(map, ip);
- return map->dso->find_symbol(map->dso, ip);
+ return map__find_symbol(map, ip, filter);
}
return NULL;
@@ -1129,32 +1185,13 @@ struct map *kernel_maps__find_by_dso_name(const char *name)
return NULL;
}
-static int dso__load_module_sym(struct dso *self, struct map *map,
- symbol_filter_t filter)
-{
- int err = 0, fd = open(self->long_name, O_RDONLY);
-
- self->loaded = 1;
-
- if (fd < 0) {
- pr_err("%s: cannot open %s\n", __func__, self->long_name);
- return err;
- }
-
- err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1);
- close(fd);
-
- return err;
-}
-
-static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
+static int dsos__set_modules_path_dir(char *dirname)
{
struct dirent *dent;
- int nr_symbols = 0, err;
DIR *dir = opendir(dirname);
if (!dir) {
- pr_err("%s: cannot open %s dir\n", __func__, dirname);
+ pr_debug("%s: cannot open %s dir\n", __func__, dirname);
return -1;
}
@@ -1168,14 +1205,12 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
snprintf(path, sizeof(path), "%s/%s",
dirname, dent->d_name);
- err = dsos__load_modules_sym_dir(path, filter);
- if (err < 0)
+ if (dsos__set_modules_path_dir(path) < 0)
goto failure;
} else {
char *dot = strrchr(dent->d_name, '.'),
dso_name[PATH_MAX];
struct map *map;
- struct rb_node *last;
char *long_name;
if (dot == NULL || strcmp(dot, ".ko"))
@@ -1195,36 +1230,16 @@ static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter)
if (long_name == NULL)
goto failure;
dso__set_long_name(map->dso, long_name);
- dso__set_basename(map->dso);
-
- err = dso__load_module_sym(map->dso, map, filter);
- if (err < 0)
- goto failure;
- last = rb_last(&map->dso->syms);
- if (last) {
- struct symbol *sym;
- /*
- * We do this here as well, even having the
- * symbol size found in the symtab because
- * misannotated ASM symbols may have the size
- * set to zero.
- */
- dso__fixup_sym_end(map->dso);
-
- sym = rb_entry(last, struct symbol, rb_node);
- map->end = map->start + sym->end;
- }
}
- nr_symbols += err;
}
- return nr_symbols;
+ return 0;
failure:
closedir(dir);
return -1;
}
-static int dsos__load_modules_sym(symbol_filter_t filter)
+static int dsos__set_modules_path(void)
{
struct utsname uts;
char modules_path[PATH_MAX];
@@ -1235,7 +1250,7 @@ static int dsos__load_modules_sym(symbol_filter_t filter)
snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel",
uts.release);
- return dsos__load_modules_sym_dir(modules_path, filter);
+ return dsos__set_modules_path_dir(modules_path);
}
/*
@@ -1257,7 +1272,7 @@ static struct map *map__new2(u64 start, struct dso *dso)
return self;
}
-static int dsos__load_modules(void)
+static int kernel_maps__create_module_maps(void)
{
char *line = NULL;
size_t n;
@@ -1307,6 +1322,12 @@ static int dsos__load_modules(void)
goto out_delete_line;
}
+ snprintf(name, sizeof(name),
+ "/sys/module/%s/notes/.note.gnu.build-id", line);
+ if (sysfs__read_build_id(name, dso->build_id,
+ sizeof(dso->build_id)) == 0)
+ dso->has_build_id = true;
+
dso->origin = DSO__ORIG_KMODULE;
kernel_maps__insert(map);
dsos__add(dso);
@@ -1315,7 +1336,7 @@ static int dsos__load_modules(void)
free(line);
fclose(file);
- return 0;
+ return dsos__set_modules_path();
out_delete_line:
free(line);
@@ -1326,13 +1347,37 @@ out_failure:
static int dso__load_vmlinux(struct dso *self, struct map *map,
const char *vmlinux, symbol_filter_t filter)
{
- int err, fd = open(vmlinux, O_RDONLY);
+ int err = -1, fd;
- self->loaded = 1;
+ if (self->has_build_id) {
+ u8 build_id[BUILD_ID_SIZE];
+
+ if (filename__read_build_id(vmlinux, build_id,
+ sizeof(build_id)) < 0) {
+ pr_debug("No build_id in %s, ignoring it\n", vmlinux);
+ return -1;
+ }
+ if (!dso__build_id_equal(self, build_id)) {
+ char expected_build_id[BUILD_ID_SIZE * 2 + 1],
+ vmlinux_build_id[BUILD_ID_SIZE * 2 + 1];
+
+ build_id__sprintf(self->build_id,
+ sizeof(self->build_id),
+ expected_build_id);
+ build_id__sprintf(build_id, sizeof(build_id),
+ vmlinux_build_id);
+ pr_debug("build_id in %s is %s while expected is %s, "
+ "ignoring it\n", vmlinux, vmlinux_build_id,
+ expected_build_id);
+ return -1;
+ }
+ }
+ fd = open(vmlinux, O_RDONLY);
if (fd < 0)
return -1;
+ self->loaded = 1;
err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0);
close(fd);
@@ -1340,78 +1385,55 @@ static int dso__load_vmlinux(struct dso *self, struct map *map,
return err;
}
-int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter,
- int use_modules)
+static int dso__load_kernel_sym(struct dso *self, struct map *map,
+ symbol_filter_t filter)
{
- int err = -1;
- struct dso *dso = dso__new(vmlinux);
-
- if (dso == NULL)
- return -1;
-
- dso->short_name = "[kernel]";
- kernel_map = map__new2(0, dso);
- if (kernel_map == NULL)
- goto out_delete_dso;
-
- kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip;
-
- if (use_modules && dsos__load_modules() < 0) {
- pr_warning("Failed to load list of modules in use! "
- "Continuing...\n");
- use_modules = 0;
- }
-
- if (vmlinux) {
- err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter);
- if (err > 0 && use_modules) {
- int syms = dsos__load_modules_sym(filter);
-
- if (syms < 0)
- pr_warning("Failed to read module symbols!"
- " Continuing...\n");
- else
- err += syms;
+ int err;
+ bool is_kallsyms;
+
+ if (vmlinux_path != NULL) {
+ int i;
+ pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+ vmlinux_path__nr_entries);
+ for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+ err = dso__load_vmlinux(self, map, vmlinux_path[i],
+ filter);
+ if (err > 0) {
+ pr_debug("Using %s for symbols\n",
+ vmlinux_path[i]);
+ dso__set_long_name(self,
+ strdup(vmlinux_path[i]));
+ goto out_fixup;
+ }
}
}
- if (err <= 0)
- err = kernel_maps__load_kallsyms(filter, use_modules);
+ is_kallsyms = self->long_name[0] == '[';
+ if (is_kallsyms)
+ goto do_kallsyms;
+
+ err = dso__load_vmlinux(self, map, self->long_name, filter);
+ if (err <= 0) {
+ pr_info("The file %s cannot be used, "
+ "trying to use /proc/kallsyms...", self->long_name);
+ sleep(2);
+do_kallsyms:
+ err = kernel_maps__load_kallsyms(filter);
+ if (err > 0 && !is_kallsyms)
+ dso__set_long_name(self, strdup("[kernel.kallsyms]"));
+ }
if (err > 0) {
- struct rb_node *node = rb_first(&dso->syms);
- struct symbol *sym = rb_entry(node, struct symbol, rb_node);
-
- kernel_map->start = sym->start;
- node = rb_last(&dso->syms);
- sym = rb_entry(node, struct symbol, rb_node);
- kernel_map->end = sym->end;
-
- dso->origin = DSO__ORIG_KERNEL;
- kernel_maps__insert(kernel_map);
- /*
- * Now that we have all sorted out, just set the ->end of all
- * maps:
- */
- kernel_maps__fixup_end();
- dsos__add(dso);
-
- if (verbose)
- kernel_maps__fprintf(stderr);
+out_fixup:
+ map__fixup_start(map);
+ map__fixup_end(map);
}
return err;
-
-out_delete_dso:
- dso__delete(dso);
- return -1;
}
LIST_HEAD(dsos);
-struct dso *vdso;
-
-const char *vmlinux_name = "vmlinux";
-int modules;
+struct dso *vdso;
static void dsos__add(struct dso *dso)
{
@@ -1463,18 +1485,117 @@ size_t dsos__fprintf_buildid(FILE *fp)
return ret;
}
-int load_kernel(symbol_filter_t filter)
+static int kernel_maps__create_kernel_map(const char *vmlinux_name)
{
- if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0)
+ struct dso *kernel = dso__new(vmlinux_name ?: "[kernel.kallsyms]");
+
+ if (kernel == NULL)
return -1;
+ kernel_map = map__new2(0, kernel);
+ if (kernel_map == NULL)
+ goto out_delete_kernel_dso;
+
+ kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip;
+ kernel->short_name = "[kernel]";
+ kernel->kernel = 1;
+
vdso = dso__new("[vdso]");
- if (!vdso)
- return -1;
+ if (vdso == NULL)
+ goto out_delete_kernel_map;
+
+ if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id,
+ sizeof(kernel->build_id)) == 0)
+ kernel->has_build_id = true;
+ kernel_maps__insert(kernel_map);
+ dsos__add(kernel);
dsos__add(vdso);
return 0;
+
+out_delete_kernel_map:
+ map__delete(kernel_map);
+ kernel_map = NULL;
+out_delete_kernel_dso:
+ dso__delete(kernel);
+ return -1;
+}
+
+static void vmlinux_path__exit(void)
+{
+ while (--vmlinux_path__nr_entries >= 0) {
+ free(vmlinux_path[vmlinux_path__nr_entries]);
+ vmlinux_path[vmlinux_path__nr_entries] = NULL;
+ }
+
+ free(vmlinux_path);
+ vmlinux_path = NULL;
+}
+
+static int vmlinux_path__init(void)
+{
+ struct utsname uts;
+ char bf[PATH_MAX];
+
+ if (uname(&uts) < 0)
+ return -1;
+
+ vmlinux_path = malloc(sizeof(char *) * 5);
+ if (vmlinux_path == NULL)
+ return -1;
+
+ vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux");
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ goto out_fail;
+ ++vmlinux_path__nr_entries;
+ vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux");
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ goto out_fail;
+ ++vmlinux_path__nr_entries;
+ snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release);
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ goto out_fail;
+ ++vmlinux_path__nr_entries;
+ snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release);
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ goto out_fail;
+ ++vmlinux_path__nr_entries;
+ snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux",
+ uts.release);
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(bf);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ goto out_fail;
+ ++vmlinux_path__nr_entries;
+
+ return 0;
+
+out_fail:
+ vmlinux_path__exit();
+ return -1;
+}
+
+int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path,
+ bool use_modules)
+{
+ if (try_vmlinux_path && vmlinux_path__init() < 0)
+ return -1;
+
+ if (kernel_maps__create_kernel_map(vmlinux_name) < 0) {
+ vmlinux_path__exit();
+ return -1;
+ }
+
+ if (use_modules && kernel_maps__create_module_maps() < 0)
+ pr_debug("Failed to load list of modules in use, "
+ "continuing...\n");
+ /*
+ * Now that we have all the maps created, just set their ->end:
+ */
+ kernel_maps__fixup_end();
+ return 0;
}
void symbol__init(unsigned int priv_size)
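
vmlinux_path__init() above fixes the search order used when no vmlinux is passed explicitly. For a kernel whose uname(2) release is, say, 2.6.32, the table reads, in order: vmlinux, /boot/vmlinux, /boot/vmlinux-2.6.32, /lib/modules/2.6.32/build/vmlinux and /usr/lib/debug/lib/modules/2.6.32/vmlinux; dso__load_kernel_sym() walks that list, comparing build-ids when available, and falls back to /proc/kallsyms if nothing usable is found.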
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 5ad1019607dd..8c4d026e067a 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -64,6 +64,7 @@ struct dso {
u8 slen_calculated:1;
u8 loaded:1;
u8 has_build_id:1;
+ u8 kernel:1;
unsigned char origin;
u8 build_id[BUILD_ID_SIZE];
u16 long_name_len;
@@ -77,7 +78,6 @@ void dso__delete(struct dso *self);
struct symbol *dso__find_symbol(struct dso *self, u64 ip);
-int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules);
struct dso *dsos__findnew(const char *name);
int dso__load(struct dso *self, struct map *map, symbol_filter_t filter);
void dsos__fprintf(FILE *fp);
@@ -89,16 +89,17 @@ char dso__symtab_origin(const struct dso *self);
void dso__set_build_id(struct dso *self, void *build_id);
int filename__read_build_id(const char *filename, void *bf, size_t size);
-bool fetch_build_id_table(struct list_head *head);
+int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+bool dsos__read_build_ids(void);
int build_id__sprintf(u8 *self, int len, char *bf);
-int load_kernel(symbol_filter_t filter);
+int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path,
+ bool use_modules);
+size_t kernel_maps__fprintf(FILE *fp);
void symbol__init(unsigned int priv_size);
extern struct list_head dsos;
extern struct map *kernel_map;
extern struct dso *vdso;
-extern const char *vmlinux_name;
-extern int modules;
#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 53addd77ce8f..e4b8d437725a 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -26,7 +26,8 @@ size_t threads__fprintf(FILE *fp);
void maps__insert(struct rb_root *maps, struct map *map);
struct map *maps__find(struct rb_root *maps, u64 ip);
-struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp);
+struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp,
+ symbol_filter_t filter);
struct map *kernel_maps__find_by_dso_name(const char *name);
static inline struct map *thread__find_map(struct thread *self, u64 ip)
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 831052d4b4fb..cace35595530 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -33,11 +33,11 @@
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
+#include <linux/kernel.h>
#include "../perf.h"
#include "trace-event.h"
-
#define VERSION "0.5"
#define _STR(x) #x
@@ -483,23 +483,31 @@ static struct tracepoint_path *
get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
{
struct tracepoint_path path, *ppath = &path;
- int i;
+ int i, nr_tracepoints = 0;
for (i = 0; i < nb_events; i++) {
if (pattrs[i].type != PERF_TYPE_TRACEPOINT)
continue;
+ ++nr_tracepoints;
ppath->next = tracepoint_id_to_path(pattrs[i].config);
if (!ppath->next)
die("%s\n", "No memory to alloc tracepoints list");
ppath = ppath->next;
}
- return path.next;
+ return nr_tracepoints > 0 ? path.next : NULL;
}
-void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
+
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
{
char buf[BUFSIZ];
- struct tracepoint_path *tps;
+ struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events);
+
+ /*
+ * What? No tracepoints? No sense writing anything here, bail out.
+ */
+ if (tps == NULL)
+ return -1;
output_fd = fd;
@@ -528,11 +536,11 @@ void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
page_size = getpagesize();
write_or_die(&page_size, 4);
- tps = get_tracepoints_path(pattrs, nb_events);
-
read_header_files();
read_ftrace_files(tps);
read_event_files(tps);
read_proc_kallsyms();
read_ftrace_printk();
+
+ return 0;
}
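
read_tracing_data() now signals, through its return value, the case where no tracepoint events were requested, rather than writing an empty tracing-data section; callers are expected to check it. A sketch of a call site (variable names hypothetical):

	if (read_tracing_data(output_fd, attrs, nr_counters) < 0)
		pr_debug("no tracepoints requested, skipping tracing data\n");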
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 44292e06cca4..342dfdd43f87 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -471,11 +471,11 @@ void trace_report(int fd)
read_or_die(buf, 3);
if (memcmp(buf, test, 3) != 0)
- die("not an trace data file");
+ die("no trace data in the file");
read_or_die(buf, 7);
if (memcmp(buf, "tracing", 7) != 0)
- die("not a trace file (missing tracing)");
+ die("not a trace file (missing 'tracing' tag)");
version = read_string();
if (show_version)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index f6637c2fa1fe..dd51c6872a15 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -248,7 +248,7 @@ unsigned long long
raw_field_value(struct event *event, const char *name, void *data);
void *raw_field_ptr(struct event *event, const char *name, void *data);
-void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
+int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events);
/* taken from kernel/trace/trace.h */
enum trace_flag_type {
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index f2203a0946bc..e1c623e0c99e 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -84,6 +84,9 @@
#include <iconv.h>
#endif
+extern const char *graph_line;
+extern const char *graph_dotted_line;
+
/* On most systems <limits.h> would have given us this, but
* not on some systems (e.g. GNU/Hurd).
*/