summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/lockup-watchdogs.txt63
-rw-r--r--Documentation/nmi_watchdog.txt83
-rw-r--r--arch/Kconfig17
-rw-r--r--arch/arm/include/asm/perf_event.h4
-rw-r--r--arch/frv/include/asm/perf_event.h2
-rw-r--r--arch/hexagon/include/asm/perf_event.h2
-rw-r--r--arch/powerpc/include/asm/perf_event_server.h2
-rw-r--r--arch/powerpc/kernel/perf_event.c6
-rw-r--r--arch/s390/include/asm/perf_event.h1
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/include/asm/inat.h5
-rw-r--r--arch/x86/include/asm/insn.h18
-rw-r--r--arch/x86/include/asm/perf_event.h2
-rw-r--r--arch/x86/include/asm/uprobes.h43
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/cpu/amd.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event.c82
-rw-r--r--arch/x86/kernel/cpu/perf_event.h8
-rw-r--r--arch/x86/kernel/uprobes.c423
-rw-r--r--arch/x86/lib/inat.c36
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--include/linux/jump_label.h32
-rw-r--r--include/linux/perf_event.h11
-rw-r--r--include/linux/uprobes.h76
-rw-r--r--include/trace/events/signal.h85
-rw-r--r--kernel/events/Makefile3
-rw-r--r--kernel/events/core.c53
-rw-r--r--kernel/events/hw_breakpoint.c7
-rw-r--r--kernel/events/uprobes.c1029
-rw-r--r--kernel/signal.c28
-rw-r--r--kernel/watchdog.c24
-rw-r--r--lib/Kconfig.debug18
-rw-r--r--mm/mmap.c23
-rw-r--r--tools/perf/Documentation/Makefile86
-rw-r--r--tools/perf/Documentation/perf-lock.txt20
-rw-r--r--tools/perf/Documentation/perf-record.txt8
-rw-r--r--tools/perf/Documentation/perf-script.txt5
-rw-r--r--tools/perf/Documentation/perf-stat.txt4
-rw-r--r--tools/perf/Documentation/perf-top.txt8
-rw-r--r--tools/perf/MANIFEST1
-rw-r--r--tools/perf/Makefile12
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h8
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S6
-rw-r--r--tools/perf/bench/mem-memcpy.c12
-rw-r--r--tools/perf/bench/mem-memset-arch.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S13
-rw-r--r--tools/perf/bench/mem-memset.c297
-rw-r--r--tools/perf/builtin-bench.c3
-rw-r--r--tools/perf/builtin-lock.c4
-rw-r--r--tools/perf/builtin-probe.c12
-rw-r--r--tools/perf/builtin-record.c86
-rw-r--r--tools/perf/builtin-script.c80
-rw-r--r--tools/perf/builtin-stat.c41
-rw-r--r--tools/perf/builtin-test.c188
-rw-r--r--tools/perf/builtin-top.c64
-rw-r--r--tools/perf/perf.h9
-rwxr-xr-xtools/perf/python/twatch.py2
-rw-r--r--tools/perf/util/bitmap.c10
-rw-r--r--tools/perf/util/cpumap.c11
-rw-r--r--tools/perf/util/cpumap.h4
-rw-r--r--tools/perf/util/ctype.c2
-rw-r--r--tools/perf/util/debugfs.c141
-rw-r--r--tools/perf/util/debugfs.h6
-rw-r--r--tools/perf/util/evlist.c16
-rw-r--r--tools/perf/util/evlist.h9
-rw-r--r--tools/perf/util/evsel.c8
-rw-r--r--tools/perf/util/header.c421
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.h2
-rw-r--r--tools/perf/util/include/asm/dwarf2.h4
-rw-r--r--tools/perf/util/include/linux/bitmap.h11
-rw-r--r--tools/perf/util/map.c15
-rw-r--r--tools/perf/util/map.h1
-rw-r--r--tools/perf/util/probe-event.c32
-rw-r--r--tools/perf/util/probe-finder.c1
-rw-r--r--tools/perf/util/python-ext-sources19
-rw-r--r--tools/perf/util/python.c10
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c1
-rw-r--r--tools/perf/util/session.c49
-rw-r--r--tools/perf/util/session.h2
-rw-r--r--tools/perf/util/setup.py8
-rw-r--r--tools/perf/util/symbol.c24
-rw-r--r--tools/perf/util/symbol.h4
-rw-r--r--tools/perf/util/sysfs.c60
-rw-r--r--tools/perf/util/sysfs.h6
-rw-r--r--tools/perf/util/thread_map.c237
-rw-r--r--tools/perf/util/thread_map.h11
-rw-r--r--tools/perf/util/top.c13
-rw-r--r--tools/perf/util/top.h7
-rw-r--r--tools/perf/util/trace-event-parse.c1
-rw-r--r--tools/perf/util/trace-event-read.c1
-rw-r--r--tools/perf/util/trace-event-scripting.c1
-rw-r--r--tools/perf/util/ui/browsers/hists.c3
-rw-r--r--tools/perf/util/ui/browsers/map.c2
-rw-r--r--tools/perf/util/usage.c39
-rw-r--r--tools/perf/util/util.c4
-rw-r--r--tools/perf/util/util.h6
99 files changed, 3601 insertions, 804 deletions
diff --git a/Documentation/lockup-watchdogs.txt b/Documentation/lockup-watchdogs.txt
new file mode 100644
index 000000000000..d2a36602ca8d
--- /dev/null
+++ b/Documentation/lockup-watchdogs.txt
@@ -0,0 +1,63 @@
+===============================================================
+Softlockup detector and hardlockup detector (aka nmi_watchdog)
+===============================================================
+
+The Linux kernel can act as a watchdog to detect both soft and hard
+lockups.
+
+A 'softlockup' is defined as a bug that causes the kernel to loop in
+kernel mode for more than 20 seconds (see "Implementation" below for
+details), without giving other tasks a chance to run. The current
+stack trace is displayed upon detection and, by default, the system
+will stay locked up. Alternatively, the kernel can be configured to
+panic; a sysctl, "kernel.softlockup_panic", a kernel parameter,
+"softlockup_panic" (see "Documentation/kernel-parameters.txt" for
+details), and a compile option, "BOOTPARAM_HARDLOCKUP_PANIC", are
+provided for this.
+
+A 'hardlockup' is defined as a bug that causes the CPU to loop in
+kernel mode for more than 10 seconds (see "Implementation" below for
+details), without letting other interrupts have a chance to run.
+Similarly to the softlockup case, the current stack trace is displayed
+upon detection and the system will stay locked up unless the default
+behavior is changed, which can be done through a compile time knob,
+"BOOTPARAM_HARDLOCKUP_PANIC", and a kernel parameter, "nmi_watchdog"
+(see "Documentation/kernel-parameters.txt" for details).
+
+The panic option can be used in combination with panic_timeout (this
+timeout is set through the confusingly named "kernel.panic" sysctl),
+to cause the system to reboot automatically after a specified amount
+of time.
+
+=== Implementation ===
+
+The soft and hard lockup detectors are built on top of the hrtimer and
+perf subsystems, respectively. A direct consequence of this is that,
+in principle, they should work in any architecture where these
+subsystems are present.
+
+A periodic hrtimer runs to generate interrupts and kick the watchdog
+task. An NMI perf event is generated every "watchdog_thresh"
+(compile-time initialized to 10 and configurable through sysctl of the
+same name) seconds to check for hardlockups. If any CPU in the system
+does not receive any hrtimer interrupt during that time the
+'hardlockup detector' (the handler for the NMI perf event) will
+generate a kernel warning or call panic, depending on the
+configuration.
+
+The watchdog task is a high priority kernel thread that updates a
+timestamp every time it is scheduled. If that timestamp is not updated
+for 2*watchdog_thresh seconds (the softlockup threshold) the
+'softlockup detector' (coded inside the hrtimer callback function)
+will dump useful debug information to the system log, after which it
+will call panic if it was instructed to do so or resume execution of
+other kernel code.
+
+The period of the hrtimer is 2*watchdog_thresh/5, which means it has
+two or three chances to generate an interrupt before the hardlockup
+detector kicks in.
+
+As explained above, a kernel knob is provided that allows
+administrators to configure the period of the hrtimer and the perf
+event. The right value for a particular environment is a trade-off
+between fast response to lockups and detection overhead.
diff --git a/Documentation/nmi_watchdog.txt b/Documentation/nmi_watchdog.txt
deleted file mode 100644
index bf9f80a98282..000000000000
--- a/Documentation/nmi_watchdog.txt
+++ /dev/null
@@ -1,83 +0,0 @@
-
-[NMI watchdog is available for x86 and x86-64 architectures]
-
-Is your system locking up unpredictably? No keyboard activity, just
-a frustrating complete hard lockup? Do you want to help us debugging
-such lockups? If all yes then this document is definitely for you.
-
-On many x86/x86-64 type hardware there is a feature that enables
-us to generate 'watchdog NMI interrupts'. (NMI: Non Maskable Interrupt
-which get executed even if the system is otherwise locked up hard).
-This can be used to debug hard kernel lockups. By executing periodic
-NMI interrupts, the kernel can monitor whether any CPU has locked up,
-and print out debugging messages if so.
-
-In order to use the NMI watchdog, you need to have APIC support in your
-kernel. For SMP kernels, APIC support gets compiled in automatically. For
-UP, enable either CONFIG_X86_UP_APIC (Processor type and features -> Local
-APIC support on uniprocessors) or CONFIG_X86_UP_IOAPIC (Processor type and
-features -> IO-APIC support on uniprocessors) in your kernel config.
-CONFIG_X86_UP_APIC is for uniprocessor machines without an IO-APIC.
-CONFIG_X86_UP_IOAPIC is for uniprocessor with an IO-APIC. [Note: certain
-kernel debugging options, such as Kernel Stack Meter or Kernel Tracer,
-may implicitly disable the NMI watchdog.]
-
-For x86-64, the needed APIC is always compiled in.
-
-Using local APIC (nmi_watchdog=2) needs the first performance register, so
-you can't use it for other purposes (such as high precision performance
-profiling.) However, at least oprofile and the perfctr driver disable the
-local APIC NMI watchdog automatically.
-
-To actually enable the NMI watchdog, use the 'nmi_watchdog=N' boot
-parameter. Eg. the relevant lilo.conf entry:
-
- append="nmi_watchdog=1"
-
-For SMP machines and UP machines with an IO-APIC use nmi_watchdog=1.
-For UP machines without an IO-APIC use nmi_watchdog=2, this only works
-for some processor types. If in doubt, boot with nmi_watchdog=1 and
-check the NMI count in /proc/interrupts; if the count is zero then
-reboot with nmi_watchdog=2 and check the NMI count. If it is still
-zero then log a problem, you probably have a processor that needs to be
-added to the nmi code.
-
-A 'lockup' is the following scenario: if any CPU in the system does not
-execute the period local timer interrupt for more than 5 seconds, then
-the NMI handler generates an oops and kills the process. This
-'controlled crash' (and the resulting kernel messages) can be used to
-debug the lockup. Thus whenever the lockup happens, wait 5 seconds and
-the oops will show up automatically. If the kernel produces no messages
-then the system has crashed so hard (eg. hardware-wise) that either it
-cannot even accept NMI interrupts, or the crash has made the kernel
-unable to print messages.
-
-Be aware that when using local APIC, the frequency of NMI interrupts
-it generates, depends on the system load. The local APIC NMI watchdog,
-lacking a better source, uses the "cycles unhalted" event. As you may
-guess it doesn't tick when the CPU is in the halted state (which happens
-when the system is idle), but if your system locks up on anything but the
-"hlt" processor instruction, the watchdog will trigger very soon as the
-"cycles unhalted" event will happen every clock tick. If it locks up on
-"hlt", then you are out of luck -- the event will not happen at all and the
-watchdog won't trigger. This is a shortcoming of the local APIC watchdog
--- unfortunately there is no "clock ticks" event that would work all the
-time. The I/O APIC watchdog is driven externally and has no such shortcoming.
-But its NMI frequency is much higher, resulting in a more significant hit
-to the overall system performance.
-
-On x86 nmi_watchdog is disabled by default so you have to enable it with
-a boot time parameter.
-
-It's possible to disable the NMI watchdog in run-time by writing "0" to
-/proc/sys/kernel/nmi_watchdog. Writing "1" to the same file will re-enable
-the NMI watchdog. Notice that you still need to use "nmi_watchdog=" parameter
-at boot time.
-
-NOTE: In kernels prior to 2.4.2-ac18 the NMI-oopser is enabled unconditionally
-on x86 SMP boxes.
-
-[ feel free to send bug reports, suggestions and patches to
- Ingo Molnar <mingo@redhat.com> or the Linux SMP mailing
- list at <linux-smp@vger.kernel.org> ]
-
diff --git a/arch/Kconfig b/arch/Kconfig
index 4f55c736be11..d0e37c9d5f6b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -65,6 +65,23 @@ config OPTPROBES
depends on KPROBES && HAVE_OPTPROBES
depends on !PREEMPT
+config UPROBES
+ bool "Transparent user-space probes (EXPERIMENTAL)"
+ depends on ARCH_SUPPORTS_UPROBES && PERF_EVENTS
+ default n
+ help
+ Uprobes is the user-space counterpart to kprobes: they
+ enable instrumentation applications (such as 'perf probe')
+ to establish unintrusive probes in user-space binaries and
+ libraries, by executing handler functions when the probes
+ are hit by user-space applications.
+
+ ( These probes come in the form of single-byte breakpoints,
+ managed by the kernel and kept transparent to the probed
+ application. )
+
+ If in doubt, say "N".
+
config HAVE_EFFICIENT_UNALIGNED_ACCESS
bool
help
diff --git a/arch/arm/include/asm/perf_event.h b/arch/arm/include/asm/perf_event.h
index 99cfe3607989..7523340afb8a 100644
--- a/arch/arm/include/asm/perf_event.h
+++ b/arch/arm/include/asm/perf_event.h
@@ -12,10 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
-/* ARM performance counters start from 1 (in the cp15 accesses) so use the
- * same indexes here for consistency. */
-#define PERF_EVENT_INDEX_OFFSET 1
-
/* ARM perf PMU IDs for use by internal perf clients. */
enum arm_perf_pmu_ids {
ARM_PERF_PMU_ID_XSCALE1 = 0,
diff --git a/arch/frv/include/asm/perf_event.h b/arch/frv/include/asm/perf_event.h
index a69e0155d146..c52ea5546b5b 100644
--- a/arch/frv/include/asm/perf_event.h
+++ b/arch/frv/include/asm/perf_event.h
@@ -12,6 +12,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/hexagon/include/asm/perf_event.h b/arch/hexagon/include/asm/perf_event.h
index 6c2910f91180..8b8526b491c7 100644
--- a/arch/hexagon/include/asm/perf_event.h
+++ b/arch/hexagon/include/asm/perf_event.h
@@ -19,6 +19,4 @@
#ifndef _ASM_PERF_EVENT_H
#define _ASM_PERF_EVENT_H
-#define PERF_EVENT_INDEX_OFFSET 0
-
#endif /* _ASM_PERF_EVENT_H */
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 8f1df1208d23..1a8093fa8f71 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -61,8 +61,6 @@ struct pt_regs;
extern unsigned long perf_misc_flags(struct pt_regs *regs);
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
-#define PERF_EVENT_INDEX_OFFSET 1
-
/*
* Only override the default definitions in include/linux/perf_event.h
* if we have hardware PMU support.
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 64483fde95c6..f04c2301725e 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -1193,6 +1193,11 @@ static int power_pmu_event_init(struct perf_event *event)
return err;
}
+static int power_pmu_event_idx(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
struct pmu power_pmu = {
.pmu_enable = power_pmu_enable,
.pmu_disable = power_pmu_disable,
@@ -1205,6 +1210,7 @@ struct pmu power_pmu = {
.start_txn = power_pmu_start_txn,
.cancel_txn = power_pmu_cancel_txn,
.commit_txn = power_pmu_commit_txn,
+ .event_idx = power_pmu_event_idx,
};
/*
diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h
index a75f168d2718..4eb444edbe49 100644
--- a/arch/s390/include/asm/perf_event.h
+++ b/arch/s390/include/asm/perf_event.h
@@ -6,4 +6,3 @@
/* Empty, just to avoid compiling error */
-#define PERF_EVENT_INDEX_OFFSET 0
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e2b38b4bffdc..d2a540f7d6cb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -84,7 +84,7 @@ config X86
select GENERIC_IOMAP
config INSTRUCTION_DECODER
- def_bool (KPROBES || PERF_EVENTS)
+ def_bool (KPROBES || PERF_EVENTS || UPROBES)
config OUTPUT_FORMAT
string
@@ -240,6 +240,9 @@ config ARCH_CPU_PROBE_RELEASE
def_bool y
depends on HOTPLUG_CPU
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
source "init/Kconfig"
source "kernel/Kconfig.freezer"
diff --git a/arch/x86/include/asm/inat.h b/arch/x86/include/asm/inat.h
index 205b063e3e32..74a2e312e8a2 100644
--- a/arch/x86/include/asm/inat.h
+++ b/arch/x86/include/asm/inat.h
@@ -97,11 +97,12 @@
/* Attribute search APIs */
extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
- insn_byte_t last_pfx,
+ int lpfx_id,
insn_attr_t esc_attr);
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 74df3f1eddfd..48eb30a86062 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -96,12 +96,6 @@ struct insn {
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
-/* The last prefix is needed for two-byte and three-byte opcodes */
-static inline insn_byte_t insn_last_prefix(struct insn *insn)
-{
- return insn->prefixes.bytes[3];
-}
-
extern void insn_init(struct insn *insn, const void *kaddr, int x86_64);
extern void insn_get_prefixes(struct insn *insn);
extern void insn_get_opcode(struct insn *insn);
@@ -160,6 +154,18 @@ static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
return X86_VEX_P(insn->vex_prefix.bytes[2]);
}
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
/* Offset of each field from kaddr */
static inline int insn_offset_rex_prefix(struct insn *insn)
{
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 461ce432b1c2..e8fb2c7a5f4f 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -188,8 +188,6 @@ extern u32 get_ibs_caps(void);
#ifdef CONFIG_PERF_EVENTS
extern void perf_events_lapic_init(void);
-#define PERF_EVENT_INDEX_OFFSET 0
-
/*
* Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
* This flag is otherwise unused and ABI specified to be 0, so nobody should
diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
new file mode 100644
index 000000000000..0500391f57d0
--- /dev/null
+++ b/arch/x86/include/asm/uprobes.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+/*
+ * User-space Probes (UProbes) for x86
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2011
+ * Authors:
+ * Srikar Dronamraju
+ * Jim Keniston
+ */
+
+typedef u8 uprobe_opcode_t;
+
+#define MAX_UINSN_BYTES 16
+#define UPROBE_XOL_SLOT_BYTES 128 /* to keep it cache aligned */
+
+#define UPROBE_SWBP_INSN 0xcc
+#define UPROBE_SWBP_INSN_SIZE 1
+
+struct arch_uprobe {
+ u16 fixups;
+ u8 insn[MAX_UINSN_BYTES];
+#ifdef CONFIG_X86_64
+ unsigned long rip_rela_target_address;
+#endif
+};
+
+extern int arch_uprobes_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
+#endif /* _ASM_UPROBES_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5369059c07a9..8c8c365a3bc3 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
+obj-$(CONFIG_UPROBES) += uprobes.o
###
# 64 bit specific files
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index f4773f4aae35..0a44b90602b0 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -5,6 +5,7 @@
#include <linux/mm.h>
#include <linux/io.h>
+#include <linux/sched.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
@@ -456,6 +457,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
if (c->x86_power & (1 << 8)) {
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
+ if (!check_tsc_unstable())
+ sched_clock_stable = 1;
}
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 63c0e058a405..1c52bdbb9b8b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -24,12 +24,14 @@
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/bitops.h>
+#include <linux/device.h>
#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/smp.h>
#include <asm/alternative.h>
+#include <asm/timer.h>
#include "perf_event.h"
@@ -1209,6 +1211,8 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
case CPU_STARTING:
+ if (x86_pmu.attr_rdpmc)
+ set_in_cr4(X86_CR4_PCE);
if (x86_pmu.cpu_starting)
x86_pmu.cpu_starting(cpu);
break;
@@ -1318,6 +1322,8 @@ static int __init init_hw_perf_events(void)
}
}
+ x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */
+
pr_info("... version: %d\n", x86_pmu.version);
pr_info("... bit width: %d\n", x86_pmu.cntval_bits);
pr_info("... generic registers: %d\n", x86_pmu.num_counters);
@@ -1541,10 +1547,71 @@ static int x86_pmu_event_init(struct perf_event *event)
return err;
}
+static int x86_pmu_event_idx(struct perf_event *event)
+{
+ int idx = event->hw.idx;
+
+ if (x86_pmu.num_counters_fixed && idx >= X86_PMC_IDX_FIXED) {
+ idx -= X86_PMC_IDX_FIXED;
+ idx |= 1 << 30;
+ }
+
+ return idx + 1;
+}
+
+static ssize_t get_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", x86_pmu.attr_rdpmc);
+}
+
+static void change_rdpmc(void *info)
+{
+ bool enable = !!(unsigned long)info;
+
+ if (enable)
+ set_in_cr4(X86_CR4_PCE);
+ else
+ clear_in_cr4(X86_CR4_PCE);
+}
+
+static ssize_t set_attr_rdpmc(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long val = simple_strtoul(buf, NULL, 0);
+
+ if (!!val != !!x86_pmu.attr_rdpmc) {
+ x86_pmu.attr_rdpmc = !!val;
+ smp_call_function(change_rdpmc, (void *)val, 1);
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR(rdpmc, S_IRUSR | S_IWUSR, get_attr_rdpmc, set_attr_rdpmc);
+
+static struct attribute *x86_pmu_attrs[] = {
+ &dev_attr_rdpmc.attr,
+ NULL,
+};
+
+static struct attribute_group x86_pmu_attr_group = {
+ .attrs = x86_pmu_attrs,
+};
+
+static const struct attribute_group *x86_pmu_attr_groups[] = {
+ &x86_pmu_attr_group,
+ NULL,
+};
+
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
+ .attr_groups = x86_pmu_attr_groups,
+
.event_init = x86_pmu_event_init,
.add = x86_pmu_add,
@@ -1556,8 +1623,23 @@ static struct pmu pmu = {
.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,
+
+ .event_idx = x86_pmu_event_idx,
};
+void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+ return;
+
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
+ return;
+
+ userpg->time_mult = this_cpu_read(cyc2ns);
+ userpg->time_shift = CYC2NS_SCALE_FACTOR;
+ userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
+}
+
/*
* callchain support
*/
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index c30c807ddc72..82db83b5c3bc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -309,6 +309,14 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
+ /*
+ * sysfs attrs
+ */
+ int attr_rdpmc;
+
+ /*
+ * CPU Hotplug hooks
+ */
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
new file mode 100644
index 000000000000..851a11b0d38c
--- /dev/null
+++ b/arch/x86/kernel/uprobes.c
@@ -0,0 +1,423 @@
+/*
+ * User-space Probes (UProbes) for x86
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2011
+ * Authors:
+ * Srikar Dronamraju
+ * Jim Keniston
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+
+#include <linux/kdebug.h>
+#include <asm/insn.h>
+
+/* Post-execution fixups. */
+
+/* No fixup needed */
+#define UPROBE_FIX_NONE 0x0
+/* Adjust IP back to vicinity of actual insn */
+#define UPROBE_FIX_IP 0x1
+/* Adjust the return address of a call insn */
+#define UPROBE_FIX_CALL 0x2
+
+#define UPROBE_FIX_RIP_AX 0x8000
+#define UPROBE_FIX_RIP_CX 0x4000
+
+/* Adaptations for mhiramat x86 decoder v14. */
+#define OPCODE1(insn) ((insn)->opcode.bytes[0])
+#define OPCODE2(insn) ((insn)->opcode.bytes[1])
+#define OPCODE3(insn) ((insn)->opcode.bytes[2])
+#define MODRM_REG(insn) X86_MODRM_REG(insn->modrm.value)
+
+#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
+ (((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
+ (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
+ (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
+ (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
+ << (row % 32))
+
+/*
+ * Good-instruction tables for 32-bit apps. This is non-const and volatile
+ * to keep gcc from statically optimizing it out, as variable_test_bit makes
+ * some versions of gcc to think only *(unsigned long*) is used.
+ */
+static volatile u32 good_insns_32[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ---------------------------------------------- */
+ W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+ W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+ W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+ W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+ W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+ W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ /* ---------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+/* Using this for both 64-bit and 32-bit apps */
+static volatile u32 good_2byte_insns[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ---------------------------------------------- */
+ W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
+ W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
+ W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
+ W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+ W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
+ W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
+ W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
+ W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
+ /* ---------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+
+#ifdef CONFIG_X86_64
+/* Good-instruction tables for 64-bit apps */
+static volatile u32 good_insns_64[256 / 32] = {
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+ /* ---------------------------------------------- */
+ W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
+ W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
+ W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
+ W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
+ W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
+ W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
+ W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
+ W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
+ W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
+ W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
+ W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
+ W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
+ W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
+ W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
+ W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
+ W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
+ /* ---------------------------------------------- */
+ /* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
+};
+#endif
+#undef W
+
+/*
+ * opcodes we'll probably never support:
+ *
+ * 6c-6d, e4-e5, ec-ed - in
+ * 6e-6f, e6-e7, ee-ef - out
+ * cc, cd - int3, int
+ * cf - iret
+ * d6 - illegal instruction
+ * f1 - int1/icebp
+ * f4 - hlt
+ * fa, fb - cli, sti
+ * 0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
+ *
+ * invalid opcodes in 64-bit mode:
+ *
+ * 06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
+ * 63 - we support this opcode in x86_64 but not in i386.
+ *
+ * opcodes we may need to refine support for:
+ *
+ * 0f - 2-byte instructions: For many of these instructions, the validity
+ * depends on the prefix and/or the reg field. On such instructions, we
+ * just consider the opcode combination valid if it corresponds to any
+ * valid instruction.
+ *
+ * 8f - Group 1 - only reg = 0 is OK
+ * c6-c7 - Group 11 - only reg = 0 is OK
+ * d9-df - fpu insns with some illegal encodings
+ * f2, f3 - repnz, repz prefixes. These are also the first byte for
+ * certain floating-point instructions, such as addsd.
+ *
+ * fe - Group 4 - only reg = 0 or 1 is OK
+ * ff - Group 5 - only reg = 0-6 is OK
+ *
+ * others -- Do we need to support these?
+ *
+ * 0f - (floating-point?) prefetch instructions
+ * 07, 17, 1f - pop es, pop ss, pop ds
+ * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
+ * but 64 and 65 (fs: and gs:) seem to be used, so we support them
+ * 67 - addr16 prefix
+ * ce - into
+ * f0 - lock prefix
+ */
+
+/*
+ * TODO:
+ * - Where necessary, examine the modrm byte and allow only valid instructions
+ * in the different Groups and fpu instructions.
+ */
+
+static bool is_prefix_bad(struct insn *insn)
+{
+ int i;
+
+ for (i = 0; i < insn->prefixes.nbytes; i++) {
+ switch (insn->prefixes.bytes[i]) {
+ case 0x26: /* INAT_PFX_ES */
+ case 0x2E: /* INAT_PFX_CS */
+ case 0x36: /* INAT_PFX_DS */
+ case 0x3E: /* INAT_PFX_SS */
+ case 0xF0: /* INAT_PFX_LOCK */
+ return true;
+ }
+ }
+ return false;
+}
+
+static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
+{
+ insn_init(insn, auprobe->insn, false);
+
+ /* Skip good instruction prefixes; reject "bad" ones. */
+ insn_get_opcode(insn);
+ if (is_prefix_bad(insn))
+ return -ENOTSUPP;
+
+ if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
+ return 0;
+
+ if (insn->opcode.nbytes == 2) {
+ if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
+ return 0;
+ }
+
+ return -ENOTSUPP;
+}
+
+/*
+ * Figure out which fixups post_xol() will need to perform, and annotate
+ * arch_uprobe->fixups accordingly. To start with,
+ * arch_uprobe->fixups is either zero or it reflects rip-related
+ * fixups.
+ */
+static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
+{
+ bool fix_ip = true, fix_call = false; /* defaults */
+ int reg;
+
+ insn_get_opcode(insn); /* should be a nop */
+
+ switch (OPCODE1(insn)) {
+ case 0xc3: /* ret/lret */
+ case 0xcb:
+ case 0xc2:
+ case 0xca:
+ /* ip is correct */
+ fix_ip = false;
+ break;
+ case 0xe8: /* call relative - Fix return addr */
+ fix_call = true;
+ break;
+ case 0x9a: /* call absolute - Fix return addr, not ip */
+ fix_call = true;
+ fix_ip = false;
+ break;
+ case 0xff:
+ insn_get_modrm(insn);
+ reg = MODRM_REG(insn);
+ if (reg == 2 || reg == 3) {
+ /* call or lcall, indirect */
+ /* Fix return addr; ip is correct. */
+ fix_call = true;
+ fix_ip = false;
+ } else if (reg == 4 || reg == 5) {
+ /* jmp or ljmp, indirect */
+ /* ip is correct. */
+ fix_ip = false;
+ }
+ break;
+ case 0xea: /* jmp absolute -- ip is correct */
+ fix_ip = false;
+ break;
+ default:
+ break;
+ }
+ if (fix_ip)
+ auprobe->fixups |= UPROBE_FIX_IP;
+ if (fix_call)
+ auprobe->fixups |= UPROBE_FIX_CALL;
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * If arch_uprobe->insn doesn't use rip-relative addressing, return
+ * immediately. Otherwise, rewrite the instruction so that it accesses
+ * its memory operand indirectly through a scratch register. Set
+ * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
+ * accordingly. (The contents of the scratch register will be saved
+ * before we single-step the modified instruction, and restored
+ * afterward.)
+ *
+ * We do this because a rip-relative instruction can access only a
+ * relatively small area (+/- 2 GB from the instruction), and the XOL
+ * area typically lies beyond that area. At least for instructions
+ * that store to memory, we can't execute the original instruction
+ * and "fix things up" later, because the misdirected store could be
+ * disastrous.
+ *
+ * Some useful facts about rip-relative instructions:
+ *
+ * - There's always a modrm byte.
+ * - There's never a SIB byte.
+ * - The displacement is always 4 bytes.
+ */
+static void
+handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+ u8 *cursor;
+ u8 reg;
+
+ if (mm->context.ia32_compat)
+ return;
+
+ auprobe->rip_rela_target_address = 0x0;
+ if (!insn_rip_relative(insn))
+ return;
+
+ /*
+ * insn_rip_relative() would have decoded rex_prefix, modrm.
+ * Clear REX.b bit (extension of MODRM.rm field):
+ * we want to encode rax/rcx, not r8/r9.
+ */
+ if (insn->rex_prefix.nbytes) {
+ cursor = auprobe->insn + insn_offset_rex_prefix(insn);
+ *cursor &= 0xfe; /* Clearing REX.B bit */
+ }
+
+ /*
+ * Point cursor at the modrm byte. The next 4 bytes are the
+ * displacement. Beyond the displacement, for some instructions,
+ * is the immediate operand.
+ */
+ cursor = auprobe->insn + insn_offset_modrm(insn);
+ insn_get_length(insn);
+
+ /*
+ * Convert from rip-relative addressing to indirect addressing
+ * via a scratch register. Change the r/m field from 0x5 (%rip)
+ * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
+ */
+ reg = MODRM_REG(insn);
+ if (reg == 0) {
+ /*
+ * The register operand (if any) is either the A register
+ * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
+ * REX prefix) %r8. In any case, we know the C register
+ * is NOT the register operand, so we use %rcx (register
+ * #1) for the scratch register.
+ */
+ auprobe->fixups = UPROBE_FIX_RIP_CX;
+ /* Change modrm from 00 000 101 to 00 000 001. */
+ *cursor = 0x1;
+ } else {
+ /* Use %rax (register #0) for the scratch register. */
+ auprobe->fixups = UPROBE_FIX_RIP_AX;
+ /* Change modrm from 00 xxx 101 to 00 xxx 000 */
+ *cursor = (reg << 3);
+ }
+
+ /* Target address = address of next instruction + (signed) offset */
+ auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
+
+ /* Displacement field is gone; slide immediate field (if any) over. */
+ if (insn->immediate.nbytes) {
+ cursor++;
+ memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
+ }
+ return;
+}
+
+static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
+{
+ insn_init(insn, auprobe->insn, true);
+
+ /* Skip good instruction prefixes; reject "bad" ones. */
+ insn_get_opcode(insn);
+ if (is_prefix_bad(insn))
+ return -ENOTSUPP;
+
+ if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
+ return 0;
+
+ if (insn->opcode.nbytes == 2) {
+ if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
+ return 0;
+ }
+ return -ENOTSUPP;
+}
+
+static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+ if (mm->context.ia32_compat)
+ return validate_insn_32bits(auprobe, insn);
+ return validate_insn_64bits(auprobe, insn);
+}
+#else /* 32-bit: */
+static void handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+ /* No RIP-relative addressing on 32-bit */
+}
+
+static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
+{
+ return validate_insn_32bits(auprobe, insn);
+}
+#endif /* CONFIG_X86_64 */
+
+/**
+ * arch_uprobes_analyze_insn - instruction analysis including validity and fixups.
+ * @mm: the probed address space.
+ * @arch_uprobe: the probepoint information.
+ * Return 0 on success or a -ve number on error.
+ */
+int arch_uprobes_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
+{
+ int ret;
+ struct insn insn;
+
+ auprobe->fixups = 0;
+ ret = validate_insn_bits(auprobe, mm, &insn);
+ if (ret != 0)
+ return ret;
+
+ handle_riprel_insn(auprobe, mm, &insn);
+ prepare_fixups(auprobe, &insn);
+
+ return 0;
+}
diff --git a/arch/x86/lib/inat.c b/arch/x86/lib/inat.c
index 88ad5fbda6e1..c1f01a8e9f65 100644
--- a/arch/x86/lib/inat.c
+++ b/arch/x86/lib/inat.c
@@ -29,46 +29,46 @@ insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
return inat_primary_table[opcode];
}
-insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, insn_byte_t last_pfx,
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
insn_attr_t esc_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_escape_id(esc_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_escape_tables[n][0];
if (!table)
return 0;
- if (inat_has_variant(table[opcode]) && m) {
- table = inat_escape_tables[n][m];
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
if (!table)
return 0;
}
return table[opcode];
}
-insn_attr_t inat_get_group_attribute(insn_byte_t modrm, insn_byte_t last_pfx,
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
insn_attr_t grp_attr)
{
const insn_attr_t *table;
- insn_attr_t lpfx_attr;
- int n, m = 0;
+ int n;
n = inat_group_id(grp_attr);
- if (last_pfx) {
- lpfx_attr = inat_get_opcode_attribute(last_pfx);
- m = inat_last_prefix_id(lpfx_attr);
- }
+
table = inat_group_tables[n][0];
if (!table)
return inat_group_common_attribute(grp_attr);
- if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && m) {
- table = inat_group_tables[n][m];
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
if (!table)
return inat_group_common_attribute(grp_attr);
}
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 5a1f9f3e3fbb..25feb1ae71c5 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -185,7 +185,8 @@ err_out:
void insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
- insn_byte_t op, pfx;
+ insn_byte_t op;
+ int pfx_id;
if (opcode->got)
return;
if (!insn->prefixes.got)
@@ -212,8 +213,8 @@ void insn_get_opcode(struct insn *insn)
/* Get escaped opcode */
op = get_next(insn_byte_t, insn);
opcode->bytes[opcode->nbytes++] = op;
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_escape_attribute(op, pfx, insn->attr);
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
if (inat_must_vex(insn->attr))
insn->attr = 0; /* This instruction is bad */
@@ -235,7 +236,7 @@ err_out:
void insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
- insn_byte_t pfx, mod;
+ insn_byte_t pfx_id, mod;
if (modrm->got)
return;
if (!insn->opcode.got)
@@ -246,8 +247,8 @@ void insn_get_modrm(struct insn *insn)
modrm->value = mod;
modrm->nbytes = 1;
if (inat_is_group(insn->attr)) {
- pfx = insn_last_prefix(insn);
- insn->attr = inat_get_group_attribute(mod, pfx,
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
insn->attr = 0; /* This is bad */
diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h
index 5ce8b140428f..f7c69580fea7 100644
--- a/include/linux/jump_label.h
+++ b/include/linux/jump_label.h
@@ -1,6 +1,38 @@
#ifndef _LINUX_JUMP_LABEL_H
#define _LINUX_JUMP_LABEL_H
+/*
+ * Jump label support
+ *
+ * Copyright (C) 2009-2012 Jason Baron <jbaron@redhat.com>
+ * Copyright (C) 2011-2012 Peter Zijlstra <pzijlstr@redhat.com>
+ *
+ * Jump labels provide an interface to generate dynamic branches using
+ * self-modifying code. Assuming toolchain and architecture support the result
+ * of a "if (static_branch(&key))" statement is a unconditional branch (which
+ * defaults to false - and the true block is placed out of line).
+ *
+ * However at runtime we can change the 'static' branch target using
+ * jump_label_{inc,dec}(). These function as a 'reference' count on the key
+ * object and for as long as there are references all branches referring to
+ * that particular key will point to the (out of line) true block.
+ *
+ * Since this relies on modifying code the jump_label_{inc,dec}() functions
+ * must be considered absolute slow paths (machine wide synchronization etc.).
+ * OTOH, since the affected branches are unconditional their runtime overhead
+ * will be absolutely minimal, esp. in the default (off) case where the total
+ * effect is a single NOP of appropriate size. The on case will patch in a jump
+ * to the out-of-line block.
+ *
+ * When the control is directly exposed to userspace it is prudent to delay the
+ * decrement to avoid high frequency code modifications which can (and do)
+ * cause significant performance degradation. Struct jump_label_key_deferred and
+ * jump_label_dec_deferred() provide for this.
+ *
+ * Lacking toolchain and or architecture support, it falls back to a simple
+ * conditional branch.
+ */
+
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/workqueue.h>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index abb2776be1ba..412b790f5da6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -291,12 +291,14 @@ struct perf_event_mmap_page {
__s64 offset; /* add to hardware event value */
__u64 time_enabled; /* time event active */
__u64 time_running; /* time event on cpu */
+ __u32 time_mult, time_shift;
+ __u64 time_offset;
/*
* Hole for extension of the self monitor capabilities
*/
- __u64 __reserved[123]; /* align to 1k */
+ __u64 __reserved[121]; /* align to 1k */
/*
* Control data for the mmap() data buffer.
@@ -616,6 +618,7 @@ struct pmu {
struct list_head entry;
struct device *dev;
+ const struct attribute_group **attr_groups;
char *name;
int type;
@@ -681,6 +684,12 @@ struct pmu {
* for each successful ->add() during the transaction.
*/
void (*cancel_txn) (struct pmu *pmu); /* optional */
+
+ /*
+ * Will return the value for perf_event_mmap_page::index for this event,
+ * if no implementation is provided it will default to: event->hw.idx + 1.
+ */
+ int (*event_idx) (struct perf_event *event); /*optional */
};
/**
diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
new file mode 100644
index 000000000000..eac525f41b94
--- /dev/null
+++ b/include/linux/uprobes.h
@@ -0,0 +1,76 @@
+#ifndef _LINUX_UPROBES_H
+#define _LINUX_UPROBES_H
+/*
+ * User-space Probes (UProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2012
+ * Authors:
+ * Srikar Dronamraju
+ * Jim Keniston
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/rbtree.h>
+
+struct vm_area_struct;
+#ifdef CONFIG_ARCH_SUPPORTS_UPROBES
+#include <asm/uprobes.h>
+#endif
+
+/* flags that denote/change uprobes behaviour */
+
+/* Have a copy of original instruction */
+#define UPROBE_COPY_INSN 0x1
+
+/* Dont run handlers when first register/ last unregister in progress*/
+#define UPROBE_RUN_HANDLER 0x2
+
+struct uprobe_consumer {
+ int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
+ /*
+ * filter is optional; If a filter exists, handler is run
+ * if and only if filter returns true.
+ */
+ bool (*filter)(struct uprobe_consumer *self, struct task_struct *task);
+
+ struct uprobe_consumer *next;
+};
+
+#ifdef CONFIG_UPROBES
+extern int __weak set_swbp(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr);
+extern int __weak set_orig_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long vaddr, bool verify);
+extern bool __weak is_swbp_insn(uprobe_opcode_t *insn);
+extern int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc);
+extern int uprobe_mmap(struct vm_area_struct *vma);
+#else /* CONFIG_UPROBES is not defined */
+static inline int
+uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
+{
+ return -ENOSYS;
+}
+static inline void
+uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
+{
+}
+static inline int uprobe_mmap(struct vm_area_struct *vma)
+{
+ return 0;
+}
+#endif /* CONFIG_UPROBES */
+#endif /* _LINUX_UPROBES_H */
diff --git a/include/trace/events/signal.h b/include/trace/events/signal.h
index 17df43464df0..39a8a430d90f 100644
--- a/include/trace/events/signal.h
+++ b/include/trace/events/signal.h
@@ -23,11 +23,23 @@
} \
} while (0)
+#ifndef TRACE_HEADER_MULTI_READ
+enum {
+ TRACE_SIGNAL_DELIVERED,
+ TRACE_SIGNAL_IGNORED,
+ TRACE_SIGNAL_ALREADY_PENDING,
+ TRACE_SIGNAL_OVERFLOW_FAIL,
+ TRACE_SIGNAL_LOSE_INFO,
+};
+#endif
+
/**
* signal_generate - called when a signal is generated
* @sig: signal number
* @info: pointer to struct siginfo
* @task: pointer to struct task_struct
+ * @group: shared or private
+ * @result: TRACE_SIGNAL_*
*
* Current process sends a 'sig' signal to 'task' process with
* 'info' siginfo. If 'info' is SEND_SIG_NOINFO or SEND_SIG_PRIV,
@@ -37,9 +49,10 @@
*/
TRACE_EVENT(signal_generate,
- TP_PROTO(int sig, struct siginfo *info, struct task_struct *task),
+ TP_PROTO(int sig, struct siginfo *info, struct task_struct *task,
+ int group, int result),
- TP_ARGS(sig, info, task),
+ TP_ARGS(sig, info, task, group, result),
TP_STRUCT__entry(
__field( int, sig )
@@ -47,6 +60,8 @@ TRACE_EVENT(signal_generate,
__field( int, code )
__array( char, comm, TASK_COMM_LEN )
__field( pid_t, pid )
+ __field( int, group )
+ __field( int, result )
),
TP_fast_assign(
@@ -54,11 +69,14 @@ TRACE_EVENT(signal_generate,
TP_STORE_SIGINFO(__entry, info);
memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
__entry->pid = task->pid;
+ __entry->group = group;
+ __entry->result = result;
),
- TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d",
+ TP_printk("sig=%d errno=%d code=%d comm=%s pid=%d grp=%d res=%d",
__entry->sig, __entry->errno, __entry->code,
- __entry->comm, __entry->pid)
+ __entry->comm, __entry->pid, __entry->group,
+ __entry->result)
);
/**
@@ -101,65 +119,6 @@ TRACE_EVENT(signal_deliver,
__entry->sa_handler, __entry->sa_flags)
);
-DECLARE_EVENT_CLASS(signal_queue_overflow,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info),
-
- TP_STRUCT__entry(
- __field( int, sig )
- __field( int, group )
- __field( int, errno )
- __field( int, code )
- ),
-
- TP_fast_assign(
- __entry->sig = sig;
- __entry->group = group;
- TP_STORE_SIGINFO(__entry, info);
- ),
-
- TP_printk("sig=%d group=%d errno=%d code=%d",
- __entry->sig, __entry->group, __entry->errno, __entry->code)
-);
-
-/**
- * signal_overflow_fail - called when signal queue is overflow
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel fails to generate 'sig' signal with 'info' siginfo, because
- * siginfo queue is overflow, and the signal is dropped.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_overflow_fail,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info)
-);
-
-/**
- * signal_lose_info - called when siginfo is lost
- * @sig: signal number
- * @group: signal to process group or not (bool)
- * @info: pointer to struct siginfo
- *
- * Kernel generates 'sig' signal but loses 'info' siginfo, because siginfo
- * queue is overflow.
- * 'group' is not 0 if the signal will be sent to a process group.
- * 'sig' is always one of non-RT signals.
- */
-DEFINE_EVENT(signal_queue_overflow, signal_lose_info,
-
- TP_PROTO(int sig, int group, struct siginfo *info),
-
- TP_ARGS(sig, group, info)
-);
-
#endif /* _TRACE_SIGNAL_H */
/* This part must be outside protection */
diff --git a/kernel/events/Makefile b/kernel/events/Makefile
index 22d901f9caf4..103f5d147b2f 100644
--- a/kernel/events/Makefile
+++ b/kernel/events/Makefile
@@ -3,4 +3,7 @@ CFLAGS_REMOVE_core.o = -pg
endif
obj-y := core.o ring_buffer.o callchain.o
+
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
+obj-$(CONFIG_UPROBES) += uprobes.o
+
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1b5c081d8b9f..94afe5b91c6a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3238,10 +3238,6 @@ int perf_event_task_disable(void)
return 0;
}
-#ifndef PERF_EVENT_INDEX_OFFSET
-# define PERF_EVENT_INDEX_OFFSET 0
-#endif
-
static int perf_event_index(struct perf_event *event)
{
if (event->hw.state & PERF_HES_STOPPED)
@@ -3250,21 +3246,26 @@ static int perf_event_index(struct perf_event *event)
if (event->state != PERF_EVENT_STATE_ACTIVE)
return 0;
- return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET;
+ return event->pmu->event_idx(event);
}
static void calc_timer_values(struct perf_event *event,
+ u64 *now,
u64 *enabled,
u64 *running)
{
- u64 now, ctx_time;
+ u64 ctx_time;
- now = perf_clock();
- ctx_time = event->shadow_ctx_time + now;
+ *now = perf_clock();
+ ctx_time = event->shadow_ctx_time + *now;
*enabled = ctx_time - event->tstamp_enabled;
*running = ctx_time - event->tstamp_running;
}
+void __weak perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
+{
+}
+
/*
* Callers need to ensure there can be no nesting of this function, otherwise
* the seqlock logic goes bad. We can not serialize this because the arch
@@ -3274,7 +3275,7 @@ void perf_event_update_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
struct ring_buffer *rb;
- u64 enabled, running;
+ u64 enabled, running, now;
rcu_read_lock();
/*
@@ -3286,7 +3287,7 @@ void perf_event_update_userpage(struct perf_event *event)
* because of locking issue as we can be called in
* NMI context
*/
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;
@@ -3302,7 +3303,7 @@ void perf_event_update_userpage(struct perf_event *event)
barrier();
userpg->index = perf_event_index(event);
userpg->offset = perf_event_count(event);
- if (event->state == PERF_EVENT_STATE_ACTIVE)
+ if (userpg->index)
userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = enabled +
@@ -3311,6 +3312,8 @@ void perf_event_update_userpage(struct perf_event *event)
userpg->time_running = running +
atomic64_read(&event->child_total_time_running);
+ perf_update_user_clock(userpg, now);
+
barrier();
++userpg->lock;
preempt_enable();
@@ -3568,6 +3571,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
event->mmap_user = get_current_user();
vma->vm_mm->pinned_vm += event->mmap_locked;
+ perf_event_update_userpage(event);
+
unlock:
if (!ret)
atomic_inc(&event->mmap_count);
@@ -3799,7 +3804,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
static void perf_output_read(struct perf_output_handle *handle,
struct perf_event *event)
{
- u64 enabled = 0, running = 0;
+ u64 enabled = 0, running = 0, now;
u64 read_format = event->attr.read_format;
/*
@@ -3812,7 +3817,7 @@ static void perf_output_read(struct perf_output_handle *handle,
* NMI context
*/
if (read_format & PERF_FORMAT_TOTAL_TIMES)
- calc_timer_values(event, &enabled, &running);
+ calc_timer_values(event, &now, &enabled, &running);
if (event->attr.read_format & PERF_FORMAT_GROUP)
perf_output_read_group(handle, event, enabled, running);
@@ -5031,6 +5036,11 @@ static int perf_swevent_init(struct perf_event *event)
return 0;
}
+static int perf_swevent_event_idx(struct perf_event *event)
+{
+ return 0;
+}
+
static struct pmu perf_swevent = {
.task_ctx_nr = perf_sw_context,
@@ -5040,6 +5050,8 @@ static struct pmu perf_swevent = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
#ifdef CONFIG_EVENT_TRACING
@@ -5126,6 +5138,8 @@ static struct pmu perf_tracepoint = {
.start = perf_swevent_start,
.stop = perf_swevent_stop,
.read = perf_swevent_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static inline void perf_tp_register(void)
@@ -5345,6 +5359,8 @@ static struct pmu perf_cpu_clock = {
.start = cpu_clock_event_start,
.stop = cpu_clock_event_stop,
.read = cpu_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
/*
@@ -5417,6 +5433,8 @@ static struct pmu perf_task_clock = {
.start = task_clock_event_start,
.stop = task_clock_event_stop,
.read = task_clock_event_read,
+
+ .event_idx = perf_swevent_event_idx,
};
static void perf_pmu_nop_void(struct pmu *pmu)
@@ -5444,6 +5462,11 @@ static void perf_pmu_cancel_txn(struct pmu *pmu)
perf_pmu_enable(pmu);
}
+static int perf_event_idx_default(struct perf_event *event)
+{
+ return event->hw.idx + 1;
+}
+
/*
* Ensures all contexts with the same task_ctx_nr have the same
* pmu_cpu_context too.
@@ -5530,6 +5553,7 @@ static int pmu_dev_alloc(struct pmu *pmu)
if (!pmu->dev)
goto out;
+ pmu->dev->groups = pmu->attr_groups;
device_initialize(pmu->dev);
ret = dev_set_name(pmu->dev, "%s", pmu->name);
if (ret)
@@ -5633,6 +5657,9 @@ got_cpu_context:
pmu->pmu_disable = perf_pmu_nop_void;
}
+ if (!pmu->event_idx)
+ pmu->event_idx = perf_event_idx_default;
+
list_add_rcu(&pmu->entry, &pmus);
ret = 0;
unlock:
diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index ee706ce44aa0..3330022a7ac1 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -613,6 +613,11 @@ static void hw_breakpoint_stop(struct perf_event *bp, int flags)
bp->hw.state = PERF_HES_STOPPED;
}
+static int hw_breakpoint_event_idx(struct perf_event *bp)
+{
+ return 0;
+}
+
static struct pmu perf_breakpoint = {
.task_ctx_nr = perf_sw_context, /* could eventually get its own */
@@ -622,6 +627,8 @@ static struct pmu perf_breakpoint = {
.start = hw_breakpoint_start,
.stop = hw_breakpoint_stop,
.read = hw_breakpoint_pmu_read,
+
+ .event_idx = hw_breakpoint_event_idx,
};
int __init init_hw_breakpoint(void)
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
new file mode 100644
index 000000000000..e56e56aa7535
--- /dev/null
+++ b/kernel/events/uprobes.c
@@ -0,0 +1,1029 @@
+/*
+ * User-space Probes (UProbes)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2008-2012
+ * Authors:
+ * Srikar Dronamraju
+ * Jim Keniston
+ * Copyright (C) 2011-2012 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h> /* read_mapping_page */
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/rmap.h> /* anon_vma_prepare */
+#include <linux/mmu_notifier.h> /* set_pte_at_notify */
+#include <linux/swap.h> /* try_to_free_swap */
+
+#include <linux/uprobes.h>
+
+static struct rb_root uprobes_tree = RB_ROOT;
+
+static DEFINE_SPINLOCK(uprobes_treelock); /* serialize rbtree access */
+
+#define UPROBES_HASH_SZ 13
+
+/* serialize (un)register */
+static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
+
+#define uprobes_hash(v) (&uprobes_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
+
+/* serialize uprobe->pending_list */
+static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
+#define uprobes_mmap_hash(v) (&uprobes_mmap_mutex[((unsigned long)(v)) % UPROBES_HASH_SZ])
+
+/*
+ * uprobe_events allows us to skip the uprobe_mmap if there are no uprobe
+ * events active at this time. Probably a fine grained per inode count is
+ * better?
+ */
+static atomic_t uprobe_events = ATOMIC_INIT(0);
+
+/*
+ * Maintain a temporary per vma info that can be used to search if a vma
+ * has already been handled. This structure is introduced since extending
+ * vm_area_struct wasnt recommended.
+ */
+struct vma_info {
+ struct list_head probe_list;
+ struct mm_struct *mm;
+ loff_t vaddr;
+};
+
+struct uprobe {
+ struct rb_node rb_node; /* node in the rb tree */
+ atomic_t ref;
+ struct rw_semaphore consumer_rwsem;
+ struct list_head pending_list;
+ struct uprobe_consumer *consumers;
+ struct inode *inode; /* Also hold a ref to inode */
+ loff_t offset;
+ int flags;
+ struct arch_uprobe arch;
+};
+
+/*
+ * valid_vma: Verify if the specified vma is an executable vma
+ * Relax restrictions while unregistering: vm_flags might have
+ * changed after breakpoint was inserted.
+ * - is_register: indicates if we are in register context.
+ * - Return 1 if the specified virtual address is in an
+ * executable vma.
+ */
+static bool valid_vma(struct vm_area_struct *vma, bool is_register)
+{
+ if (!vma->vm_file)
+ return false;
+
+ if (!is_register)
+ return true;
+
+ if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
+ return true;
+
+ return false;
+}
+
+static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+{
+ loff_t vaddr;
+
+ vaddr = vma->vm_start + offset;
+ vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+
+ return vaddr;
+}
+
+/**
+ * __replace_page - replace page in vma by new page.
+ * based on replace_page in mm/ksm.c
+ *
+ * @vma: vma that holds the pte pointing to page
+ * @page: the cowed page we are replacing by kpage
+ * @kpage: the modified page we replace page by
+ *
+ * Returns 0 on success, -EFAULT on failure.
+ */
+static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *ptep;
+ spinlock_t *ptl;
+ unsigned long addr;
+ int err = -EFAULT;
+
+ addr = page_address_in_vma(page, vma);
+ if (addr == -EFAULT)
+ goto out;
+
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_present(*pgd))
+ goto out;
+
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud))
+ goto out;
+
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ goto out;
+
+ ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
+ if (!ptep)
+ goto out;
+
+ get_page(kpage);
+ page_add_new_anon_rmap(kpage, vma, addr);
+
+ flush_cache_page(vma, addr, pte_pfn(*ptep));
+ ptep_clear_flush(vma, addr, ptep);
+ set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
+
+ page_remove_rmap(page);
+ if (!page_mapped(page))
+ try_to_free_swap(page);
+ put_page(page);
+ pte_unmap_unlock(ptep, ptl);
+ err = 0;
+
+out:
+ return err;
+}
+
+/**
+ * is_swbp_insn - check if instruction is breakpoint instruction.
+ * @insn: instruction to be checked.
+ * Default implementation of is_swbp_insn
+ * Returns true if @insn is a breakpoint instruction.
+ */
+bool __weak is_swbp_insn(uprobe_opcode_t *insn)
+{
+ return *insn == UPROBE_SWBP_INSN;
+}
+
+/*
+ * NOTE:
+ * Expect the breakpoint instruction to be the smallest size instruction for
+ * the architecture. If an arch has variable length instruction and the
+ * breakpoint instruction is not of the smallest length instruction
+ * supported by that architecture then we need to modify read_opcode /
+ * write_opcode accordingly. This would never be a problem for archs that
+ * have fixed length instructions.
+ */
+
+/*
+ * write_opcode - write the opcode at a given virtual address.
+ * @auprobe: arch breakpointing information.
+ * @mm: the probed process address space.
+ * @vaddr: the virtual address to store the opcode.
+ * @opcode: opcode to be written at @vaddr.
+ *
+ * Called with mm->mmap_sem held (for read and with a reference to
+ * mm).
+ *
+ * For mm @mm, write the opcode at @vaddr.
+ * Return 0 (success) or a negative errno.
+ */
+static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long vaddr, uprobe_opcode_t opcode)
+{
+ struct page *old_page, *new_page;
+ struct address_space *mapping;
+ void *vaddr_old, *vaddr_new;
+ struct vm_area_struct *vma;
+ struct uprobe *uprobe;
+ loff_t addr;
+ int ret;
+
+ /* Read the page with vaddr into memory */
+ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
+ if (ret <= 0)
+ return ret;
+
+ ret = -EINVAL;
+
+ /*
+ * We are interested in text pages only. Our pages of interest
+ * should be mapped for read and execute only. We desist from
+ * adding probes in write mapped pages since the breakpoints
+ * might end up in the file copy.
+ */
+ if (!valid_vma(vma, is_swbp_insn(&opcode)))
+ goto put_out;
+
+ uprobe = container_of(auprobe, struct uprobe, arch);
+ mapping = uprobe->inode->i_mapping;
+ if (mapping != vma->vm_file->f_mapping)
+ goto put_out;
+
+ addr = vma_address(vma, uprobe->offset);
+ if (vaddr != (unsigned long)addr)
+ goto put_out;
+
+ ret = -ENOMEM;
+ new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
+ if (!new_page)
+ goto put_out;
+
+ __SetPageUptodate(new_page);
+
+ /*
+ * lock page will serialize against do_wp_page()'s
+ * PageAnon() handling
+ */
+ lock_page(old_page);
+ /* copy the page now that we've got it stable */
+ vaddr_old = kmap_atomic(old_page);
+ vaddr_new = kmap_atomic(new_page);
+
+ memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
+
+ /* poke the new insn in, ASSUMES we don't cross page boundary */
+ vaddr &= ~PAGE_MASK;
+ BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+ memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
+
+ kunmap_atomic(vaddr_new);
+ kunmap_atomic(vaddr_old);
+
+ ret = anon_vma_prepare(vma);
+ if (ret)
+ goto unlock_out;
+
+ lock_page(new_page);
+ ret = __replace_page(vma, old_page, new_page);
+ unlock_page(new_page);
+
+unlock_out:
+ unlock_page(old_page);
+ page_cache_release(new_page);
+
+put_out:
+ put_page(old_page);
+
+ return ret;
+}
+
+/**
+ * read_opcode - read the opcode at a given virtual address.
+ * @mm: the probed process address space.
+ * @vaddr: the virtual address to read the opcode.
+ * @opcode: location to store the read opcode.
+ *
+ * Called with mm->mmap_sem held (for read and with a reference to
+ * mm.
+ *
+ * For mm @mm, read the opcode at @vaddr and store it in @opcode.
+ * Return 0 (success) or a negative errno.
+ */
+static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t *opcode)
+{
+ struct page *page;
+ void *vaddr_new;
+ int ret;
+
+ ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
+ if (ret <= 0)
+ return ret;
+
+ lock_page(page);
+ vaddr_new = kmap_atomic(page);
+ vaddr &= ~PAGE_MASK;
+ memcpy(opcode, vaddr_new + vaddr, UPROBE_SWBP_INSN_SIZE);
+ kunmap_atomic(vaddr_new);
+ unlock_page(page);
+
+ put_page(page);
+
+ return 0;
+}
+
+static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+{
+ uprobe_opcode_t opcode;
+ int result;
+
+ result = read_opcode(mm, vaddr, &opcode);
+ if (result)
+ return result;
+
+ if (is_swbp_insn(&opcode))
+ return 1;
+
+ return 0;
+}
+
+/**
+ * set_swbp - store breakpoint at a given address.
+ * @auprobe: arch specific probepoint information.
+ * @mm: the probed process address space.
+ * @vaddr: the virtual address to insert the opcode.
+ *
+ * For mm @mm, store the breakpoint instruction at @vaddr.
+ * Return 0 (success) or a negative errno.
+ */
+int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
+{
+ int result;
+
+ result = is_swbp_at_addr(mm, vaddr);
+ if (result == 1)
+ return -EEXIST;
+
+ if (result)
+ return result;
+
+ return write_opcode(auprobe, mm, vaddr, UPROBE_SWBP_INSN);
+}
+
+/**
+ * set_orig_insn - Restore the original instruction.
+ * @mm: the probed process address space.
+ * @auprobe: arch specific probepoint information.
+ * @vaddr: the virtual address to insert the opcode.
+ * @verify: if true, verify existance of breakpoint instruction.
+ *
+ * For mm @mm, restore the original opcode (opcode) at @vaddr.
+ * Return 0 (success) or a negative errno.
+ */
+int __weak
+set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr, bool verify)
+{
+ if (verify) {
+ int result;
+
+ result = is_swbp_at_addr(mm, vaddr);
+ if (!result)
+ return -EINVAL;
+
+ if (result != 1)
+ return result;
+ }
+ return write_opcode(auprobe, mm, vaddr, *(uprobe_opcode_t *)auprobe->insn);
+}
+
+static int match_uprobe(struct uprobe *l, struct uprobe *r)
+{
+ if (l->inode < r->inode)
+ return -1;
+
+ if (l->inode > r->inode)
+ return 1;
+
+ if (l->offset < r->offset)
+ return -1;
+
+ if (l->offset > r->offset)
+ return 1;
+
+ return 0;
+}
+
+static struct uprobe *__find_uprobe(struct inode *inode, loff_t offset)
+{
+ struct uprobe u = { .inode = inode, .offset = offset };
+ struct rb_node *n = uprobes_tree.rb_node;
+ struct uprobe *uprobe;
+ int match;
+
+ while (n) {
+ uprobe = rb_entry(n, struct uprobe, rb_node);
+ match = match_uprobe(&u, uprobe);
+ if (!match) {
+ atomic_inc(&uprobe->ref);
+ return uprobe;
+ }
+
+ if (match < 0)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+ return NULL;
+}
+
+/*
+ * Find a uprobe corresponding to a given inode:offset
+ * Acquires uprobes_treelock
+ */
+static struct uprobe *find_uprobe(struct inode *inode, loff_t offset)
+{
+ struct uprobe *uprobe;
+ unsigned long flags;
+
+ spin_lock_irqsave(&uprobes_treelock, flags);
+ uprobe = __find_uprobe(inode, offset);
+ spin_unlock_irqrestore(&uprobes_treelock, flags);
+
+ return uprobe;
+}
+
+static struct uprobe *__insert_uprobe(struct uprobe *uprobe)
+{
+ struct rb_node **p = &uprobes_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct uprobe *u;
+ int match;
+
+ while (*p) {
+ parent = *p;
+ u = rb_entry(parent, struct uprobe, rb_node);
+ match = match_uprobe(uprobe, u);
+ if (!match) {
+ atomic_inc(&u->ref);
+ return u;
+ }
+
+ if (match < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+
+ }
+
+ u = NULL;
+ rb_link_node(&uprobe->rb_node, parent, p);
+ rb_insert_color(&uprobe->rb_node, &uprobes_tree);
+ /* get access + creation ref */
+ atomic_set(&uprobe->ref, 2);
+
+ return u;
+}
+
+/*
+ * Acquire uprobes_treelock.
+ * Matching uprobe already exists in rbtree;
+ * increment (access refcount) and return the matching uprobe.
+ *
+ * No matching uprobe; insert the uprobe in rb_tree;
+ * get a double refcount (access + creation) and return NULL.
+ */
+static struct uprobe *insert_uprobe(struct uprobe *uprobe)
+{
+ unsigned long flags;
+ struct uprobe *u;
+
+ spin_lock_irqsave(&uprobes_treelock, flags);
+ u = __insert_uprobe(uprobe);
+ spin_unlock_irqrestore(&uprobes_treelock, flags);
+
+ return u;
+}
+
+static void put_uprobe(struct uprobe *uprobe)
+{
+ if (atomic_dec_and_test(&uprobe->ref))
+ kfree(uprobe);
+}
+
+static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+{
+ struct uprobe *uprobe, *cur_uprobe;
+
+ uprobe = kzalloc(sizeof(struct uprobe), GFP_KERNEL);
+ if (!uprobe)
+ return NULL;
+
+ uprobe->inode = igrab(inode);
+ uprobe->offset = offset;
+ init_rwsem(&uprobe->consumer_rwsem);
+ INIT_LIST_HEAD(&uprobe->pending_list);
+
+ /* add to uprobes_tree, sorted on inode:offset */
+ cur_uprobe = insert_uprobe(uprobe);
+
+ /* a uprobe exists for this inode:offset combination */
+ if (cur_uprobe) {
+ kfree(uprobe);
+ uprobe = cur_uprobe;
+ iput(inode);
+ } else {
+ atomic_inc(&uprobe_events);
+ }
+
+ return uprobe;
+}
+
+/* Returns the previous consumer */
+static struct uprobe_consumer *
+consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
+{
+ down_write(&uprobe->consumer_rwsem);
+ uc->next = uprobe->consumers;
+ uprobe->consumers = uc;
+ up_write(&uprobe->consumer_rwsem);
+
+ return uc->next;
+}
+
+/*
+ * For uprobe @uprobe, delete the consumer @uc.
+ * Return true if the @uc is deleted successfully
+ * or return false.
+ */
+static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
+{
+ struct uprobe_consumer **con;
+ bool ret = false;
+
+ down_write(&uprobe->consumer_rwsem);
+ for (con = &uprobe->consumers; *con; con = &(*con)->next) {
+ if (*con == uc) {
+ *con = uc->next;
+ ret = true;
+ break;
+ }
+ }
+ up_write(&uprobe->consumer_rwsem);
+
+ return ret;
+}
+
+static int
+__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
+ unsigned long nbytes, unsigned long offset)
+{
+ struct file *filp = vma->vm_file;
+ struct page *page;
+ void *vaddr;
+ unsigned long off1;
+ unsigned long idx;
+
+ if (!filp)
+ return -EINVAL;
+
+ idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
+ off1 = offset &= ~PAGE_MASK;
+
+ /*
+ * Ensure that the page that has the original instruction is
+ * populated and in page-cache.
+ */
+ page = read_mapping_page(mapping, idx, filp);
+ if (IS_ERR(page))
+ return PTR_ERR(page);
+
+ vaddr = kmap_atomic(page);
+ memcpy(insn, vaddr + off1, nbytes);
+ kunmap_atomic(vaddr);
+ page_cache_release(page);
+
+ return 0;
+}
+
+static int
+copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+{
+ struct address_space *mapping;
+ unsigned long nbytes;
+ int bytes;
+
+ addr &= ~PAGE_MASK;
+ nbytes = PAGE_SIZE - addr;
+ mapping = uprobe->inode->i_mapping;
+
+ /* Instruction at end of binary; copy only available bytes */
+ if (uprobe->offset + MAX_UINSN_BYTES > uprobe->inode->i_size)
+ bytes = uprobe->inode->i_size - uprobe->offset;
+ else
+ bytes = MAX_UINSN_BYTES;
+
+ /* Instruction at the page-boundary; copy bytes in second page */
+ if (nbytes < bytes) {
+ if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
+ bytes - nbytes, uprobe->offset + nbytes))
+ return -ENOMEM;
+
+ bytes = nbytes;
+ }
+ return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
+}
+
+static int
+install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
+ struct vm_area_struct *vma, loff_t vaddr)
+{
+ unsigned long addr;
+ int ret;
+
+ /*
+ * If probe is being deleted, unregister thread could be done with
+ * the vma-rmap-walk through. Adding a probe now can be fatal since
+ * nobody will be able to cleanup. Also we could be from fork or
+ * mremap path, where the probe might have already been inserted.
+ * Hence behave as if probe already existed.
+ */
+ if (!uprobe->consumers)
+ return -EEXIST;
+
+ addr = (unsigned long)vaddr;
+
+ if (!(uprobe->flags & UPROBE_COPY_INSN)) {
+ ret = copy_insn(uprobe, vma, addr);
+ if (ret)
+ return ret;
+
+ if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+ return -EEXIST;
+
+ ret = arch_uprobes_analyze_insn(&uprobe->arch, mm);
+ if (ret)
+ return ret;
+
+ uprobe->flags |= UPROBE_COPY_INSN;
+ }
+ ret = set_swbp(&uprobe->arch, mm, addr);
+
+ return ret;
+}
+
+static void
+remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
+{
+ set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true);
+}
+
+static void delete_uprobe(struct uprobe *uprobe)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&uprobes_treelock, flags);
+ rb_erase(&uprobe->rb_node, &uprobes_tree);
+ spin_unlock_irqrestore(&uprobes_treelock, flags);
+ iput(uprobe->inode);
+ put_uprobe(uprobe);
+ atomic_dec(&uprobe_events);
+}
+
+static struct vma_info *
+__find_next_vma_info(struct address_space *mapping, struct list_head *head,
+ struct vma_info *vi, loff_t offset, bool is_register)
+{
+ struct prio_tree_iter iter;
+ struct vm_area_struct *vma;
+ struct vma_info *tmpvi;
+ unsigned long pgoff;
+ int existing_vma;
+ loff_t vaddr;
+
+ pgoff = offset >> PAGE_SHIFT;
+
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ if (!valid_vma(vma, is_register))
+ continue;
+
+ existing_vma = 0;
+ vaddr = vma_address(vma, offset);
+
+ list_for_each_entry(tmpvi, head, probe_list) {
+ if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
+ existing_vma = 1;
+ break;
+ }
+ }
+
+ /*
+ * Another vma needs a probe to be installed. However skip
+ * installing the probe if the vma is about to be unlinked.
+ */
+ if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
+ vi->mm = vma->vm_mm;
+ vi->vaddr = vaddr;
+ list_add(&vi->probe_list, head);
+
+ return vi;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Iterate in the rmap prio tree and find a vma where a probe has not
+ * yet been inserted.
+ */
+static struct vma_info *
+find_next_vma_info(struct address_space *mapping, struct list_head *head,
+ loff_t offset, bool is_register)
+{
+ struct vma_info *vi, *retvi;
+
+ vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
+ if (!vi)
+ return ERR_PTR(-ENOMEM);
+
+ mutex_lock(&mapping->i_mmap_mutex);
+ retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
+ mutex_unlock(&mapping->i_mmap_mutex);
+
+ if (!retvi)
+ kfree(vi);
+
+ return retvi;
+}
+
+static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+{
+ struct list_head try_list;
+ struct vm_area_struct *vma;
+ struct address_space *mapping;
+ struct vma_info *vi, *tmpvi;
+ struct mm_struct *mm;
+ loff_t vaddr;
+ int ret;
+
+ mapping = uprobe->inode->i_mapping;
+ INIT_LIST_HEAD(&try_list);
+
+ ret = 0;
+
+ for (;;) {
+ vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
+ if (!vi)
+ break;
+
+ if (IS_ERR(vi)) {
+ ret = PTR_ERR(vi);
+ break;
+ }
+
+ mm = vi->mm;
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, (unsigned long)vi->vaddr);
+ if (!vma || !valid_vma(vma, is_register)) {
+ list_del(&vi->probe_list);
+ kfree(vi);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ continue;
+ }
+ vaddr = vma_address(vma, uprobe->offset);
+ if (vma->vm_file->f_mapping->host != uprobe->inode ||
+ vaddr != vi->vaddr) {
+ list_del(&vi->probe_list);
+ kfree(vi);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ continue;
+ }
+
+ if (is_register)
+ ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
+ else
+ remove_breakpoint(uprobe, mm, vi->vaddr);
+
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ if (is_register) {
+ if (ret && ret == -EEXIST)
+ ret = 0;
+ if (ret)
+ break;
+ }
+ }
+
+ list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
+ list_del(&vi->probe_list);
+ kfree(vi);
+ }
+
+ return ret;
+}
+
+static int __uprobe_register(struct uprobe *uprobe)
+{
+ return register_for_each_vma(uprobe, true);
+}
+
+static void __uprobe_unregister(struct uprobe *uprobe)
+{
+ if (!register_for_each_vma(uprobe, false))
+ delete_uprobe(uprobe);
+
+ /* TODO : cant unregister? schedule a worker thread */
+}
+
+/*
+ * uprobe_register - register a probe
+ * @inode: the file in which the probe has to be placed.
+ * @offset: offset from the start of the file.
+ * @uc: information on howto handle the probe..
+ *
+ * Apart from the access refcount, uprobe_register() takes a creation
+ * refcount (thro alloc_uprobe) if and only if this @uprobe is getting
+ * inserted into the rbtree (i.e first consumer for a @inode:@offset
+ * tuple). Creation refcount stops uprobe_unregister from freeing the
+ * @uprobe even before the register operation is complete. Creation
+ * refcount is released when the last @uc for the @uprobe
+ * unregisters.
+ *
+ * Return errno if it cannot successully install probes
+ * else return 0 (success)
+ */
+int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
+{
+ struct uprobe *uprobe;
+ int ret;
+
+ if (!inode || !uc || uc->next)
+ return -EINVAL;
+
+ if (offset > i_size_read(inode))
+ return -EINVAL;
+
+ ret = 0;
+ mutex_lock(uprobes_hash(inode));
+ uprobe = alloc_uprobe(inode, offset);
+
+ if (uprobe && !consumer_add(uprobe, uc)) {
+ ret = __uprobe_register(uprobe);
+ if (ret) {
+ uprobe->consumers = NULL;
+ __uprobe_unregister(uprobe);
+ } else {
+ uprobe->flags |= UPROBE_RUN_HANDLER;
+ }
+ }
+
+ mutex_unlock(uprobes_hash(inode));
+ put_uprobe(uprobe);
+
+ return ret;
+}
+
+/*
+ * uprobe_unregister - unregister a already registered probe.
+ * @inode: the file in which the probe has to be removed.
+ * @offset: offset from the start of the file.
+ * @uc: identify which probe if multiple probes are colocated.
+ */
+void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consumer *uc)
+{
+ struct uprobe *uprobe;
+
+ if (!inode || !uc)
+ return;
+
+ uprobe = find_uprobe(inode, offset);
+ if (!uprobe)
+ return;
+
+ mutex_lock(uprobes_hash(inode));
+
+ if (consumer_del(uprobe, uc)) {
+ if (!uprobe->consumers) {
+ __uprobe_unregister(uprobe);
+ uprobe->flags &= ~UPROBE_RUN_HANDLER;
+ }
+ }
+
+ mutex_unlock(uprobes_hash(inode));
+ if (uprobe)
+ put_uprobe(uprobe);
+}
+
+/*
+ * Of all the nodes that correspond to the given inode, return the node
+ * with the least offset.
+ */
+static struct rb_node *find_least_offset_node(struct inode *inode)
+{
+ struct uprobe u = { .inode = inode, .offset = 0};
+ struct rb_node *n = uprobes_tree.rb_node;
+ struct rb_node *close_node = NULL;
+ struct uprobe *uprobe;
+ int match;
+
+ while (n) {
+ uprobe = rb_entry(n, struct uprobe, rb_node);
+ match = match_uprobe(&u, uprobe);
+
+ if (uprobe->inode == inode)
+ close_node = n;
+
+ if (!match)
+ return close_node;
+
+ if (match < 0)
+ n = n->rb_left;
+ else
+ n = n->rb_right;
+ }
+
+ return close_node;
+}
+
+/*
+ * For a given inode, build a list of probes that need to be inserted.
+ */
+static void build_probe_list(struct inode *inode, struct list_head *head)
+{
+ struct uprobe *uprobe;
+ unsigned long flags;
+ struct rb_node *n;
+
+ spin_lock_irqsave(&uprobes_treelock, flags);
+
+ n = find_least_offset_node(inode);
+
+ for (; n; n = rb_next(n)) {
+ uprobe = rb_entry(n, struct uprobe, rb_node);
+ if (uprobe->inode != inode)
+ break;
+
+ list_add(&uprobe->pending_list, head);
+ atomic_inc(&uprobe->ref);
+ }
+
+ spin_unlock_irqrestore(&uprobes_treelock, flags);
+}
+
+/*
+ * Called from mmap_region.
+ * called with mm->mmap_sem acquired.
+ *
+ * Return -ve no if we fail to insert probes and we cannot
+ * bail-out.
+ * Return 0 otherwise. i.e:
+ *
+ * - successful insertion of probes
+ * - (or) no possible probes to be inserted.
+ * - (or) insertion of probes failed but we can bail-out.
+ */
+int uprobe_mmap(struct vm_area_struct *vma)
+{
+ struct list_head tmp_list;
+ struct uprobe *uprobe, *u;
+ struct inode *inode;
+ int ret;
+
+ if (!atomic_read(&uprobe_events) || !valid_vma(vma, true))
+ return 0;
+
+ inode = vma->vm_file->f_mapping->host;
+ if (!inode)
+ return 0;
+
+ INIT_LIST_HEAD(&tmp_list);
+ mutex_lock(uprobes_mmap_hash(inode));
+ build_probe_list(inode, &tmp_list);
+
+ ret = 0;
+
+ list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+ loff_t vaddr;
+
+ list_del(&uprobe->pending_list);
+ if (!ret) {
+ vaddr = vma_address(vma, uprobe->offset);
+ if (vaddr >= vma->vm_start && vaddr < vma->vm_end) {
+ ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
+ /* Ignore double add: */
+ if (ret == -EEXIST)
+ ret = 0;
+ }
+ }
+ put_uprobe(uprobe);
+ }
+
+ mutex_unlock(uprobes_mmap_hash(inode));
+
+ return ret;
+}
+
+static int __init init_uprobes(void)
+{
+ int i;
+
+ for (i = 0; i < UPROBES_HASH_SZ; i++) {
+ mutex_init(&uprobes_mutex[i]);
+ mutex_init(&uprobes_mmap_mutex[i]);
+ }
+ return 0;
+}
+
+static void __exit exit_uprobes(void)
+{
+}
+
+module_init(init_uprobes);
+module_exit(exit_uprobes);
diff --git a/kernel/signal.c b/kernel/signal.c
index c73c4284160e..8511e39813c7 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1054,13 +1054,13 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
struct sigpending *pending;
struct sigqueue *q;
int override_rlimit;
-
- trace_signal_generate(sig, info, t);
+ int ret = 0, result;
assert_spin_locked(&t->sighand->siglock);
+ result = TRACE_SIGNAL_IGNORED;
if (!prepare_signal(sig, t, from_ancestor_ns))
- return 0;
+ goto ret;
pending = group ? &t->signal->shared_pending : &t->pending;
/*
@@ -1068,8 +1068,11 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
* exactly one non-rt signal, so that we can get more
* detailed information about the cause of the signal.
*/
+ result = TRACE_SIGNAL_ALREADY_PENDING;
if (legacy_queue(pending, sig))
- return 0;
+ goto ret;
+
+ result = TRACE_SIGNAL_DELIVERED;
/*
* fast-pathed signals for kernel-internal things like SIGSTOP
* or SIGKILL.
@@ -1127,14 +1130,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
* signal was rt and sent by user using something
* other than kill().
*/
- trace_signal_overflow_fail(sig, group, info);
- return -EAGAIN;
+ result = TRACE_SIGNAL_OVERFLOW_FAIL;
+ ret = -EAGAIN;
+ goto ret;
} else {
/*
* This is a silent loss of information. We still
* send the signal, but the *info bits are lost.
*/
- trace_signal_lose_info(sig, group, info);
+ result = TRACE_SIGNAL_LOSE_INFO;
}
}
@@ -1142,7 +1146,9 @@ out_set:
signalfd_notify(t, sig);
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
- return 0;
+ret:
+ trace_signal_generate(sig, info, t, group, result);
+ return ret;
}
static int send_signal(int sig, struct siginfo *info, struct task_struct *t,
@@ -1585,7 +1591,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
int sig = q->info.si_signo;
struct sigpending *pending;
unsigned long flags;
- int ret;
+ int ret, result;
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
@@ -1594,6 +1600,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
goto ret;
ret = 1; /* the signal is ignored */
+ result = TRACE_SIGNAL_IGNORED;
if (!prepare_signal(sig, t, 0))
goto out;
@@ -1605,6 +1612,7 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
*/
BUG_ON(q->info.si_code != SI_TIMER);
q->info.si_overrun++;
+ result = TRACE_SIGNAL_ALREADY_PENDING;
goto out;
}
q->info.si_overrun = 0;
@@ -1614,7 +1622,9 @@ int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
list_add_tail(&q->list, &pending->list);
sigaddset(&pending->signal, sig);
complete_signal(sig, t, group);
+ result = TRACE_SIGNAL_DELIVERED;
out:
+ trace_signal_generate(sig, &q->info, t, group, result);
unlock_task_sighand(t, &flags);
ret:
return ret;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d117262deba3..14bc092fb12c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -3,12 +3,9 @@
*
* started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
*
- * this code detects hard lockups: incidents in where on a CPU
- * the kernel does not respond to anything except NMI.
- *
- * Note: Most of this code is borrowed heavily from softlockup.c,
- * so thanks to Ingo for the initial implementation.
- * Some chunks also taken from arch/x86/kernel/apic/nmi.c, thanks
+ * Note: Most of this code is borrowed heavily from the original softlockup
+ * detector, so thanks to Ingo for the initial implementation.
+ * Some chunks also taken from the old x86-specific nmi watchdog code, thanks
* to those contributors as well.
*/
@@ -117,9 +114,10 @@ static unsigned long get_sample_period(void)
{
/*
* convert watchdog_thresh from seconds to ns
- * the divide by 5 is to give hrtimer 5 chances to
- * increment before the hardlockup detector generates
- * a warning
+ * the divide by 5 is to give hrtimer several chances (two
+ * or three with the current relation between the soft
+ * and hard thresholds) to increment before the
+ * hardlockup detector generates a warning
*/
return get_softlockup_thresh() * (NSEC_PER_SEC / 5);
}
@@ -336,9 +334,11 @@ static int watchdog(void *unused)
set_current_state(TASK_INTERRUPTIBLE);
/*
- * Run briefly once per second to reset the softlockup timestamp.
- * If this gets delayed for more than 60 seconds then the
- * debug-printout triggers in watchdog_timer_fn().
+ * Run briefly (kicked by the hrtimer callback function) once every
+ * get_sample_period() seconds (4 seconds by default) to reset the
+ * softlockup timestamp. If this gets delayed for more than
+ * 2*watchdog_thresh seconds then the debug-printout triggers in
+ * watchdog_timer_fn().
*/
while (!kthread_should_stop()) {
__touch_watchdog();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8745ac7d1f75..9739c0b45e93 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -166,18 +166,21 @@ config LOCKUP_DETECTOR
hard and soft lockups.
Softlockups are bugs that cause the kernel to loop in kernel
- mode for more than 60 seconds, without giving other tasks a
+ mode for more than 20 seconds, without giving other tasks a
chance to run. The current stack trace is displayed upon
detection and the system will stay locked up.
Hardlockups are bugs that cause the CPU to loop in kernel mode
- for more than 60 seconds, without letting other interrupts have a
+ for more than 10 seconds, without letting other interrupts have a
chance to run. The current stack trace is displayed upon detection
and the system will stay locked up.
The overhead should be minimal. A periodic hrtimer runs to
- generate interrupts and kick the watchdog task every 10-12 seconds.
- An NMI is generated every 60 seconds or so to check for hardlockups.
+ generate interrupts and kick the watchdog task every 4 seconds.
+ An NMI is generated every 10 seconds or so to check for hardlockups.
+
+ The frequency of hrtimer and NMI events and the soft and hard lockup
+ thresholds can be controlled through the sysctl watchdog_thresh.
config HARDLOCKUP_DETECTOR
def_bool LOCKUP_DETECTOR && PERF_EVENTS && HAVE_PERF_EVENTS_NMI && \
@@ -189,7 +192,8 @@ config BOOTPARAM_HARDLOCKUP_PANIC
help
Say Y here to enable the kernel to panic on "hard lockups",
which are bugs that cause the kernel to loop in kernel
- mode with interrupts disabled for more than 60 seconds.
+ mode with interrupts disabled for more than 10 seconds (configurable
+ using the watchdog_thresh sysctl).
Say N if unsure.
@@ -206,8 +210,8 @@ config BOOTPARAM_SOFTLOCKUP_PANIC
help
Say Y here to enable the kernel to panic on "soft lockups",
which are bugs that cause the kernel to loop in kernel
- mode for more than 60 seconds, without giving other tasks a
- chance to run.
+ mode for more than 20 seconds (configurable using the watchdog_thresh
+ sysctl), without giving other tasks a chance to run.
The panic can be used in combination with panic_timeout,
to cause the system to reboot automatically after a
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f758c7f4c81..5a863d328a44 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -30,6 +30,7 @@
#include <linux/perf_event.h>
#include <linux/audit.h>
#include <linux/khugepaged.h>
+#include <linux/uprobes.h>
#include <asm/uaccess.h>
#include <asm/cacheflush.h>
@@ -616,6 +617,13 @@ again: remove_next = 1 + (end > next->vm_end);
if (mapping)
mutex_unlock(&mapping->i_mmap_mutex);
+ if (root) {
+ uprobe_mmap(vma);
+
+ if (adjust_next)
+ uprobe_mmap(next);
+ }
+
if (remove_next) {
if (file) {
fput(file);
@@ -637,6 +645,8 @@ again: remove_next = 1 + (end > next->vm_end);
goto again;
}
}
+ if (insert && file)
+ uprobe_mmap(insert);
validate_mm(mm);
@@ -1329,6 +1339,11 @@ out:
mm->locked_vm += (len >> PAGE_SHIFT);
} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
make_pages_present(addr, addr + len);
+
+ if (file && uprobe_mmap(vma))
+ /* matching probes but cannot insert */
+ goto unmap_and_free_vma;
+
return addr;
unmap_and_free_vma:
@@ -2285,6 +2300,10 @@ int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
if ((vma->vm_flags & VM_ACCOUNT) &&
security_vm_enough_memory_mm(mm, vma_pages(vma)))
return -ENOMEM;
+
+ if (vma->vm_file && uprobe_mmap(vma))
+ return -EINVAL;
+
vma_link(mm, vma, prev, rb_link, rb_parent);
return 0;
}
@@ -2354,6 +2373,10 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
new_vma->vm_pgoff = pgoff;
if (new_vma->vm_file) {
get_file(new_vma->vm_file);
+
+ if (uprobe_mmap(new_vma))
+ goto out_free_mempol;
+
if (vma->vm_flags & VM_EXECUTABLE)
added_exe_file_vma(mm);
}
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 4626a398836a..ca600e09c8d4 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,3 +1,10 @@
+OUTPUT := ./
+ifeq ("$(origin O)", "command line")
+ ifneq ($(O),)
+ OUTPUT := $(O)/
+ endif
+endif
+
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
$(wildcard perf-*.txt)) \
@@ -6,10 +13,11 @@ MAN5_TXT=
MAN7_TXT=
MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
-MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
-MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
-DOC_HTML=$(MAN_HTML)
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
ARTICLES =
# with their own formatting rules.
@@ -18,11 +26,17 @@ API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technica
SP_ARTICLES += $(API_DOCS)
SP_ARTICLES += technical/api-index
-DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+_DOC_HTML = $(_MAN_HTML)
+_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
-DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
-DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
-DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
# Make the path relative to DESTDIR, not prefix
ifndef DESTDIR
@@ -150,9 +164,9 @@ man1: $(DOC_MAN1)
man5: $(DOC_MAN5)
man7: $(DOC_MAN7)
-info: perf.info perfman.info
+info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
-pdf: user-manual.pdf
+pdf: $(OUTPUT)user-manual.pdf
install: install-man
@@ -166,7 +180,7 @@ install-man: man
install-info: info
$(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
- $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir)
+ $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
if test -r $(DESTDIR)$(infodir)/dir; then \
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
$(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
@@ -176,7 +190,7 @@ install-info: info
install-pdf: pdf
$(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
- $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir)
+ $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
@@ -189,14 +203,14 @@ install-pdf: pdf
#
# Determine "include::" file references in asciidoc files.
#
-doc.dep : $(wildcard *.txt) build-docdep.perl
+$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
$(QUIET_GEN)$(RM) $@+ $@ && \
$(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
mv $@+ $@
--include doc.dep
+-include $(OUPTUT)doc.dep
-cmds_txt = cmds-ancillaryinterrogators.txt \
+_cmds_txt = cmds-ancillaryinterrogators.txt \
cmds-ancillarymanipulators.txt \
cmds-mainporcelain.txt \
cmds-plumbinginterrogators.txt \
@@ -205,32 +219,36 @@ cmds_txt = cmds-ancillaryinterrogators.txt \
cmds-synchelpers.txt \
cmds-purehelpers.txt \
cmds-foreignscminterface.txt
+cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
-$(cmds_txt): cmd-list.made
+$(cmds_txt): $(OUTPUT)cmd-list.made
-cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
$(QUIET_GEN)$(RM) $@ && \
$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
date >$@
clean:
- $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7
- $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info
- $(RM) howto-index.txt howto/*.html doc.dep
- $(RM) technical/api-*.html technical/api-index.txt
- $(RM) $(cmds_txt) *.made
-
-$(MAN_HTML): %.html : %.txt
+ $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
+ $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
+ $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
+ $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
+ $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
+ $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
+ $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
+ $(RM) $(cmds_txt) $(OUTPUT)*.made
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
mv $@+ $@
-%.1 %.5 %.7 : %.xml
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
- xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+ xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
-%.xml : %.txt
+$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
$(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
@@ -239,25 +257,25 @@ $(MAN_HTML): %.html : %.txt
XSLT = docbook.xsl
XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
-user-manual.html: user-manual.xml
+$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
$(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
-perf.info: user-manual.texi
- $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi
+$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
+ $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
-user-manual.texi: user-manual.xml
+$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
- $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+ $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
$(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
rm $@++ && \
mv $@+ $@
-user-manual.pdf: user-manual.xml
+$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
$(QUIET_DBLATEX)$(RM) $@+ $@ && \
$(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
mv $@+ $@
-perfman.texi: $(MAN_XML) cat-texi.perl
+$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
$(QUIET_DB2TEXI)$(RM) $@+ $@ && \
($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
--to-stdout $(xml) &&) true) > $@++ && \
@@ -265,7 +283,7 @@ perfman.texi: $(MAN_XML) cat-texi.perl
rm $@++ && \
mv $@+ $@
-perfman.info: perfman.texi
+$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
$(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index d6b2a4f2108b..c7f5f55634ac 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -8,7 +8,7 @@ perf-lock - Analyze lock events
SYNOPSIS
--------
[verse]
-'perf lock' {record|report|trace}
+'perf lock' {record|report|script|info}
DESCRIPTION
-----------
@@ -20,10 +20,13 @@ and statistics with this 'perf lock' command.
produces the file "perf.data" which contains tracing
results of lock events.
- 'perf lock trace' shows raw lock events.
-
'perf lock report' reports statistical data.
+ 'perf lock script' shows raw lock events.
+
+ 'perf lock info' shows metadata like threads or addresses
+ of lock instances.
+
COMMON OPTIONS
--------------
@@ -47,6 +50,17 @@ REPORT OPTIONS
Sorting key. Possible values: acquired (default), contended,
wait_total, wait_max, wait_min.
+INFO OPTIONS
+------------
+
+-t::
+--threads::
+ dump thread list in perf.data
+
+-m::
+--map::
+ dump map of lock instances (address:name table)
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 2937f7e14bb7..a5766b4b0125 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -52,11 +52,15 @@ OPTIONS
-p::
--pid=::
- Record events on existing process ID.
+ Record events on existing process ID (comma separated list).
-t::
--tid=::
- Record events on existing thread ID.
+ Record events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
-r::
--realtime=::
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2f6cef43da25..e9cbfcddfa3f 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
-f::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr.
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -200,6 +200,9 @@ OPTIONS
It currently includes: cpu and numa topology of the host system.
It can only be used with the perf script report mode.
+--show-kernel-path::
+ Try to resolve the path of [kernel.kallsyms]
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 8966b9ab2014..2fa173b51970 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -35,11 +35,11 @@ OPTIONS
child tasks do not inherit counters
-p::
--pid=<pid>::
- stat events on existing process id
+ stat events on existing process id (comma separated list)
-t::
--tid=<tid>::
- stat events on existing thread id
+ stat events on existing thread id (comma separated list)
-a::
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index b1a5bbbfebef..4a5680cb242e 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -72,11 +72,15 @@ Default is to monitor all CPUS.
-p <pid>::
--pid=<pid>::
- Profile events on existing Process ID.
+ Profile events on existing Process ID (comma separated list).
-t <tid>::
--tid=<tid>::
- Profile events on existing thread ID.
+ Profile events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
-r <priority>::
--realtime=<priority>::
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1078c5fadd5b..5476bc0a1eac 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@ lib/rbtree.c
include/linux/swab.h
arch/*/include/asm/unistd*.h
arch/*/lib/memcpy*.S
+arch/*/lib/memset*.S
include/linux/poison.h
include/linux/magic.h
include/linux/hw_breakpoint.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 7c12650165ae..e011b5060f92 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -61,7 +61,7 @@ ifeq ($(ARCH),x86_64)
ifeq (${IS_X86_64}, 1)
RAW_ARCH := x86_64
ARCH_CFLAGS := -DARCH_X86_64
- ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S
+ ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
endif
endif
@@ -183,7 +183,10 @@ SCRIPT_SH += perf-archive.sh
grep-libs = $(filter -l%,$(1))
strip-libs = $(filter-out -l%,$(1))
-$(OUTPUT)python/perf.so: $(PYRF_OBJS)
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+
+$(OUTPUT)python/perf.so: $(PYRF_OBJS) $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
$(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
--quiet build_ext; \
mkdir -p $(OUTPUT)python && \
@@ -256,6 +259,7 @@ LIB_H += util/callchain.h
LIB_H += util/build-id.h
LIB_H += util/debug.h
LIB_H += util/debugfs.h
+LIB_H += util/sysfs.h
LIB_H += util/event.h
LIB_H += util/evsel.h
LIB_H += util/evlist.h
@@ -302,6 +306,7 @@ LIB_OBJS += $(OUTPUT)util/build-id.o
LIB_OBJS += $(OUTPUT)util/config.o
LIB_OBJS += $(OUTPUT)util/ctype.o
LIB_OBJS += $(OUTPUT)util/debugfs.o
+LIB_OBJS += $(OUTPUT)util/sysfs.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
LIB_OBJS += $(OUTPUT)util/evlist.o
@@ -359,8 +364,10 @@ BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
ifeq ($(RAW_ARCH),x86_64)
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
endif
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
@@ -792,7 +799,6 @@ help:
@echo ' quick-install-html - install the html documentation quickly'
@echo ''
@echo 'Perf maintainer targets:'
- @echo ' distclean - alias to clean'
@echo ' clean - clean all binary objects and build output'
doc:
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index f7781c6267c0..a09bece6dad2 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -4,6 +4,7 @@
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
+extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index d588b87696fc..d66ab799b35f 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -2,3 +2,11 @@
MEMCPY_FN(__memcpy,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c,
+ "x86-64-movsq",
+ "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_c_e,
+ "x86-64-movsb",
+ "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
index 185a96d66dd1..fcd9cf00600a 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm.S
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -1,4 +1,8 @@
-
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemcpy_c globl memcpy_c; memcpy_c
+#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e
#include "../../../arch/x86/lib/memcpy_64.S"
/*
* We need to provide note.GNU-stack section, saying that we want
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index db82021f4b91..71557225bf92 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -5,7 +5,6 @@
*
* Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
*/
-#include <ctype.h>
#include "../perf.h"
#include "../util/util.h"
@@ -24,6 +23,7 @@
static const char *length_str = "1MB";
static const char *routine = "default";
+static int iterations = 1;
static bool use_clock;
static int clock_fd;
static bool only_prefault;
@@ -35,6 +35,8 @@ static const struct option options[] = {
"available unit: B, MB, GB (upper and lower)"),
OPT_STRING('r', "routine", &routine, "default",
"Specify routine to copy"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "repeat memcpy() invocation this number of times"),
OPT_BOOLEAN('c', "clock", &use_clock,
"Use CPU clock for measuring"),
OPT_BOOLEAN('o', "only-prefault", &only_prefault,
@@ -121,6 +123,7 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
{
u64 clock_start = 0ULL, clock_end = 0ULL;
void *src = NULL, *dst = NULL;
+ int i;
alloc_mem(&src, &dst, len);
@@ -128,7 +131,8 @@ static u64 do_memcpy_clock(memcpy_t fn, size_t len, bool prefault)
fn(dst, src, len);
clock_start = get_clock();
- fn(dst, src, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, src, len);
clock_end = get_clock();
free(src);
@@ -140,6 +144,7 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
{
struct timeval tv_start, tv_end, tv_diff;
void *src = NULL, *dst = NULL;
+ int i;
alloc_mem(&src, &dst, len);
@@ -147,7 +152,8 @@ static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
fn(dst, src, len);
BUG_ON(gettimeofday(&tv_start, NULL));
- fn(dst, src, len);
+ for (i = 0; i < iterations; ++i)
+ fn(dst, src, len);
BUG_ON(gettimeofday(&tv_end, NULL));
timersub(&tv_end, &tv_start, &tv_diff);
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 000000000000..a040fa77665b
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,12 @@
+
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) \
+ extern void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 000000000000..a71dff97c1f5
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,12 @@
+
+MEMSET_FN(__memset,
+ "x86-64-unrolled",
+ "unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c,
+ "x86-64-stosq",
+ "movsq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_c_e,
+ "x86-64-stosb",
+ "movsb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 000000000000..9e5af89ed13a
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,13 @@
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define Lmemset_c globl memset_c; memset_c
+#define Lmemset_c_e globl memset_c_e; memset_c_e
+#include "../../../arch/x86/lib/memset_64.S"
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
new file mode 100644
index 000000000000..e9079185bd72
--- /dev/null
+++ b/tools/perf/bench/mem-memset.c
@@ -0,0 +1,297 @@
+/*
+ * mem-memset.c
+ *
+ * memset: Simple memory set in various ways
+ *
+ * Trivial clone of mem-memcpy.c.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int iterations = 1;
+static bool use_clock;
+static int clock_fd;
+static bool only_prefault;
+static bool no_prefault;
+
+static const struct option options[] = {
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_INTEGER('i', "iterations", &iterations,
+ "repeat memset() invocation this number of times"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_BOOLEAN('o', "only-prefault", &only_prefault,
+ "Show only the result with page faults before memset()"),
+ OPT_BOOLEAN('n', "no-prefault", &no_prefault,
+ "Show only the result without page faults before memset()"),
+ OPT_END()
+};
+
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct routine {
+ const char *name;
+ const char *desc;
+ memset_t fn;
+};
+
+static const struct routine routines[] = {
+ { "default",
+ "Default memset() provided by glibc",
+ memset },
+#ifdef ARCH_X86_64
+
+#define MEMSET_FN(fn, name, desc) { name, desc, fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memset_usage[] = {
+ "perf bench mem memset <options>",
+ NULL
+};
+
+static struct perf_event_attr clock_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static void init_clock(void)
+{
+ clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+
+ if (clock_fd < 0 && errno == ENOSYS)
+ die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+ else
+ BUG_ON(clock_fd < 0);
+}
+
+static u64 get_clock(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(clock_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec +
+ (double)ts->tv_usec / (double)1000000;
+}
+
+static void alloc_mem(void **dst, size_t length)
+{
+ *dst = zalloc(length);
+ if (!dst)
+ die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memset_clock(memset_t fn, size_t len, bool prefault)
+{
+ u64 clock_start = 0ULL, clock_end = 0ULL;
+ void *dst = NULL;
+ int i;
+
+ alloc_mem(&dst, len);
+
+ if (prefault)
+ fn(dst, -1, len);
+
+ clock_start = get_clock();
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
+ clock_end = get_clock();
+
+ free(dst);
+ return clock_end - clock_start;
+}
+
+static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
+{
+ struct timeval tv_start, tv_end, tv_diff;
+ void *dst = NULL;
+ int i;
+
+ alloc_mem(&dst, len);
+
+ if (prefault)
+ fn(dst, -1, len);
+
+ BUG_ON(gettimeofday(&tv_start, NULL));
+ for (i = 0; i < iterations; ++i)
+ fn(dst, i, len);
+ BUG_ON(gettimeofday(&tv_end, NULL));
+
+ timersub(&tv_end, &tv_start, &tv_diff);
+
+ free(dst);
+ return (double)((double)len / timeval2double(&tv_diff));
+}
+
+#define pf (no_prefault ? 0 : 1)
+
+#define print_bps(x) do { \
+ if (x < K) \
+ printf(" %14lf B/Sec", x); \
+ else if (x < K * K) \
+ printf(" %14lfd KB/Sec", x / K); \
+ else if (x < K * K * K) \
+ printf(" %14lf MB/Sec", x / K / K); \
+ else \
+ printf(" %14lf GB/Sec", x / K / K / K); \
+ } while (0)
+
+int bench_mem_memset(int argc, const char **argv,
+ const char *prefix __used)
+{
+ int i;
+ size_t len;
+ double result_bps[2];
+ u64 result_clock[2];
+
+ argc = parse_options(argc, argv, options,
+ bench_mem_memset_usage, 0);
+
+ if (use_clock)
+ init_clock();
+
+ len = (size_t)perf_atoll((char *)length_str);
+
+ result_clock[0] = result_clock[1] = 0ULL;
+ result_bps[0] = result_bps[1] = 0.0;
+
+ if ((s64)len <= 0) {
+ fprintf(stderr, "Invalid length:%s\n", length_str);
+ return 1;
+ }
+
+ /* same to without specifying either of prefault and no-prefault */
+ if (only_prefault && no_prefault)
+ only_prefault = no_prefault = false;
+
+ for (i = 0; routines[i].name; i++) {
+ if (!strcmp(routines[i].name, routine))
+ break;
+ }
+ if (!routines[i].name) {
+ printf("Unknown routine:%s\n", routine);
+ printf("Available routines...\n");
+ for (i = 0; routines[i].name; i++) {
+ printf("\t%s ... %s\n",
+ routines[i].name, routines[i].desc);
+ }
+ return 1;
+ }
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Copying %s Bytes ...\n\n", length_str);
+
+ if (!only_prefault && !no_prefault) {
+ /* show both of results */
+ if (use_clock) {
+ result_clock[0] =
+ do_memset_clock(routines[i].fn, len, false);
+ result_clock[1] =
+ do_memset_clock(routines[i].fn, len, true);
+ } else {
+ result_bps[0] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, false);
+ result_bps[1] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, true);
+ }
+ } else {
+ if (use_clock) {
+ result_clock[pf] =
+ do_memset_clock(routines[i].fn,
+ len, only_prefault);
+ } else {
+ result_bps[pf] =
+ do_memset_gettimeofday(routines[i].fn,
+ len, only_prefault);
+ }
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte\n",
+ (double)result_clock[0]
+ / (double)len);
+ printf(" %14lf Clock/Byte (with prefault)\n ",
+ (double)result_clock[1]
+ / (double)len);
+ } else {
+ print_bps(result_bps[0]);
+ printf("\n");
+ print_bps(result_bps[1]);
+ printf(" (with prefault)\n");
+ }
+ } else {
+ if (use_clock) {
+ printf(" %14lf Clock/Byte",
+ (double)result_clock[pf]
+ / (double)len);
+ } else
+ print_bps(result_bps[pf]);
+
+ printf("%s\n", only_prefault ? " (with prefault)" : "");
+ }
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ if (!only_prefault && !no_prefault) {
+ if (use_clock) {
+ printf("%lf %lf\n",
+ (double)result_clock[0] / (double)len,
+ (double)result_clock[1] / (double)len);
+ } else {
+ printf("%lf %lf\n",
+ result_bps[0], result_bps[1]);
+ }
+ } else {
+ if (use_clock) {
+ printf("%lf\n", (double)result_clock[pf]
+ / (double)len);
+ } else
+ printf("%lf\n", result_bps[pf]);
+ }
+ break;
+ default:
+ /* reaching this means there's some disaster: */
+ die("unknown format: %d\n", bench_format);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index fcb96269852a..b0e74ab2d7a2 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -52,6 +52,9 @@ static struct bench_suite mem_suites[] = {
{ "memcpy",
"Simple memory copy in various ways",
bench_mem_memcpy },
+ { "memset",
+ "Simple memory set in various ways",
+ bench_mem_memset },
suite_all,
{ NULL,
NULL,
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 2296c391d0f5..12c814838993 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -922,12 +922,12 @@ static const struct option info_options[] = {
OPT_BOOLEAN('t', "threads", &info_threads,
"dump thread list in perf.data"),
OPT_BOOLEAN('m', "map", &info_map,
- "map of lock instances (name:address table)"),
+ "map of lock instances (address:name table)"),
OPT_END()
};
static const char * const lock_usage[] = {
- "perf lock [<options>] {record|trace|report}",
+ "perf lock [<options>] {record|report|script|info}",
NULL
};
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index fb8566181f27..4935c09dd5b5 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -58,7 +58,7 @@ static struct {
struct perf_probe_event events[MAX_PROBES];
struct strlist *dellist;
struct line_range line_range;
- const char *target_module;
+ const char *target;
int max_probe_points;
struct strfilter *filter;
} params;
@@ -246,7 +246,7 @@ static const struct option options[] = {
"file", "vmlinux pathname"),
OPT_STRING('s', "source", &symbol_conf.source_prefix,
"directory", "path to kernel source"),
- OPT_STRING('m', "module", &params.target_module,
+ OPT_STRING('m', "module", &params.target,
"modname|path",
"target module name (for online) or path (for offline)"),
#endif
@@ -333,7 +333,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
if (!params.filter)
params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
NULL);
- ret = show_available_funcs(params.target_module,
+ ret = show_available_funcs(params.target,
params.filter);
strfilter__delete(params.filter);
if (ret < 0)
@@ -354,7 +354,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
usage_with_options(probe_usage, options);
}
- ret = show_line_range(&params.line_range, params.target_module);
+ ret = show_line_range(&params.line_range, params.target);
if (ret < 0)
pr_err(" Error: Failed to show lines. (%d)\n", ret);
return ret;
@@ -371,7 +371,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
ret = show_available_vars(params.events, params.nevents,
params.max_probe_points,
- params.target_module,
+ params.target,
params.filter,
params.show_ext_vars);
strfilter__delete(params.filter);
@@ -393,7 +393,7 @@ int cmd_probe(int argc, const char **argv, const char *prefix __used)
if (params.nevents) {
ret = add_perf_probe_events(params.events, params.nevents,
params.max_probe_points,
- params.target_module,
+ params.target,
params.force_add);
if (ret < 0) {
pr_err(" Error: Failed to add events. (%d)\n", ret);
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0abfb18b911f..75d230fef202 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -44,6 +44,7 @@ struct perf_record {
struct perf_evlist *evlist;
struct perf_session *session;
const char *progname;
+ const char *uid_str;
int output;
unsigned int page_size;
int realtime_prio;
@@ -204,8 +205,11 @@ static void perf_record__open(struct perf_record *rec)
if (opts->group && pos != first)
group_fd = first->fd;
+fallback_missing_features:
+ if (opts->exclude_guest_missing)
+ attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
- attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+ attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
if (perf_evsel__open(pos, evlist->cpus, evlist->threads,
opts->group, group_fd) < 0) {
@@ -217,15 +221,23 @@ try_again:
} else if (err == ENODEV && opts->cpu_list) {
die("No such device - did you specify"
" an out-of-range profile CPU?\n");
- } else if (err == EINVAL && opts->sample_id_all_avail) {
- /*
- * Old kernel, no attr->sample_id_type_all field
- */
- opts->sample_id_all_avail = false;
- if (!opts->sample_time && !opts->raw_samples && !time_needed)
- attr->sample_type &= ~PERF_SAMPLE_TIME;
-
- goto retry_sample_id;
+ } else if (err == EINVAL) {
+ if (!opts->exclude_guest_missing &&
+ (attr->exclude_guest || attr->exclude_host)) {
+ pr_debug("Old kernel, cannot exclude "
+ "guest or host samples.\n");
+ opts->exclude_guest_missing = true;
+ goto fallback_missing_features;
+ } else if (!opts->sample_id_all_missing) {
+ /*
+ * Old kernel, no attr->sample_id_type_all field
+ */
+ opts->sample_id_all_missing = true;
+ if (!opts->sample_time && !opts->raw_samples && !time_needed)
+ attr->sample_type &= ~PERF_SAMPLE_TIME;
+
+ goto retry_sample_id;
+ }
}
/*
@@ -385,7 +397,7 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
struct stat st;
int flags;
- int err, output;
+ int err, output, feat;
unsigned long waking = 0;
const bool forks = argc > 0;
struct machine *machine;
@@ -452,8 +464,14 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
rec->session = session;
- if (!rec->no_buildid)
- perf_header__set_feat(&session->header, HEADER_BUILD_ID);
+ for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+ perf_header__set_feat(&session->header, feat);
+
+ if (rec->no_buildid)
+ perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+
+ if (!have_tracepoints(&evsel_list->entries))
+ perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
if (!rec->file_new) {
err = perf_session__read_header(session, output);
@@ -461,22 +479,6 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
goto out_delete_session;
}
- if (have_tracepoints(&evsel_list->entries))
- perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
-
- perf_header__set_feat(&session->header, HEADER_HOSTNAME);
- perf_header__set_feat(&session->header, HEADER_OSRELEASE);
- perf_header__set_feat(&session->header, HEADER_ARCH);
- perf_header__set_feat(&session->header, HEADER_CPUDESC);
- perf_header__set_feat(&session->header, HEADER_NRCPUS);
- perf_header__set_feat(&session->header, HEADER_EVENT_DESC);
- perf_header__set_feat(&session->header, HEADER_CMDLINE);
- perf_header__set_feat(&session->header, HEADER_VERSION);
- perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
- perf_header__set_feat(&session->header, HEADER_TOTAL_MEM);
- perf_header__set_feat(&session->header, HEADER_NUMA_TOPOLOGY);
- perf_header__set_feat(&session->header, HEADER_CPUID);
-
if (forks) {
err = perf_evlist__prepare_workload(evsel_list, opts, argv);
if (err < 0) {
@@ -503,9 +505,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
return err;
}
- if (!!rec->no_buildid
+ if (!rec->no_buildid
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
- pr_err("Couldn't generating buildids. "
+ pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
return -1;
}
@@ -654,13 +656,10 @@ static const char * const record_usage[] = {
*/
static struct perf_record record = {
.opts = {
- .target_pid = -1,
- .target_tid = -1,
.mmap_pages = UINT_MAX,
.user_freq = UINT_MAX,
.user_interval = ULLONG_MAX,
.freq = 1000,
- .sample_id_all_avail = true,
},
.write_mode = WRITE_FORCE,
.file_new = true,
@@ -679,9 +678,9 @@ const struct option record_options[] = {
parse_events_option),
OPT_CALLBACK(0, "filter", &record.evlist, "filter",
"event filter", parse_filter),
- OPT_INTEGER('p', "pid", &record.opts.target_pid,
+ OPT_STRING('p', "pid", &record.opts.target_pid, "pid",
"record events on existing process id"),
- OPT_INTEGER('t', "tid", &record.opts.target_tid,
+ OPT_STRING('t', "tid", &record.opts.target_tid, "tid",
"record events on existing thread id"),
OPT_INTEGER('r', "realtime", &record.realtime_prio,
"collect data with this RT SCHED_FIFO priority"),
@@ -727,6 +726,7 @@ const struct option record_options[] = {
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
+ OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
OPT_END()
};
@@ -747,8 +747,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
- if (!argc && rec->opts.target_pid == -1 && rec->opts.target_tid == -1 &&
- !rec->opts.system_wide && !rec->opts.cpu_list)
+ if (!argc && !rec->opts.target_pid && !rec->opts.target_tid &&
+ !rec->opts.system_wide && !rec->opts.cpu_list && !rec->uid_str)
usage_with_options(record_usage, record_options);
if (rec->force && rec->append_file) {
@@ -788,11 +788,17 @@ int cmd_record(int argc, const char **argv, const char *prefix __used)
goto out_symbol_exit;
}
- if (rec->opts.target_pid != -1)
+ rec->opts.uid = parse_target_uid(rec->uid_str, rec->opts.target_tid,
+ rec->opts.target_pid);
+ if (rec->uid_str != NULL && rec->opts.uid == UINT_MAX - 1)
+ goto out_free_fd;
+
+ if (rec->opts.target_pid)
rec->opts.target_tid = rec->opts.target_pid;
if (perf_evlist__create_maps(evsel_list, rec->opts.target_pid,
- rec->opts.target_tid, rec->opts.cpu_list) < 0)
+ rec->opts.target_tid, rec->opts.uid,
+ rec->opts.cpu_list) < 0)
usage_with_options(record_usage, record_options);
list_for_each_entry(pos, &evsel_list->entries, node) {
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bb68ddf257b7..d4ce733b9eba 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -40,6 +40,7 @@ enum perf_output_field {
PERF_OUTPUT_SYM = 1U << 8,
PERF_OUTPUT_DSO = 1U << 9,
PERF_OUTPUT_ADDR = 1U << 10,
+ PERF_OUTPUT_SYMOFFSET = 1U << 11,
};
struct output_option {
@@ -57,6 +58,7 @@ struct output_option {
{.str = "sym", .field = PERF_OUTPUT_SYM},
{.str = "dso", .field = PERF_OUTPUT_DSO},
{.str = "addr", .field = PERF_OUTPUT_ADDR},
+ {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
};
/* default set to maintain compatibility with current format */
@@ -193,6 +195,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
"to symbols.\n");
return -EINVAL;
}
+ if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
+ pr_err("Display of offsets requested but symbol is not"
+ "selected.\n");
+ return -EINVAL;
+ }
if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
pr_err("Display of DSO requested but neither sample IP nor "
"sample address\nis selected. Hence, no addresses to convert "
@@ -300,10 +307,17 @@ static void print_sample_start(struct perf_sample *sample,
} else
evname = __event_name(attr->type, attr->config);
- printf("%s: ", evname ? evname : "(unknown)");
+ printf("%s: ", evname ? evname : "[unknown]");
}
}
+static bool is_bts_event(struct perf_event_attr *attr)
+{
+ return ((attr->type == PERF_TYPE_HARDWARE) &&
+ (attr->config & PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+ (attr->sample_period == 1));
+}
+
static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
{
if ((attr->type == PERF_TYPE_SOFTWARE) &&
@@ -312,6 +326,9 @@ static bool sample_addr_correlates_sym(struct perf_event_attr *attr)
(attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)))
return true;
+ if (is_bts_event(attr))
+ return true;
+
return false;
}
@@ -323,7 +340,6 @@ static void print_sample_addr(union perf_event *event,
{
struct addr_location al;
u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- const char *symname, *dsoname;
printf("%16" PRIx64, sample->addr);
@@ -343,22 +359,46 @@ static void print_sample_addr(union perf_event *event,
al.sym = map__find_symbol(al.map, al.addr, NULL);
if (PRINT_FIELD(SYM)) {
- if (al.sym && al.sym->name)
- symname = al.sym->name;
+ printf(" ");
+ if (PRINT_FIELD(SYMOFFSET))
+ symbol__fprintf_symname_offs(al.sym, &al, stdout);
else
- symname = "";
-
- printf(" %16s", symname);
+ symbol__fprintf_symname(al.sym, stdout);
}
if (PRINT_FIELD(DSO)) {
- if (al.map && al.map->dso && al.map->dso->name)
- dsoname = al.map->dso->name;
- else
- dsoname = "";
+ printf(" (");
+ map__fprintf_dsoname(al.map, stdout);
+ printf(")");
+ }
+}
- printf(" (%s)", dsoname);
+static void print_sample_bts(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ struct thread *thread)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ /* print branch_from information */
+ if (PRINT_FIELD(IP)) {
+ if (!symbol_conf.use_callchain)
+ printf(" ");
+ else
+ printf("\n");
+ perf_event__print_ip(event, sample, machine, evsel,
+ PRINT_FIELD(SYM), PRINT_FIELD(DSO),
+ PRINT_FIELD(SYMOFFSET));
}
+
+ printf(" => ");
+
+ /* print branch_to information */
+ if (PRINT_FIELD(ADDR))
+ print_sample_addr(event, sample, machine, thread, attr);
+
+ printf("\n");
}
static void process_event(union perf_event *event __unused,
@@ -374,6 +414,11 @@ static void process_event(union perf_event *event __unused,
print_sample_start(sample, thread, attr);
+ if (is_bts_event(attr)) {
+ print_sample_bts(event, sample, evsel, machine, thread);
+ return;
+ }
+
if (PRINT_FIELD(TRACE))
print_trace_event(sample->cpu, sample->raw_data,
sample->raw_size);
@@ -387,7 +432,8 @@ static void process_event(union perf_event *event __unused,
else
printf("\n");
perf_event__print_ip(event, sample, machine, evsel,
- PRINT_FIELD(SYM), PRINT_FIELD(DSO));
+ PRINT_FIELD(SYM), PRINT_FIELD(DSO),
+ PRINT_FIELD(SYMOFFSET));
}
printf("\n");
@@ -1097,7 +1143,10 @@ static const struct option options[] = {
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_CALLBACK('f', "fields", NULL, "str",
- "comma separated output fields prepend with 'type:'. Valid types: hw,sw,trace,raw. Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr",
+ "comma separated output fields prepend with 'type:'. "
+ "Valid types: hw,sw,trace,raw. "
+ "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
+ "addr,symoff",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -1106,6 +1155,9 @@ static const struct option options[] = {
"only display events for these comms"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
+ OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
+ "Show the path of [kernel.kallsyms]"),
+
OPT_END()
};
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f5d2a63eba66..ea40e4e8b227 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -182,8 +182,8 @@ static int run_count = 1;
static bool no_inherit = false;
static bool scale = true;
static bool no_aggr = false;
-static pid_t target_pid = -1;
-static pid_t target_tid = -1;
+static const char *target_pid;
+static const char *target_tid;
static pid_t child_pid = -1;
static bool null_run = false;
static int detailed_run = 0;
@@ -296,7 +296,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel,
if (system_wide)
return perf_evsel__open_per_cpu(evsel, evsel_list->cpus,
group, group_fd);
- if (target_pid == -1 && target_tid == -1) {
+ if (!target_pid && !target_tid) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
@@ -446,7 +446,7 @@ static int run_perf_stat(int argc __used, const char **argv)
exit(-1);
}
- if (target_tid == -1 && target_pid == -1 && !system_wide)
+ if (!target_tid && !target_pid && !system_wide)
evsel_list->threads->map[0] = child_pid;
/*
@@ -576,6 +576,8 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
if (perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK))
fprintf(output, " # %8.3f CPUs utilized ",
avg / avg_stats(&walltime_nsecs_stats));
+ else
+ fprintf(output, " ");
}
/* used for get_ratio_color() */
@@ -844,12 +846,18 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
fprintf(output, " # %8.3f GHz ", ratio);
} else if (runtime_nsecs_stats[cpu].n != 0) {
+ char unit = 'M';
+
total = avg_stats(&runtime_nsecs_stats[cpu]);
if (total)
ratio = 1000.0 * avg / total;
+ if (ratio < 0.001) {
+ ratio *= 1000;
+ unit = 'K';
+ }
- fprintf(output, " # %8.3f M/sec ", ratio);
+ fprintf(output, " # %8.3f %c/sec ", ratio, unit);
} else {
fprintf(output, " ");
}
@@ -960,14 +968,14 @@ static void print_stat(int argc, const char **argv)
if (!csv_output) {
fprintf(output, "\n");
fprintf(output, " Performance counter stats for ");
- if(target_pid == -1 && target_tid == -1) {
+ if (!target_pid && !target_tid) {
fprintf(output, "\'%s", argv[0]);
for (i = 1; i < argc; i++)
fprintf(output, " %s", argv[i]);
- } else if (target_pid != -1)
- fprintf(output, "process id \'%d", target_pid);
+ } else if (target_pid)
+ fprintf(output, "process id \'%s", target_pid);
else
- fprintf(output, "thread id \'%d", target_tid);
+ fprintf(output, "thread id \'%s", target_tid);
fprintf(output, "\'");
if (run_count > 1)
@@ -1041,10 +1049,10 @@ static const struct option options[] = {
"event filter", parse_filter),
OPT_BOOLEAN('i', "no-inherit", &no_inherit,
"child tasks do not inherit counters"),
- OPT_INTEGER('p', "pid", &target_pid,
- "stat events on existing process id"),
- OPT_INTEGER('t', "tid", &target_tid,
- "stat events on existing thread id"),
+ OPT_STRING('p', "pid", &target_pid, "pid",
+ "stat events on existing process id"),
+ OPT_STRING('t', "tid", &target_tid, "tid",
+ "stat events on existing thread id"),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_BOOLEAN('g', "group", &group,
@@ -1182,7 +1190,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
} else if (big_num_opt == 0) /* User passed --no-big-num */
big_num = false;
- if (!argc && target_pid == -1 && target_tid == -1)
+ if (!argc && !target_pid && !target_tid)
usage_with_options(stat_usage, options);
if (run_count <= 0)
usage_with_options(stat_usage, options);
@@ -1198,10 +1206,11 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
if (add_default_attributes())
goto out;
- if (target_pid != -1)
+ if (target_pid)
target_tid = target_pid;
- evsel_list->threads = thread_map__new(target_pid, target_tid);
+ evsel_list->threads = thread_map__new_str(target_pid,
+ target_tid, UINT_MAX);
if (evsel_list->threads == NULL) {
pr_err("Problems finding threads of monitor\n");
usage_with_options(stat_usage, options);
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 3854e869dce1..3e087ce8daa6 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -15,6 +15,8 @@
#include "util/thread_map.h"
#include "../../include/linux/hw_breakpoint.h"
+#include <sys/mman.h>
+
static int vmlinux_matches_kallsyms_filter(struct map *map __used, struct symbol *sym)
{
bool *visited = symbol__priv(sym);
@@ -276,7 +278,7 @@ static int test__open_syscall_event(void)
return -1;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -342,7 +344,7 @@ static int test__open_syscall_event_on_all_cpus(void)
return -1;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -490,7 +492,7 @@ static int test__basic_mmap(void)
expected_nr_events[i] = random() % 257;
}
- threads = thread_map__new(-1, getpid());
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
if (threads == NULL) {
pr_debug("thread_map__new\n");
return -1;
@@ -1008,12 +1010,9 @@ realloc:
static int test__PERF_RECORD(void)
{
struct perf_record_opts opts = {
- .target_pid = -1,
- .target_tid = -1,
.no_delay = true,
.freq = 10,
.mmap_pages = 256,
- .sample_id_all_avail = true,
};
cpu_set_t *cpu_mask = NULL;
size_t cpu_mask_size = 0;
@@ -1054,7 +1053,7 @@ static int test__PERF_RECORD(void)
* we're monitoring, the one forked there.
*/
err = perf_evlist__create_maps(evlist, opts.target_pid,
- opts.target_tid, opts.cpu_list);
+ opts.target_tid, UINT_MAX, opts.cpu_list);
if (err < 0) {
pr_debug("Not enough memory to create thread/cpu maps\n");
goto out_delete_evlist;
@@ -1296,6 +1295,173 @@ out:
return (err < 0 || errs > 0) ? -1 : 0;
}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+#define barrier() asm volatile("" ::: "memory")
+
+static u64 rdpmc(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 mmap_read_self(void *addr)
+{
+ struct perf_event_mmap_page *pc = addr;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+ do {
+ seq = pc->lock;
+ barrier();
+
+ enabled = pc->time_enabled;
+ running = pc->time_running;
+
+ if (enabled != running) {
+ cyc = rdtsc();
+ time_mult = pc->time_mult;
+ time_shift = pc->time_shift;
+ time_offset = pc->time_offset;
+ }
+
+ idx = pc->index;
+ count = pc->offset;
+ if (idx)
+ count += rdpmc(idx - 1);
+
+ barrier();
+ } while (pc->lock != seq);
+
+ if (enabled != running) {
+ u64 quot, rem;
+
+ quot = (cyc >> time_shift);
+ rem = cyc & ((1 << time_shift) - 1);
+ delta = time_offset + quot * time_mult +
+ ((rem * time_mult) >> time_shift);
+
+ enabled += delta;
+ if (idx)
+ running += delta;
+
+ quot = count / running;
+ rem = count % running;
+ count = quot * enabled + (rem * enabled) / running;
+ }
+
+ return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __used, siginfo_t *info __used, void *uc __used)
+{
+ exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+ long page_size = sysconf(_SC_PAGE_SIZE);
+ volatile int tmp = 0;
+ u64 i, loops = 1000;
+ int n;
+ int fd;
+ void *addr;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_kernel = 1,
+ };
+ u64 delta_sum = 0;
+ struct sigaction sa;
+
+ sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = segfault_handler;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ fprintf(stderr, "\n\n");
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd < 0) {
+ die("Error: sys_perf_event_open() syscall returned "
+ "with %d (%s)\n", fd, strerror(errno));
+ }
+
+ addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (addr == (void *)(-1)) {
+ die("Error: mmap() syscall returned "
+ "with (%s)\n", strerror(errno));
+ }
+
+ for (n = 0; n < 6; n++) {
+ u64 stamp, now, delta;
+
+ stamp = mmap_read_self(addr);
+
+ for (i = 0; i < loops; i++)
+ tmp++;
+
+ now = mmap_read_self(addr);
+ loops *= 10;
+
+ delta = now - stamp;
+ fprintf(stderr, "%14d: %14Lu\n", n, (long long)delta);
+
+ delta_sum += delta;
+ }
+
+ munmap(addr, page_size);
+ close(fd);
+
+ fprintf(stderr, " ");
+
+ if (!delta_sum)
+ return -1;
+
+ return 0;
+}
+
+static int test__rdpmc(void)
+{
+ int status = 0;
+ int wret = 0;
+ int ret;
+ int pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (!pid) {
+ ret = __test__rdpmc();
+
+ exit(ret);
+ }
+
+ wret = waitpid(pid, &status, 0);
+ if (wret < 0 || status)
+ return -1;
+
+ return 0;
+}
+
+#endif
+
static struct test {
const char *desc;
int (*func)(void);
@@ -1320,6 +1486,12 @@ static struct test {
.desc = "parse events tests",
.func = test__parse_events,
},
+#if defined(__x86_64__) || defined(__i386__)
+ {
+ .desc = "x86 rdpmc test",
+ .func = test__rdpmc,
+ },
+#endif
{
.desc = "Validate PERF_RECORD_* events & perf_sample fields",
.func = test__PERF_RECORD,
@@ -1412,7 +1584,5 @@ int cmd_test(int argc, const char **argv, const char *prefix __used)
if (symbol__init() < 0)
return -1;
- setup_pager();
-
return __cmd_test(argc, argv);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dd162aa24baa..e3c63aef8efc 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -64,7 +64,6 @@
#include <linux/unistd.h>
#include <linux/types.h>
-
void get_term_dimensions(struct winsize *ws)
{
char *s = getenv("LINES");
@@ -544,10 +543,20 @@ static void perf_top__sort_new_samples(void *arg)
static void *display_thread_tui(void *arg)
{
+ struct perf_evsel *pos;
struct perf_top *top = arg;
const char *help = "For a higher level overview, try: perf top --sort comm,dso";
perf_top__sort_new_samples(top);
+
+ /*
+ * Initialize the uid_filter_str, in the future the TUI will allow
+ * Zooming in/out UIDs. For now juse use whatever the user passed
+ * via --uid.
+ */
+ list_for_each_entry(pos, &top->evlist->entries, node)
+ pos->hists.uid_filter_str = top->uid_str;
+
perf_evlist__tui_browse_hists(top->evlist, help,
perf_top__sort_new_samples,
top, top->delay_secs);
@@ -668,6 +677,12 @@ static void perf_event__process_sample(struct perf_tool *tool,
return;
}
+ if (!machine) {
+ pr_err("%u unprocessable samples recorded.",
+ top->session->hists.stats.nr_unprocessable_samples++);
+ return;
+ }
+
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
@@ -857,8 +872,11 @@ static void perf_top__start_counters(struct perf_top *top)
attr->mmap = 1;
attr->comm = 1;
attr->inherit = top->inherit;
+fallback_missing_features:
+ if (top->exclude_guest_missing)
+ attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
- attr->sample_id_all = top->sample_id_all_avail ? 1 : 0;
+ attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
try_again:
if (perf_evsel__open(counter, top->evlist->cpus,
top->evlist->threads, top->group,
@@ -868,12 +886,20 @@ try_again:
if (err == EPERM || err == EACCES) {
ui__error_paranoid();
goto out_err;
- } else if (err == EINVAL && top->sample_id_all_avail) {
- /*
- * Old kernel, no attr->sample_id_type_all field
- */
- top->sample_id_all_avail = false;
- goto retry_sample_id;
+ } else if (err == EINVAL) {
+ if (!top->exclude_guest_missing &&
+ (attr->exclude_guest || attr->exclude_host)) {
+ pr_debug("Old kernel, cannot exclude "
+ "guest or host samples.\n");
+ top->exclude_guest_missing = true;
+ goto fallback_missing_features;
+ } else if (!top->sample_id_all_missing) {
+ /*
+ * Old kernel, no attr->sample_id_type_all field
+ */
+ top->sample_id_all_missing = true;
+ goto retry_sample_id;
+ }
}
/*
* If it's cycles then fall back to hrtimer
@@ -956,7 +982,7 @@ static int __cmd_top(struct perf_top *top)
if (ret)
goto out_delete;
- if (top->target_tid != -1)
+ if (top->target_tid || top->uid != UINT_MAX)
perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
perf_event__process,
&top->session->host_machine);
@@ -1094,10 +1120,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
struct perf_top top = {
.count_filter = 5,
.delay_secs = 2,
- .target_pid = -1,
- .target_tid = -1,
+ .uid = UINT_MAX,
.freq = 1000, /* 1 KHz */
- .sample_id_all_avail = true,
.mmap_pages = 128,
.sym_pcnt_filter = 5,
};
@@ -1108,9 +1132,9 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
parse_events_option),
OPT_INTEGER('c', "count", &top.default_interval,
"event period to sample"),
- OPT_INTEGER('p', "pid", &top.target_pid,
+ OPT_STRING('p', "pid", &top.target_pid, "pid",
"profile events on existing process id"),
- OPT_INTEGER('t', "tid", &top.target_tid,
+ OPT_STRING('t', "tid", &top.target_tid, "tid",
"profile events on existing thread id"),
OPT_BOOLEAN('a', "all-cpus", &top.system_wide,
"system-wide collection from all CPUs"),
@@ -1169,6 +1193,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
"Display raw encoding of assembly instructions (default)"),
OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
"Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING('u', "uid", &top.uid_str, "user", "user to profile"),
OPT_END()
};
@@ -1194,18 +1219,22 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
setup_browser(false);
+ top.uid = parse_target_uid(top.uid_str, top.target_tid, top.target_pid);
+ if (top.uid_str != NULL && top.uid == UINT_MAX - 1)
+ goto out_delete_evlist;
+
/* CPU and PID are mutually exclusive */
- if (top.target_tid > 0 && top.cpu_list) {
+ if (top.target_tid && top.cpu_list) {
printf("WARNING: PID switch overriding CPU\n");
sleep(1);
top.cpu_list = NULL;
}
- if (top.target_pid != -1)
+ if (top.target_pid)
top.target_tid = top.target_pid;
if (perf_evlist__create_maps(top.evlist, top.target_pid,
- top.target_tid, top.cpu_list) < 0)
+ top.target_tid, top.uid, top.cpu_list) < 0)
usage_with_options(top_usage, options);
if (!top.evlist->nr_entries &&
@@ -1269,6 +1298,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
status = __cmd_top(&top);
+out_delete_evlist:
perf_evlist__delete(top.evlist);
return status;
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 64f8bee31ced..f0227e93665d 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -167,7 +167,6 @@ sys_perf_event_open(struct perf_event_attr *attr,
pid_t pid, int cpu, int group_fd,
unsigned long flags)
{
- attr->size = sizeof(*attr);
return syscall(__NR_perf_event_open, attr, pid, cpu,
group_fd, flags);
}
@@ -186,8 +185,9 @@ extern const char perf_version_string[];
void pthread__unblock_sigwinch(void);
struct perf_record_opts {
- pid_t target_pid;
- pid_t target_tid;
+ const char *target_pid;
+ const char *target_tid;
+ uid_t uid;
bool call_graph;
bool group;
bool inherit_stat;
@@ -198,7 +198,8 @@ struct perf_record_opts {
bool raw_samples;
bool sample_address;
bool sample_time;
- bool sample_id_all_avail;
+ bool sample_id_all_missing;
+ bool exclude_guest_missing;
bool system_wide;
bool period;
unsigned int freq;
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
index df638c438a9f..b11cca584238 100755
--- a/tools/perf/python/twatch.py
+++ b/tools/perf/python/twatch.py
@@ -19,7 +19,7 @@ def main():
cpus = perf.cpu_map()
threads = perf.thread_map()
evsel = perf.evsel(task = 1, comm = 1, mmap = 0,
- wakeup_events = 1, sample_period = 1,
+ wakeup_events = 1, watermark = 1,
sample_id_all = 1,
sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_TID)
evsel.open(cpus = cpus, threads = threads);
diff --git a/tools/perf/util/bitmap.c b/tools/perf/util/bitmap.c
index 5e230acae1e9..0a1adc1111fd 100644
--- a/tools/perf/util/bitmap.c
+++ b/tools/perf/util/bitmap.c
@@ -19,3 +19,13 @@ int __bitmap_weight(const unsigned long *bitmap, int bits)
return w;
}
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] | bitmap2[k];
+}
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 6893eec693ab..adc72f09914d 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -166,6 +166,17 @@ out:
return cpus;
}
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d cpu%s: ",
+ map->nr, map->nr > 1 ? "s" : "");
+ for (i = 0; i < map->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", map->map[i]);
+
+ return printed + fprintf(fp, "\n");
+}
+
struct cpu_map *cpu_map__dummy_new(void)
{
struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 072c0a374794..c41518573c6a 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -1,6 +1,8 @@
#ifndef __PERF_CPUMAP_H
#define __PERF_CPUMAP_H
+#include <stdio.h>
+
struct cpu_map {
int nr;
int map[];
@@ -10,4 +12,6 @@ struct cpu_map *cpu_map__new(const char *cpu_list);
struct cpu_map *cpu_map__dummy_new(void);
void cpu_map__delete(struct cpu_map *map);
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
index 35073621e5de..aada3ac5e891 100644
--- a/tools/perf/util/ctype.c
+++ b/tools/perf/util/ctype.c
@@ -3,7 +3,7 @@
*
* No surprises, and works with signed and unsigned chars.
*/
-#include "cache.h"
+#include "util.h"
enum {
S = GIT_SPACE,
diff --git a/tools/perf/util/debugfs.c b/tools/perf/util/debugfs.c
index ffc35e748e89..dd8b19319c03 100644
--- a/tools/perf/util/debugfs.c
+++ b/tools/perf/util/debugfs.c
@@ -15,32 +15,6 @@ static const char *debugfs_known_mountpoints[] = {
0,
};
-/* use this to force a umount */
-void debugfs_force_cleanup(void)
-{
- debugfs_find_mountpoint();
- debugfs_premounted = 0;
- debugfs_umount();
-}
-
-/* construct a full path to a debugfs element */
-int debugfs_make_path(const char *element, char *buffer, int size)
-{
- int len;
-
- if (strlen(debugfs_mountpoint) == 0) {
- buffer[0] = '\0';
- return -1;
- }
-
- len = strlen(debugfs_mountpoint) + strlen(element) + 1;
- if (len >= size)
- return len+1;
-
- snprintf(buffer, size-1, "%s/%s", debugfs_mountpoint, element);
- return 0;
-}
-
static int debugfs_found;
/* find the path to the mounted debugfs */
@@ -97,17 +71,6 @@ int debugfs_valid_mountpoint(const char *debugfs)
return 0;
}
-
-int debugfs_valid_entry(const char *path)
-{
- struct stat st;
-
- if (stat(path, &st))
- return -errno;
-
- return 0;
-}
-
static void debugfs_set_tracing_events_path(const char *mountpoint)
{
snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s",
@@ -149,107 +112,3 @@ void debugfs_set_path(const char *mountpoint)
snprintf(debugfs_mountpoint, sizeof(debugfs_mountpoint), "%s", mountpoint);
debugfs_set_tracing_events_path(mountpoint);
}
-
-/* umount the debugfs */
-
-int debugfs_umount(void)
-{
- char umountcmd[128];
- int ret;
-
- /* if it was already mounted, leave it */
- if (debugfs_premounted)
- return 0;
-
- /* make sure it's a valid mount point */
- ret = debugfs_valid_mountpoint(debugfs_mountpoint);
- if (ret)
- return ret;
-
- snprintf(umountcmd, sizeof(umountcmd),
- "/bin/umount %s", debugfs_mountpoint);
- return system(umountcmd);
-}
-
-int debugfs_write(const char *entry, const char *value)
-{
- char path[PATH_MAX + 1];
- int ret, count;
- int fd;
-
- /* construct the path */
- snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
- /* verify that it exists */
- ret = debugfs_valid_entry(path);
- if (ret)
- return ret;
-
- /* get how many chars we're going to write */
- count = strlen(value);
-
- /* open the debugfs entry */
- fd = open(path, O_RDWR);
- if (fd < 0)
- return -errno;
-
- while (count > 0) {
- /* write it */
- ret = write(fd, value, count);
- if (ret <= 0) {
- if (ret == EAGAIN)
- continue;
- close(fd);
- return -errno;
- }
- count -= ret;
- }
-
- /* close it */
- close(fd);
-
- /* return success */
- return 0;
-}
-
-/*
- * read a debugfs entry
- * returns the number of chars read or a negative errno
- */
-int debugfs_read(const char *entry, char *buffer, size_t size)
-{
- char path[PATH_MAX + 1];
- int ret;
- int fd;
-
- /* construct the path */
- snprintf(path, sizeof(path), "%s/%s", debugfs_mountpoint, entry);
-
- /* verify that it exists */
- ret = debugfs_valid_entry(path);
- if (ret)
- return ret;
-
- /* open the debugfs entry */
- fd = open(path, O_RDONLY);
- if (fd < 0)
- return -errno;
-
- do {
- /* read it */
- ret = read(fd, buffer, size);
- if (ret == 0) {
- close(fd);
- return EOF;
- }
- } while (ret < 0 && errno == EAGAIN);
-
- /* close it */
- close(fd);
-
- /* make *sure* there's a null character at the end */
- buffer[ret] = '\0';
-
- /* return the number of chars read */
- return ret;
-}
diff --git a/tools/perf/util/debugfs.h b/tools/perf/util/debugfs.h
index 4a878f735eb0..68f3e87ec57f 100644
--- a/tools/perf/util/debugfs.h
+++ b/tools/perf/util/debugfs.h
@@ -3,14 +3,8 @@
const char *debugfs_find_mountpoint(void);
int debugfs_valid_mountpoint(const char *debugfs);
-int debugfs_valid_entry(const char *path);
char *debugfs_mount(const char *mountpoint);
-int debugfs_umount(void);
void debugfs_set_path(const char *mountpoint);
-int debugfs_write(const char *entry, const char *value);
-int debugfs_read(const char *entry, char *buffer, size_t size);
-void debugfs_force_cleanup(void);
-int debugfs_make_path(const char *element, char *buffer, int size);
extern char debugfs_mountpoint[];
extern char tracing_events_path[];
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ea32a061f1c8..f8da9fada002 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -97,9 +97,9 @@ void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
++evlist->nr_entries;
}
-static void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
- struct list_head *list,
- int nr_entries)
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+ struct list_head *list,
+ int nr_entries)
{
list_splice_tail(list, &evlist->entries);
evlist->nr_entries += nr_entries;
@@ -597,15 +597,15 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
return perf_evlist__mmap_per_cpu(evlist, prot, mask);
}
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
- pid_t target_tid, const char *cpu_list)
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+ const char *target_tid, uid_t uid, const char *cpu_list)
{
- evlist->threads = thread_map__new(target_pid, target_tid);
+ evlist->threads = thread_map__new_str(target_pid, target_tid, uid);
if (evlist->threads == NULL)
return -1;
- if (cpu_list == NULL && target_tid != -1)
+ if (uid != UINT_MAX || (cpu_list == NULL && target_tid))
evlist->cpus = cpu_map__dummy_new();
else
evlist->cpus = cpu_map__new(cpu_list);
@@ -824,7 +824,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist,
exit(-1);
}
- if (!opts->system_wide && opts->target_tid == -1 && opts->target_pid == -1)
+ if (!opts->system_wide && !opts->target_tid && !opts->target_pid)
evlist->threads->map[0] = evlist->workload.pid;
close(child_ready_pipe[1]);
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8922aeed0467..21f1c9e57f13 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -106,8 +106,8 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist,
evlist->threads = threads;
}
-int perf_evlist__create_maps(struct perf_evlist *evlist, pid_t target_pid,
- pid_t target_tid, const char *cpu_list);
+int perf_evlist__create_maps(struct perf_evlist *evlist, const char *target_pid,
+ const char *tid, uid_t uid, const char *cpu_list);
void perf_evlist__delete_maps(struct perf_evlist *evlist);
int perf_evlist__set_filters(struct perf_evlist *evlist);
@@ -117,4 +117,9 @@ u16 perf_evlist__id_hdr_size(const struct perf_evlist *evlist);
bool perf_evlist__valid_sample_type(const struct perf_evlist *evlist);
bool perf_evlist__valid_sample_id_all(const struct perf_evlist *evlist);
+
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+ struct list_head *list,
+ int nr_entries);
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 7132ee834e0e..302d49a9f985 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -68,7 +68,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
struct perf_event_attr *attr = &evsel->attr;
int track = !evsel->idx; /* only the first counter needs these */
- attr->sample_id_all = opts->sample_id_all_avail ? 1 : 0;
+ attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
attr->inherit = !opts->no_inherit;
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING |
@@ -111,7 +111,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
if (opts->period)
attr->sample_type |= PERF_SAMPLE_PERIOD;
- if (opts->sample_id_all_avail &&
+ if (!opts->sample_id_all_missing &&
(opts->sample_time || opts->system_wide ||
!opts->no_inherit || opts->cpu_list))
attr->sample_type |= PERF_SAMPLE_TIME;
@@ -130,7 +130,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
attr->mmap = track;
attr->comm = track;
- if (opts->target_pid == -1 && opts->target_tid == -1 && !opts->system_wide) {
+ if (!opts->target_pid && !opts->target_tid && !opts->system_wide) {
attr->disabled = 1;
attr->enable_on_exec = 1;
}
@@ -536,7 +536,7 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
}
if (type & PERF_SAMPLE_READ) {
- fprintf(stderr, "PERF_SAMPLE_READ is unsuported for now\n");
+ fprintf(stderr, "PERF_SAMPLE_READ is unsupported for now\n");
return -1;
}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ecd7f4dd7eea..9f867d96c6a5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -63,9 +63,20 @@ char *perf_header__find_event(u64 id)
return NULL;
}
-static const char *__perf_magic = "PERFFILE";
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value to let the endianness
+ * determine the memory layout. That way we are able
+ * to detect endianness when reading the perf.data file
+ * back.
+ *
+ * we check for legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2 = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
-#define PERF_MAGIC (*(u64 *)__perf_magic)
+#define PERF_MAGIC __perf_magic2
struct perf_file_attr {
struct perf_event_attr attr;
@@ -1305,25 +1316,198 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
free(str);
}
+static int __event_process_build_id(struct build_id_event *bev,
+ char *filename,
+ struct perf_session *session)
+{
+ int err = -1;
+ struct list_head *head;
+ struct machine *machine;
+ u16 misc;
+ struct dso *dso;
+ enum dso_kernel_type dso_type;
+
+ machine = perf_session__findnew_machine(session, bev->pid);
+ if (!machine)
+ goto out;
+
+ misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ switch (misc) {
+ case PERF_RECORD_MISC_KERNEL:
+ dso_type = DSO_TYPE_KERNEL;
+ head = &machine->kernel_dsos;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ dso_type = DSO_TYPE_GUEST_KERNEL;
+ head = &machine->kernel_dsos;
+ break;
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_GUEST_USER:
+ dso_type = DSO_TYPE_USER;
+ head = &machine->user_dsos;
+ break;
+ default:
+ goto out;
+ }
+
+ dso = __dsos__findnew(head, filename);
+ if (dso != NULL) {
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ dso__set_build_id(dso, &bev->build_id);
+
+ if (filename[0] == '[')
+ dso->kernel = dso_type;
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id),
+ sbuild_id);
+ pr_debug("build id event received for %s: %s\n",
+ dso->long_name, sbuild_id);
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct {
+ struct perf_event_header header;
+ u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+ char filename[0];
+ } old_bev;
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+ return -1;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&old_bev.header);
+
+ len = old_bev.header.size - sizeof(old_bev);
+ if (read(input, filename, len) != len)
+ return -1;
+
+ bev.header = old_bev.header;
+
+ /*
+ * As the pid is the missing value, we need to fill
+ * it properly. The header.misc value give us nice hint.
+ */
+ bev.pid = HOST_KERNEL_ID;
+ if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+ bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+ bev.pid = DEFAULT_GUEST_KERNEL_ID;
+
+ memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+
+ return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size, orig_offset = offset;
+ int err = -1;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (read(input, &bev, sizeof(bev)) != sizeof(bev))
+ goto out;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&bev.header);
+
+ len = bev.header.size - sizeof(bev);
+ if (read(input, filename, len) != len)
+ goto out;
+ /*
+ * The a1645ce1 changeset:
+ *
+ * "perf: 'perf kvm' tool for monitoring guest performance from host"
+ *
+ * Added a field to struct build_id_event that broke the file
+ * format.
+ *
+ * Since the kernel build-id is the first entry, process the
+ * table using the old format if the well known
+ * '[kernel.kallsyms]' string for the kernel build-id has the
+ * first 4 characters chopped off (where the pid_t sits).
+ */
+ if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+ if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+ return -1;
+ return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+ }
+
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+static int process_trace_info(struct perf_file_section *section __unused,
+ struct perf_header *ph __unused,
+ int feat __unused, int fd)
+{
+ trace_report(fd, false);
+ return 0;
+}
+
+static int process_build_id(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat __unused, int fd)
+{
+ if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
+ pr_debug("Failed to read buildids, continuing...\n");
+ return 0;
+}
+
struct feature_ops {
int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
void (*print)(struct perf_header *h, int fd, FILE *fp);
+ int (*process)(struct perf_file_section *section,
+ struct perf_header *h, int feat, int fd);
const char *name;
bool full_only;
};
#define FEAT_OPA(n, func) \
[n] = { .name = #n, .write = write_##func, .print = print_##func }
+#define FEAT_OPP(n, func) \
+ [n] = { .name = #n, .write = write_##func, .print = print_##func, \
+ .process = process_##func }
#define FEAT_OPF(n, func) \
- [n] = { .name = #n, .write = write_##func, .print = print_##func, .full_only = true }
+ [n] = { .name = #n, .write = write_##func, .print = print_##func, \
+ .full_only = true }
/* feature_ops not implemented: */
#define print_trace_info NULL
#define print_build_id NULL
static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
- FEAT_OPA(HEADER_TRACE_INFO, trace_info),
- FEAT_OPA(HEADER_BUILD_ID, build_id),
+ FEAT_OPP(HEADER_TRACE_INFO, trace_info),
+ FEAT_OPP(HEADER_BUILD_ID, build_id),
FEAT_OPA(HEADER_HOSTNAME, hostname),
FEAT_OPA(HEADER_OSRELEASE, osrelease),
FEAT_OPA(HEADER_VERSION, version),
@@ -1620,24 +1804,59 @@ out_free:
return err;
}
+static int check_magic_endian(u64 *magic, struct perf_file_header *header,
+ struct perf_header *ph)
+{
+ int ret;
+
+ /* check for legacy format */
+ ret = memcmp(magic, __perf_magic1, sizeof(*magic));
+ if (ret == 0) {
+ pr_debug("legacy perf.data format\n");
+ if (!header)
+ return -1;
+
+ if (header->attr_size != sizeof(struct perf_file_attr)) {
+ u64 attr_size = bswap_64(header->attr_size);
+
+ if (attr_size != sizeof(struct perf_file_attr))
+ return -1;
+
+ ph->needs_swap = true;
+ }
+ return 0;
+ }
+
+ /* check magic number with same endianness */
+ if (*magic == __perf_magic2)
+ return 0;
+
+ /* check magic number but opposite endianness */
+ if (*magic != __perf_magic2_sw)
+ return -1;
+
+ ph->needs_swap = true;
+
+ return 0;
+}
+
int perf_file_header__read(struct perf_file_header *header,
struct perf_header *ph, int fd)
{
+ int ret;
+
lseek(fd, 0, SEEK_SET);
- if (readn(fd, header, sizeof(*header)) <= 0 ||
- memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
+ ret = readn(fd, header, sizeof(*header));
+ if (ret <= 0)
return -1;
- if (header->attr_size != sizeof(struct perf_file_attr)) {
- u64 attr_size = bswap_64(header->attr_size);
-
- if (attr_size != sizeof(struct perf_file_attr))
- return -1;
+ if (check_magic_endian(&header->magic, header, ph) < 0)
+ return -1;
+ if (ph->needs_swap) {
mem_bswap_64(header, offsetof(struct perf_file_header,
- adds_features));
- ph->needs_swap = true;
+ adds_features));
}
if (header->size != sizeof(*header)) {
@@ -1689,156 +1908,6 @@ int perf_file_header__read(struct perf_file_header *header,
return 0;
}
-static int __event_process_build_id(struct build_id_event *bev,
- char *filename,
- struct perf_session *session)
-{
- int err = -1;
- struct list_head *head;
- struct machine *machine;
- u16 misc;
- struct dso *dso;
- enum dso_kernel_type dso_type;
-
- machine = perf_session__findnew_machine(session, bev->pid);
- if (!machine)
- goto out;
-
- misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
- switch (misc) {
- case PERF_RECORD_MISC_KERNEL:
- dso_type = DSO_TYPE_KERNEL;
- head = &machine->kernel_dsos;
- break;
- case PERF_RECORD_MISC_GUEST_KERNEL:
- dso_type = DSO_TYPE_GUEST_KERNEL;
- head = &machine->kernel_dsos;
- break;
- case PERF_RECORD_MISC_USER:
- case PERF_RECORD_MISC_GUEST_USER:
- dso_type = DSO_TYPE_USER;
- head = &machine->user_dsos;
- break;
- default:
- goto out;
- }
-
- dso = __dsos__findnew(head, filename);
- if (dso != NULL) {
- char sbuild_id[BUILD_ID_SIZE * 2 + 1];
-
- dso__set_build_id(dso, &bev->build_id);
-
- if (filename[0] == '[')
- dso->kernel = dso_type;
-
- build_id__sprintf(dso->build_id, sizeof(dso->build_id),
- sbuild_id);
- pr_debug("build id event received for %s: %s\n",
- dso->long_name, sbuild_id);
- }
-
- err = 0;
-out:
- return err;
-}
-
-static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
- int input, u64 offset, u64 size)
-{
- struct perf_session *session = container_of(header, struct perf_session, header);
- struct {
- struct perf_event_header header;
- u8 build_id[ALIGN(BUILD_ID_SIZE, sizeof(u64))];
- char filename[0];
- } old_bev;
- struct build_id_event bev;
- char filename[PATH_MAX];
- u64 limit = offset + size;
-
- while (offset < limit) {
- ssize_t len;
-
- if (read(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
- return -1;
-
- if (header->needs_swap)
- perf_event_header__bswap(&old_bev.header);
-
- len = old_bev.header.size - sizeof(old_bev);
- if (read(input, filename, len) != len)
- return -1;
-
- bev.header = old_bev.header;
-
- /*
- * As the pid is the missing value, we need to fill
- * it properly. The header.misc value give us nice hint.
- */
- bev.pid = HOST_KERNEL_ID;
- if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
- bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
- bev.pid = DEFAULT_GUEST_KERNEL_ID;
-
- memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
- __event_process_build_id(&bev, filename, session);
-
- offset += bev.header.size;
- }
-
- return 0;
-}
-
-static int perf_header__read_build_ids(struct perf_header *header,
- int input, u64 offset, u64 size)
-{
- struct perf_session *session = container_of(header, struct perf_session, header);
- struct build_id_event bev;
- char filename[PATH_MAX];
- u64 limit = offset + size, orig_offset = offset;
- int err = -1;
-
- while (offset < limit) {
- ssize_t len;
-
- if (read(input, &bev, sizeof(bev)) != sizeof(bev))
- goto out;
-
- if (header->needs_swap)
- perf_event_header__bswap(&bev.header);
-
- len = bev.header.size - sizeof(bev);
- if (read(input, filename, len) != len)
- goto out;
- /*
- * The a1645ce1 changeset:
- *
- * "perf: 'perf kvm' tool for monitoring guest performance from host"
- *
- * Added a field to struct build_id_event that broke the file
- * format.
- *
- * Since the kernel build-id is the first entry, process the
- * table using the old format if the well known
- * '[kernel.kallsyms]' string for the kernel build-id has the
- * first 4 characters chopped off (where the pid_t sits).
- */
- if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
- if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
- return -1;
- return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
- }
-
- __event_process_build_id(&bev, filename, session);
-
- offset += bev.header.size;
- }
- err = 0;
-out:
- return err;
-}
-
static int perf_file_section__process(struct perf_file_section *section,
struct perf_header *ph,
int feat, int fd, void *data __used)
@@ -1854,27 +1923,23 @@ static int perf_file_section__process(struct perf_file_section *section,
return 0;
}
- switch (feat) {
- case HEADER_TRACE_INFO:
- trace_report(fd, false);
- break;
- case HEADER_BUILD_ID:
- if (perf_header__read_build_ids(ph, fd, section->offset, section->size))
- pr_debug("Failed to read buildids, continuing...\n");
- break;
- default:
- break;
- }
+ if (!feat_ops[feat].process)
+ return 0;
- return 0;
+ return feat_ops[feat].process(section, ph, feat, fd);
}
static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
struct perf_header *ph, int fd,
bool repipe)
{
- if (readn(fd, header, sizeof(*header)) <= 0 ||
- memcmp(&header->magic, __perf_magic, sizeof(header->magic)))
+ int ret;
+
+ ret = readn(fd, header, sizeof(*header));
+ if (ret <= 0)
+ return -1;
+
+ if (check_magic_endian(&header->magic, NULL, ph) < 0)
return -1;
if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index ac4ec956024e..e68f617d082f 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -11,6 +11,7 @@
enum {
HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
HEADER_TRACE_INFO = 1,
HEADER_BUILD_ID,
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index f55f0a8d1f81..48e5acd1e862 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -32,6 +32,7 @@ struct events_stats {
u32 nr_unknown_events;
u32 nr_invalid_chains;
u32 nr_unknown_id;
+ u32 nr_unprocessable_samples;
};
enum hist_column {
@@ -55,6 +56,7 @@ struct hists {
u64 nr_entries;
const struct thread *thread_filter;
const struct dso *dso_filter;
+ const char *uid_filter_str;
pthread_mutex_t lock;
struct events_stats stats;
u64 event_stream;
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
index bb4198e7837a..afe38199e922 100644
--- a/tools/perf/util/include/asm/dwarf2.h
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -2,10 +2,12 @@
#ifndef PERF_DWARF2_H
#define PERF_DWARF2_H
-/* dwarf2.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
#define CFI_STARTPROC
#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
#endif /* PERF_DWARF2_H */
diff --git a/tools/perf/util/include/linux/bitmap.h b/tools/perf/util/include/linux/bitmap.h
index eda4416efa0a..bb162e40c76c 100644
--- a/tools/perf/util/include/linux/bitmap.h
+++ b/tools/perf/util/include/linux/bitmap.h
@@ -5,6 +5,8 @@
#include <linux/bitops.h>
int __bitmap_weight(const unsigned long *bitmap, int bits);
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
@@ -32,4 +34,13 @@ static inline int bitmap_weight(const unsigned long *src, int nbits)
return __bitmap_weight(src, nbits);
}
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = *src1 | *src2;
+ else
+ __bitmap_or(dst, src1, src2, nbits);
+}
+
#endif /* _PERF_BITOPS_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 316aa0ab7122..dea6d1c1a954 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -212,6 +212,21 @@ size_t map__fprintf(struct map *self, FILE *fp)
self->start, self->end, self->pgoff, self->dso->name);
}
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+ const char *dsoname;
+
+ if (map && map->dso && (map->dso->name || map->dso->long_name)) {
+ if (symbol_conf.show_kernel_path && map->dso->long_name)
+ dsoname = map->dso->long_name;
+ else if (map->dso->name)
+ dsoname = map->dso->name;
+ } else
+ dsoname = "[unknown]";
+
+ return fprintf(fp, "%s", dsoname);
+}
+
/*
* objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
* map->dso->adjust_symbols==1 for ET_EXEC-like cases.
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 2b8017f8a930..b100c20b7f94 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -118,6 +118,7 @@ void map__delete(struct map *self);
struct map *map__clone(struct map *self);
int map__overlap(struct map *l, struct map *r);
size_t map__fprintf(struct map *self, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
int map__load(struct map *self, symbol_filter_t filter);
struct symbol *map__find_symbol(struct map *self,
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e33554a562b3..15f9bb1b5f0a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -273,10 +273,10 @@ static int add_module_to_probe_trace_events(struct probe_trace_event *tevs,
/* Try to find perf_probe_event with debuginfo */
static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
- int max_tevs, const char *module)
+ int max_tevs, const char *target)
{
bool need_dwarf = perf_probe_event_need_dwarf(pev);
- struct debuginfo *dinfo = open_debuginfo(module);
+ struct debuginfo *dinfo = open_debuginfo(target);
int ntevs, ret = 0;
if (!dinfo) {
@@ -295,9 +295,9 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
if (ntevs > 0) { /* Succeeded to find trace events */
pr_debug("find %d probe_trace_events.\n", ntevs);
- if (module)
+ if (target)
ret = add_module_to_probe_trace_events(*tevs, ntevs,
- module);
+ target);
return ret < 0 ? ret : ntevs;
}
@@ -1729,7 +1729,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
}
ret = 0;
- printf("Add new event%s\n", (ntevs > 1) ? "s:" : ":");
+ printf("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
for (i = 0; i < ntevs; i++) {
tev = &tevs[i];
if (pev->event)
@@ -1784,7 +1784,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
if (ret >= 0) {
/* Show how to use the event. */
- printf("\nYou can now use it on all perf tools, such as:\n\n");
+ printf("\nYou can now use it in all perf tools, such as:\n\n");
printf("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
tev->event);
}
@@ -1796,14 +1796,14 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
static int convert_to_probe_trace_events(struct perf_probe_event *pev,
struct probe_trace_event **tevs,
- int max_tevs, const char *module)
+ int max_tevs, const char *target)
{
struct symbol *sym;
int ret = 0, i;
struct probe_trace_event *tev;
/* Convert perf_probe_event with debuginfo */
- ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, module);
+ ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
if (ret != 0)
return ret; /* Found in debuginfo or got an error */
@@ -1819,8 +1819,8 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
goto error;
}
- if (module) {
- tev->point.module = strdup(module);
+ if (target) {
+ tev->point.module = strdup(target);
if (tev->point.module == NULL) {
ret = -ENOMEM;
goto error;
@@ -1890,7 +1890,7 @@ struct __event_package {
};
int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
- int max_tevs, const char *module, bool force_add)
+ int max_tevs, const char *target, bool force_add)
{
int i, j, ret;
struct __event_package *pkgs;
@@ -1913,7 +1913,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
ret = convert_to_probe_trace_events(pkgs[i].pev,
&pkgs[i].tevs,
max_tevs,
- module);
+ target);
if (ret < 0)
goto end;
pkgs[i].ntevs = ret;
@@ -1965,7 +1965,7 @@ static int __del_trace_probe_event(int fd, struct str_node *ent)
goto error;
}
- printf("Remove event: %s\n", ent->s);
+ printf("Removed event: %s\n", ent->s);
return 0;
error:
pr_warning("Failed to delete event: %s\n", strerror(-ret));
@@ -2069,7 +2069,7 @@ static int filter_available_functions(struct map *map __unused,
return 1;
}
-int show_available_funcs(const char *module, struct strfilter *_filter)
+int show_available_funcs(const char *target, struct strfilter *_filter)
{
struct map *map;
int ret;
@@ -2080,9 +2080,9 @@ int show_available_funcs(const char *module, struct strfilter *_filter)
if (ret < 0)
return ret;
- map = kernel_get_module_map(module);
+ map = kernel_get_module_map(target);
if (!map) {
- pr_err("Failed to find %s map.\n", (module) ? : "kernel");
+ pr_err("Failed to find %s map.\n", (target) ? : "kernel");
return -EINVAL;
}
available_func_filter = _filter;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 74bd2e63c4b4..2cc162d3b78c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -30,7 +30,6 @@
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
-#include <ctype.h>
#include <dwarf-regs.h>
#include <linux/bitops.h>
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 000000000000..2884e67ee625
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,19 @@
+#
+# List of files needed by perf python extention
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+util/ctype.c
+util/evlist.c
+util/evsel.c
+util/cpumap.c
+util/thread_map.c
+util/util.c
+util/xyarray.c
+util/cgroup.c
+util/debugfs.c
+util/strlist.c
+../../lib/rbtree.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 9dd47a4f2596..e03b58a48424 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -425,14 +425,14 @@ struct pyrf_thread_map {
static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
PyObject *args, PyObject *kwargs)
{
- static char *kwlist[] = { "pid", "tid", NULL };
- int pid = -1, tid = -1;
+ static char *kwlist[] = { "pid", "tid", "uid", NULL };
+ int pid = -1, tid = -1, uid = UINT_MAX;
- if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii",
- kwlist, &pid, &tid))
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+ kwlist, &pid, &tid, &uid))
return -1;
- pthreads->threads = thread_map__new(pid, tid);
+ pthreads->threads = thread_map__new(pid, tid, uid);
if (pthreads->threads == NULL)
return -1;
return 0;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 0b2a48783172..c2623c6f9b51 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -24,7 +24,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
#include <errno.h>
#include "../../perf.h"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b5ca2558c7bb..9f833cf9c6a9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -796,6 +796,10 @@ static int perf_session_deliver_event(struct perf_session *session,
++session->hists.stats.nr_unknown_id;
return -1;
}
+ if (machine == NULL) {
+ ++session->hists.stats.nr_unprocessable_samples;
+ return -1;
+ }
return tool->sample(tool, event, sample, evsel, machine);
case PERF_RECORD_MMAP:
return tool->mmap(tool, event, sample, machine);
@@ -964,6 +968,12 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
session->hists.stats.nr_invalid_chains,
session->hists.stats.nr_events[PERF_RECORD_SAMPLE]);
}
+
+ if (session->hists.stats.nr_unprocessable_samples != 0) {
+ ui__warning("%u unprocessable samples recorded.\n"
+ "Do you have a KVM guest running and not using 'perf kvm'?\n",
+ session->hists.stats.nr_unprocessable_samples);
+ }
}
#define session_done() (*(volatile int *)(&session_done))
@@ -1293,10 +1303,9 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
struct machine *machine, struct perf_evsel *evsel,
- int print_sym, int print_dso)
+ int print_sym, int print_dso, int print_symoffset)
{
struct addr_location al;
- const char *symname, *dsoname;
struct callchain_cursor *cursor = &evsel->hists.callchain_cursor;
struct callchain_cursor_node *node;
@@ -1324,20 +1333,13 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
printf("\t%16" PRIx64, node->ip);
if (print_sym) {
- if (node->sym && node->sym->name)
- symname = node->sym->name;
- else
- symname = "";
-
- printf(" %s", symname);
+ printf(" ");
+ symbol__fprintf_symname(node->sym, stdout);
}
if (print_dso) {
- if (node->map && node->map->dso && node->map->dso->name)
- dsoname = node->map->dso->name;
- else
- dsoname = "";
-
- printf(" (%s)", dsoname);
+ printf(" (");
+ map__fprintf_dsoname(al.map, stdout);
+ printf(")");
}
printf("\n");
@@ -1347,21 +1349,18 @@ void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
} else {
printf("%16" PRIx64, sample->ip);
if (print_sym) {
- if (al.sym && al.sym->name)
- symname = al.sym->name;
+ printf(" ");
+ if (print_symoffset)
+ symbol__fprintf_symname_offs(al.sym, &al,
+ stdout);
else
- symname = "";
-
- printf(" %s", symname);
+ symbol__fprintf_symname(al.sym, stdout);
}
if (print_dso) {
- if (al.map && al.map->dso && al.map->dso->name)
- dsoname = al.map->dso->name;
- else
- dsoname = "";
-
- printf(" (%s)", dsoname);
+ printf(" (");
+ map__fprintf_dsoname(al.map, stdout);
+ printf(")");
}
}
}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 37bc38381fb6..c8d90178e7de 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -147,7 +147,7 @@ struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
void perf_event__print_ip(union perf_event *event, struct perf_sample *sample,
struct machine *machine, struct perf_evsel *evsel,
- int print_sym, int print_dso);
+ int print_sym, int print_dso, int print_symoffset);
int perf_session__cpu_bitmap(struct perf_session *session,
const char *cpu_list, unsigned long *cpu_bitmap);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 36d4c5619575..d0f9f29cf181 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -24,11 +24,11 @@ cflags += getenv('CFLAGS', '').split()
build_lib = getenv('PYTHON_EXTBUILD_LIB')
build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+ext_sources = [f.strip() for f in file('util/python-ext-sources')
+ if len(f.strip()) > 0 and f[0] != '#']
+
perf = Extension('perf',
- sources = ['util/python.c', 'util/ctype.c', 'util/evlist.c',
- 'util/evsel.c', 'util/cpumap.c', 'util/thread_map.c',
- 'util/util.c', 'util/xyarray.c', 'util/cgroup.c',
- 'util/debugfs.c'],
+ sources = ext_sources,
include_dirs = ['util/include'],
extra_compile_args = cflags,
)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 0975438c3e72..5dd83c3e2c0c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1,4 +1,3 @@
-#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libgen.h>
@@ -12,6 +11,7 @@
#include <unistd.h>
#include <inttypes.h>
#include "build-id.h"
+#include "util.h"
#include "debug.h"
#include "symbol.h"
#include "strlist.h"
@@ -263,6 +263,28 @@ static size_t symbol__fprintf(struct symbol *sym, FILE *fp)
sym->name);
}
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al, FILE *fp)
+{
+ unsigned long offset;
+ size_t length;
+
+ if (sym && sym->name) {
+ length = fprintf(fp, "%s", sym->name);
+ if (al) {
+ offset = al->addr - sym->start;
+ length += fprintf(fp, "+0x%lx", offset);
+ }
+ return length;
+ } else
+ return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+ return symbol__fprintf_symname_offs(sym, NULL, fp);
+}
+
void dso__set_long_name(struct dso *dso, char *name)
{
if (name == NULL)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 123c2e14353e..2a683d4fc918 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -70,6 +70,7 @@ struct symbol_conf {
unsigned short priv_size;
unsigned short nr_events;
bool try_vmlinux_path,
+ show_kernel_path,
use_modules,
sort_by_name,
show_nr_samples,
@@ -241,6 +242,9 @@ void machines__destroy_guest_kernel_maps(struct rb_root *machines);
int symbol__init(void);
void symbol__exit(void);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
bool symbol_type__is_a(char symbol_type, enum map_type map_type);
size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
diff --git a/tools/perf/util/sysfs.c b/tools/perf/util/sysfs.c
new file mode 100644
index 000000000000..48c6902e749f
--- /dev/null
+++ b/tools/perf/util/sysfs.c
@@ -0,0 +1,60 @@
+
+#include "util.h"
+#include "sysfs.h"
+
+static const char * const sysfs_known_mountpoints[] = {
+ "/sys",
+ 0,
+};
+
+static int sysfs_found;
+char sysfs_mountpoint[PATH_MAX];
+
+static int sysfs_valid_mountpoint(const char *sysfs)
+{
+ struct statfs st_fs;
+
+ if (statfs(sysfs, &st_fs) < 0)
+ return -ENOENT;
+ else if (st_fs.f_type != (long) SYSFS_MAGIC)
+ return -ENOENT;
+
+ return 0;
+}
+
+const char *sysfs_find_mountpoint(void)
+{
+ const char * const *ptr;
+ char type[100];
+ FILE *fp;
+
+ if (sysfs_found)
+ return (const char *) sysfs_mountpoint;
+
+ ptr = sysfs_known_mountpoints;
+ while (*ptr) {
+ if (sysfs_valid_mountpoint(*ptr) == 0) {
+ sysfs_found = 1;
+ strcpy(sysfs_mountpoint, *ptr);
+ return sysfs_mountpoint;
+ }
+ ptr++;
+ }
+
+ /* give up and parse /proc/mounts */
+ fp = fopen("/proc/mounts", "r");
+ if (fp == NULL)
+ return NULL;
+
+ while (!sysfs_found &&
+ fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ sysfs_mountpoint, type) == 2) {
+
+ if (strcmp(type, "sysfs") == 0)
+ sysfs_found = 1;
+ }
+
+ fclose(fp);
+
+ return sysfs_found ? sysfs_mountpoint : NULL;
+}
diff --git a/tools/perf/util/sysfs.h b/tools/perf/util/sysfs.h
new file mode 100644
index 000000000000..a813b7203938
--- /dev/null
+++ b/tools/perf/util/sysfs.h
@@ -0,0 +1,6 @@
+#ifndef __SYSFS_H__
+#define __SYSFS_H__
+
+const char *sysfs_find_mountpoint(void);
+
+#endif /* __DEBUGFS_H__ */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index a5df131b77c3..e15983cf077d 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -1,6 +1,13 @@
#include <dirent.h>
+#include <limits.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "strlist.h"
+#include <string.h>
#include "thread_map.h"
/* Skip "." and ".." directories */
@@ -23,7 +30,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
sprintf(name, "/proc/%d/task", pid);
items = scandir(name, &namelist, filter, NULL);
if (items <= 0)
- return NULL;
+ return NULL;
threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
if (threads != NULL) {
@@ -51,14 +58,240 @@ struct thread_map *thread_map__new_by_tid(pid_t tid)
return threads;
}
-struct thread_map *thread_map__new(pid_t pid, pid_t tid)
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ DIR *proc;
+ int max_threads = 32, items, i;
+ char path[256];
+ struct dirent dirent, *next, **namelist = NULL;
+ struct thread_map *threads = malloc(sizeof(*threads) +
+ max_threads * sizeof(pid_t));
+ if (threads == NULL)
+ goto out;
+
+ proc = opendir("/proc");
+ if (proc == NULL)
+ goto out_free_threads;
+
+ threads->nr = 0;
+
+ while (!readdir_r(proc, &dirent, &next) && next) {
+ char *end;
+ bool grow = false;
+ struct stat st;
+ pid_t pid = strtol(dirent.d_name, &end, 10);
+
+ if (*end) /* only interested in proper numerical dirents */
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%s", dirent.d_name);
+
+ if (stat(path, &st) != 0)
+ continue;
+
+ if (st.st_uid != uid)
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%d/task", pid);
+ items = scandir(path, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_closedir;
+
+ while (threads->nr + items >= max_threads) {
+ max_threads *= 2;
+ grow = true;
+ }
+
+ if (grow) {
+ struct thread_map *tmp;
+
+ tmp = realloc(threads, (sizeof(*threads) +
+ max_threads * sizeof(pid_t)));
+ if (tmp == NULL)
+ goto out_free_namelist;
+
+ threads = tmp;
+ }
+
+ for (i = 0; i < items; i++)
+ threads->map[threads->nr + i] = atoi(namelist[i]->d_name);
+
+ for (i = 0; i < items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ threads->nr += items;
+ }
+
+out_closedir:
+ closedir(proc);
+out:
+ return threads;
+
+out_free_threads:
+ free(threads);
+ return NULL;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+out_free_closedir:
+ free(threads);
+ threads = NULL;
+ goto out_closedir;
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
{
if (pid != -1)
return thread_map__new_by_pid(pid);
+
+ if (tid == -1 && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
return thread_map__new_by_tid(tid);
}
+static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+ struct thread_map *threads = NULL, *nt;
+ char name[256];
+ int items, total_tasks = 0;
+ struct dirent **namelist = NULL;
+ int i, j = 0;
+ pid_t pid, prev_pid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist *slist = strlist__new(false, pid_str);
+
+ if (!slist)
+ return NULL;
+
+ strlist__for_each(pos, slist) {
+ pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (pid == prev_pid)
+ continue;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_threads;
+
+ total_tasks += items;
+ nt = realloc(threads, (sizeof(*threads) +
+ sizeof(pid_t) * total_tasks));
+ if (nt == NULL)
+ goto out_free_threads;
+
+ threads = nt;
+
+ if (threads) {
+ for (i = 0; i < items; i++)
+ threads->map[j++] = atoi(namelist[i]->d_name);
+ threads->nr = total_tasks;
+ }
+
+ for (i = 0; i < items; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ if (!threads)
+ break;
+ }
+
+out:
+ strlist__delete(slist);
+ return threads;
+
+out_free_threads:
+ free(threads);
+ threads = NULL;
+ goto out;
+}
+
+static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+ struct thread_map *threads = NULL, *nt;
+ int ntasks = 0;
+ pid_t tid, prev_tid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist *slist;
+
+ /* perf-stat expects threads to be generated even if tid not given */
+ if (!tid_str) {
+ threads = malloc(sizeof(*threads) + sizeof(pid_t));
+ if (threads != NULL) {
+ threads->map[1] = -1;
+ threads->nr = 1;
+ }
+ return threads;
+ }
+
+ slist = strlist__new(false, tid_str);
+ if (!slist)
+ return NULL;
+
+ strlist__for_each(pos, slist) {
+ tid = strtol(pos->s, &end_ptr, 10);
+
+ if (tid == INT_MIN || tid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (tid == prev_tid)
+ continue;
+
+ ntasks++;
+ nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+
+ if (nt == NULL)
+ goto out_free_threads;
+
+ threads = nt;
+ threads->map[ntasks - 1] = tid;
+ threads->nr = ntasks;
+ }
+out:
+ return threads;
+
+out_free_threads:
+ free(threads);
+ threads = NULL;
+ goto out;
+}
+
+struct thread_map *thread_map__new_str(const char *pid, const char *tid,
+ uid_t uid)
+{
+ if (pid)
+ return thread_map__new_by_pid_str(pid);
+
+ if (!tid && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
+ return thread_map__new_by_tid_str(tid);
+}
+
void thread_map__delete(struct thread_map *threads)
{
free(threads);
}
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d thread%s: ",
+ threads->nr, threads->nr > 1 ? "s" : "");
+ for (i = 0; i < threads->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]);
+
+ return printed + fprintf(fp, "\n");
+}
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3cb907311409..7da80f14418b 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -2,6 +2,7 @@
#define __PERF_THREAD_MAP_H
#include <sys/types.h>
+#include <stdio.h>
struct thread_map {
int nr;
@@ -10,6 +11,14 @@ struct thread_map {
struct thread_map *thread_map__new_by_pid(pid_t pid);
struct thread_map *thread_map__new_by_tid(pid_t tid);
-struct thread_map *thread_map__new(pid_t pid, pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+
+struct thread_map *thread_map__new_str(const char *pid,
+ const char *tid, uid_t uid);
+
void thread_map__delete(struct thread_map *threads);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 500471dffa4f..09fe579ccafb 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -69,12 +69,15 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
ret += SNPRINTF(bf + ret, size - ret, "], ");
- if (top->target_pid != -1)
- ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %d",
+ if (top->target_pid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
top->target_pid);
- else if (top->target_tid != -1)
- ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %d",
+ else if (top->target_tid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
top->target_tid);
+ else if (top->uid_str != NULL)
+ ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+ top->uid_str);
else
ret += SNPRINTF(bf + ret, size - ret, " (all");
@@ -82,7 +85,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
top->evlist->cpus->nr > 1 ? "s" : "", top->cpu_list);
else {
- if (top->target_tid != -1)
+ if (top->target_tid)
ret += SNPRINTF(bf + ret, size - ret, ")");
else
ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index a248f3c2c60d..ce61cb2d1acf 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -23,7 +23,8 @@ struct perf_top {
u64 guest_us_samples, guest_kernel_samples;
int print_entries, count_filter, delay_secs;
int freq;
- pid_t target_pid, target_tid;
+ const char *target_pid, *target_tid;
+ uid_t uid;
bool hide_kernel_symbols, hide_user_symbols, zero;
bool system_wide;
bool use_tui, use_stdio;
@@ -33,7 +34,8 @@ struct perf_top {
bool vmlinux_warned;
bool inherit;
bool group;
- bool sample_id_all_avail;
+ bool sample_id_all_missing;
+ bool exclude_guest_missing;
bool dump_symtab;
const char *cpu_list;
struct hist_entry *sym_filter_entry;
@@ -45,6 +47,7 @@ struct perf_top {
int realtime_prio;
int sym_pcnt_filter;
const char *sym_filter;
+ const char *uid_str;
};
size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 1a8d4dc4f386..e0a4f652f289 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -25,7 +25,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
#include <errno.h>
#include "../perf.h"
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f55cc3a765a1..b9592e0de8d7 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -33,7 +33,6 @@
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
-#include <ctype.h>
#include <errno.h>
#include "../perf.h"
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index a3fdf55f317b..18ae6c1831d3 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -22,7 +22,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <ctype.h>
#include <errno.h>
#include "../perf.h"
diff --git a/tools/perf/util/ui/browsers/hists.c b/tools/perf/util/ui/browsers/hists.c
index e81aef1f2569..bfba0490c098 100644
--- a/tools/perf/util/ui/browsers/hists.c
+++ b/tools/perf/util/ui/browsers/hists.c
@@ -839,6 +839,9 @@ static int hists__browser_title(struct hists *self, char *bf, size_t size,
nr_events = convert_unit(nr_events, &unit);
printed = snprintf(bf, size, "Events: %lu%c %s", nr_events, unit, ev_name);
+ if (self->uid_filter_str)
+ printed += snprintf(bf + printed, size - printed,
+ ", UID: %s", self->uid_filter_str);
if (thread)
printed += snprintf(bf + printed, size - printed,
", Thread: %s(%d)",
diff --git a/tools/perf/util/ui/browsers/map.c b/tools/perf/util/ui/browsers/map.c
index 6905bcc8be2d..eca6575abfd0 100644
--- a/tools/perf/util/ui/browsers/map.c
+++ b/tools/perf/util/ui/browsers/map.c
@@ -3,9 +3,9 @@
#include <newt.h>
#include <inttypes.h>
#include <sys/ttydefaults.h>
-#include <ctype.h>
#include <string.h>
#include <linux/bitops.h>
+#include "../../util.h"
#include "../../debug.h"
#include "../../symbol.h"
#include "../browser.h"
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index d76d1c0ff98f..52bb07c6442a 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -7,6 +7,7 @@
* Copyright (C) Linus Torvalds, 2005
*/
#include "util.h"
+#include "debug.h"
static void report(const char *prefix, const char *err, va_list params)
{
@@ -81,3 +82,41 @@ void warning(const char *warn, ...)
warn_routine(warn, params);
va_end(params);
}
+
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid)
+{
+ struct passwd pwd, *result;
+ char buf[1024];
+
+ if (str == NULL)
+ return UINT_MAX;
+
+ /* UID and PID are mutually exclusive */
+ if (tid || pid) {
+ ui__warning("PID/TID switch overriding UID\n");
+ sleep(1);
+ return UINT_MAX;
+ }
+
+ getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ char *endptr;
+ int uid = strtol(str, &endptr, 10);
+
+ if (*endptr != '\0') {
+ ui__error("Invalid user %s\n", str);
+ return UINT_MAX - 1;
+ }
+
+ getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ ui__error("Problems obtaining information for user %s\n",
+ str);
+ return UINT_MAX - 1;
+ }
+ }
+
+ return result->pw_uid;
+}
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 813141047fc2..8109a907841e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -6,7 +6,7 @@
* XXX We need to find a better place for these things...
*/
bool perf_host = true;
-bool perf_guest = true;
+bool perf_guest = false;
void event_attr_init(struct perf_event_attr *attr)
{
@@ -14,6 +14,8 @@ void event_attr_init(struct perf_event_attr *attr)
attr->exclude_host = 1;
if (!perf_guest)
attr->exclude_guest = 1;
+ /* to capture ABI version */
+ attr->size = sizeof(*attr);
}
int mkdir_p(char *path, mode_t mode)
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index ecf9898169c8..0f99f394d8e0 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -199,6 +199,8 @@ static inline int has_extension(const char *filename, const char *ext)
#undef isalpha
#undef isprint
#undef isalnum
+#undef islower
+#undef isupper
#undef tolower
#undef toupper
@@ -219,6 +221,8 @@ extern unsigned char sane_ctype[256];
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) sane_istest(x,GIT_PRINT)
+#define islower(x) (sane_istest(x,GIT_ALPHA) && sane_istest(x,0x20))
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !sane_istest(x,0x20))
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)
@@ -245,6 +249,8 @@ struct perf_event_attr;
void event_attr_init(struct perf_event_attr *attr);
+uid_t parse_target_uid(const char *str, const char *tid, const char *pid);
+
#define _STR(x) #x
#define STR(x) _STR(x)